| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* idna.c	Convert to or from IDN strings. | 
 | 2 |  * Copyright (C) 2002, 2003, 2004, 2011  Simon Josefsson | 
 | 3 |  * | 
 | 4 |  * This file is part of GNU Libidn. | 
 | 5 |  * | 
 | 6 |  * GNU Libidn is free software; you can redistribute it and/or | 
 | 7 |  * modify it under the terms of the GNU Lesser General Public | 
 | 8 |  * License as published by the Free Software Foundation; either | 
 | 9 |  * version 2.1 of the License, or (at your option) any later version. | 
 | 10 |  * | 
 | 11 |  * GNU Libidn is distributed in the hope that it will be useful, | 
 | 12 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 13 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
 | 14 |  * Lesser General Public License for more details. | 
 | 15 |  * | 
 | 16 |  * You should have received a copy of the GNU Lesser General Public | 
 | 17 |  * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. | 
 | 18 |  */ | 
 | 19 |  | 
 | 20 | #if HAVE_CONFIG_H | 
 | 21 | # include "config.h" | 
 | 22 | #endif | 
 | 23 |  | 
 | 24 | #include <stdlib.h> | 
 | 25 | #include <string.h> | 
 | 26 | #include <stringprep.h> | 
 | 27 | #include <punycode.h> | 
 | 28 | #include <stdint.h> | 
 | 29 |  | 
 | 30 | #include "idna.h" | 
 | 31 |  | 
 | 32 | #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||	\ | 
 | 33 | 		 (c) == 0xFF0E || (c) == 0xFF61) | 
 | 34 |  | 
 | 35 | /* Core functions */ | 
 | 36 |  | 
 | 37 | /** | 
 | 38 |  * idna_to_ascii_4i | 
 | 39 |  * @in: input array with unicode code points. | 
 | 40 |  * @inlen: length of input array with unicode code points. | 
 | 41 |  * @out: output zero terminated string that must have room for at | 
 | 42 |  *       least 63 characters plus the terminating zero. | 
 | 43 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 44 |  * | 
 | 45 |  * The ToASCII operation takes a sequence of Unicode code points that make | 
 | 46 |  * up one label and transforms it into a sequence of code points in the | 
 | 47 |  * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the | 
 | 48 |  * resulting sequence are equivalent labels. | 
 | 49 |  * | 
 | 50 |  * It is important to note that the ToASCII operation can fail. ToASCII | 
 | 51 |  * fails if any step of it fails. If any step of the ToASCII operation | 
 | 52 |  * fails on any label in a domain name, that domain name MUST NOT be used | 
 | 53 |  * as an internationalized domain name. The method for deadling with this | 
 | 54 |  * failure is application-specific. | 
 | 55 |  * | 
 | 56 |  * The inputs to ToASCII are a sequence of code points, the AllowUnassigned | 
 | 57 |  * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a | 
 | 58 |  * sequence of ASCII code points or a failure condition. | 
 | 59 |  * | 
 | 60 |  * ToASCII never alters a sequence of code points that are all in the ASCII | 
 | 61 |  * range to begin with (although it could fail). Applying the ToASCII | 
 | 62 |  * operation multiple times has exactly the same effect as applying it just | 
 | 63 |  * once. | 
 | 64 |  * | 
 | 65 |  * Return value: Returns 0 on success, or an error code. | 
 | 66 |  */ | 
 | 67 | int | 
 | 68 | idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags) | 
 | 69 | { | 
 | 70 |   size_t len, outlen; | 
 | 71 |   uint32_t *src;		/* XXX don't need to copy data? */ | 
 | 72 |   int rc; | 
 | 73 |  | 
 | 74 |   /* | 
 | 75 |    * ToASCII consists of the following steps: | 
 | 76 |    * | 
 | 77 |    * 1. If all code points in the sequence are in the ASCII range (0..7F) | 
 | 78 |    * then skip to step 3. | 
 | 79 |    */ | 
 | 80 |  | 
 | 81 |   { | 
 | 82 |     size_t i; | 
 | 83 |     int inasciirange; | 
 | 84 |  | 
 | 85 |     inasciirange = 1; | 
 | 86 |     for (i = 0; i < inlen; i++) | 
 | 87 |       if (in[i] > 0x7F) | 
 | 88 | 	inasciirange = 0; | 
 | 89 |     if (inasciirange) | 
 | 90 |       { | 
 | 91 | 	src = malloc (sizeof (in[0]) * (inlen + 1)); | 
 | 92 | 	if (src == NULL) | 
 | 93 | 	  return IDNA_MALLOC_ERROR; | 
 | 94 |  | 
 | 95 | 	memcpy (src, in, sizeof (in[0]) * inlen); | 
 | 96 | 	src[inlen] = 0; | 
 | 97 |  | 
 | 98 | 	goto step3; | 
 | 99 |       } | 
 | 100 |   } | 
 | 101 |  | 
 | 102 |   /* | 
 | 103 |    * 2. Perform the steps specified in [NAMEPREP] and fail if there is | 
 | 104 |    * an error. The AllowUnassigned flag is used in [NAMEPREP]. | 
 | 105 |    */ | 
 | 106 |  | 
 | 107 |   { | 
 | 108 |     char *p; | 
 | 109 |  | 
 | 110 |     p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL); | 
 | 111 |     if (p == NULL) | 
 | 112 |       return IDNA_MALLOC_ERROR; | 
 | 113 |  | 
 | 114 |     len = strlen (p); | 
 | 115 |     do | 
 | 116 |       { | 
 | 117 | 	char *newp; | 
 | 118 |  | 
 | 119 | 	len = 2 * len + 10;	/* XXX better guess? */ | 
 | 120 | 	newp = realloc (p, len); | 
 | 121 | 	if (newp == NULL) | 
 | 122 | 	  { | 
 | 123 | 	    free (p); | 
 | 124 | 	    return IDNA_MALLOC_ERROR; | 
 | 125 | 	  } | 
 | 126 | 	p = newp; | 
 | 127 |  | 
 | 128 | 	if (flags & IDNA_ALLOW_UNASSIGNED) | 
 | 129 | 	  rc = stringprep_nameprep (p, len); | 
 | 130 | 	else | 
 | 131 | 	  rc = stringprep_nameprep_no_unassigned (p, len); | 
 | 132 |       } | 
 | 133 |     while (rc == STRINGPREP_TOO_SMALL_BUFFER); | 
 | 134 |  | 
 | 135 |     if (rc != STRINGPREP_OK) | 
 | 136 |       { | 
 | 137 | 	free (p); | 
 | 138 | 	return IDNA_STRINGPREP_ERROR; | 
 | 139 |       } | 
 | 140 |  | 
 | 141 |     src = stringprep_utf8_to_ucs4 (p, -1, NULL); | 
 | 142 |  | 
 | 143 |     free (p); | 
 | 144 |   } | 
 | 145 |  | 
 | 146 | step3: | 
 | 147 |   /* | 
 | 148 |    * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks: | 
 | 149 |    * | 
 | 150 |    * (a) Verify the absence of non-LDH ASCII code points; that is, | 
 | 151 |    * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. | 
 | 152 |    * | 
 | 153 |    * (b) Verify the absence of leading and trailing hyphen-minus; | 
 | 154 |    * that is, the absence of U+002D at the beginning and end of | 
 | 155 |    * the sequence. | 
 | 156 |    */ | 
 | 157 |  | 
 | 158 |   if (flags & IDNA_USE_STD3_ASCII_RULES) | 
 | 159 |     { | 
 | 160 |       size_t i; | 
 | 161 |  | 
 | 162 |       for (i = 0; src[i]; i++) | 
 | 163 | 	if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F || | 
 | 164 | 	    (src[i] >= 0x3A && src[i] <= 0x40) || | 
 | 165 | 	    (src[i] >= 0x5B && src[i] <= 0x60) || | 
 | 166 | 	    (src[i] >= 0x7B && src[i] <= 0x7F)) | 
 | 167 | 	  { | 
 | 168 | 	    free (src); | 
 | 169 | 	    return IDNA_CONTAINS_NON_LDH; | 
 | 170 | 	  } | 
 | 171 |  | 
 | 172 |       if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D)) | 
 | 173 | 	{ | 
 | 174 | 	  free (src); | 
 | 175 | 	  return IDNA_CONTAINS_MINUS; | 
 | 176 | 	} | 
 | 177 |     } | 
 | 178 |  | 
 | 179 |   /* | 
 | 180 |    * 4. If all code points in the sequence are in the ASCII range | 
 | 181 |    * (0..7F), then skip to step 8. | 
 | 182 |    */ | 
 | 183 |  | 
 | 184 |   { | 
 | 185 |     size_t i; | 
 | 186 |     int inasciirange; | 
 | 187 |  | 
 | 188 |     inasciirange = 1; | 
 | 189 |     for (i = 0; src[i]; i++) | 
 | 190 |       { | 
 | 191 | 	if (src[i] > 0x7F) | 
 | 192 | 	  inasciirange = 0; | 
 | 193 | 	/* copy string to output buffer if we are about to skip to step8 */ | 
 | 194 | 	if (i < 64) | 
 | 195 | 	  out[i] = src[i]; | 
 | 196 |       } | 
 | 197 |     if (i < 64) | 
 | 198 |       out[i] = '\0'; | 
 | 199 |     if (inasciirange) | 
 | 200 |       goto step8; | 
 | 201 |   } | 
 | 202 |  | 
 | 203 |   /* | 
 | 204 |    * 5. Verify that the sequence does NOT begin with the ACE prefix. | 
 | 205 |    * | 
 | 206 |    */ | 
 | 207 |  | 
 | 208 |   { | 
 | 209 |     size_t i; | 
 | 210 |     int match; | 
 | 211 |  | 
 | 212 |     match = 1; | 
 | 213 |     for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++) | 
 | 214 |       if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i]) | 
 | 215 | 	match = 0; | 
 | 216 |     if (match) | 
 | 217 |       { | 
 | 218 | 	free (src); | 
 | 219 | 	return IDNA_CONTAINS_ACE_PREFIX; | 
 | 220 |       } | 
 | 221 |   } | 
 | 222 |  | 
 | 223 |   /* | 
 | 224 |    * 6. Encode the sequence using the encoding algorithm in [PUNYCODE] | 
 | 225 |    * and fail if there is an error. | 
 | 226 |    */ | 
 | 227 |   for (len = 0; src[len]; len++) | 
 | 228 |     ; | 
 | 229 |   src[len] = '\0'; | 
 | 230 |   outlen = 63 - strlen (IDNA_ACE_PREFIX); | 
 | 231 |   rc = punycode_encode (len, src, NULL, | 
 | 232 | 			&outlen, &out[strlen (IDNA_ACE_PREFIX)]); | 
 | 233 |   if (rc != PUNYCODE_SUCCESS) | 
 | 234 |     { | 
 | 235 |       free (src); | 
 | 236 |       return IDNA_PUNYCODE_ERROR; | 
 | 237 |     } | 
 | 238 |   out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0'; | 
 | 239 |  | 
 | 240 |   /* | 
 | 241 |    * 7. Prepend the ACE prefix. | 
 | 242 |    */ | 
 | 243 |  | 
 | 244 |   memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)); | 
 | 245 |  | 
 | 246 |   /* | 
 | 247 |    * 8. Verify that the number of code points is in the range 1 to 63 | 
 | 248 |    * inclusive (0 is excluded). | 
 | 249 |    */ | 
 | 250 |  | 
 | 251 | step8: | 
 | 252 |   free (src); | 
 | 253 |   if (strlen (out) < 1 || strlen (out) > 63) | 
 | 254 |     return IDNA_INVALID_LENGTH; | 
 | 255 |  | 
 | 256 |   return IDNA_SUCCESS; | 
 | 257 | } | 
 | 258 |  | 
 | 259 | /* ToUnicode().  May realloc() utf8in. */ | 
 | 260 | static int | 
 | 261 | idna_to_unicode_internal (char *utf8in, | 
 | 262 | 			  uint32_t * out, size_t * outlen, int flags) | 
 | 263 | { | 
 | 264 |   int rc; | 
 | 265 |   char tmpout[64]; | 
 | 266 |   size_t utf8len = strlen (utf8in) + 1; | 
 | 267 |   size_t addlen = 0; | 
 | 268 |  | 
 | 269 |   /* | 
 | 270 |    * ToUnicode consists of the following steps: | 
 | 271 |    * | 
 | 272 |    * 1. If the sequence contains any code points outside the ASCII range | 
 | 273 |    * (0..7F) then proceed to step 2, otherwise skip to step 3. | 
 | 274 |    */ | 
 | 275 |  | 
 | 276 |   { | 
 | 277 |     size_t i; | 
 | 278 |     int inasciirange; | 
 | 279 |  | 
 | 280 |     inasciirange = 1; | 
 | 281 |     for (i = 0; utf8in[i]; i++) | 
 | 282 |       if (utf8in[i] & ~0x7F) | 
 | 283 | 	inasciirange = 0; | 
 | 284 |     if (inasciirange) | 
 | 285 |       goto step3; | 
 | 286 |   } | 
 | 287 |  | 
 | 288 |   /* | 
 | 289 |    * 2. Perform the steps specified in [NAMEPREP] and fail if there is an | 
 | 290 |    * error. (If step 3 of ToASCII is also performed here, it will not | 
 | 291 |    * affect the overall behavior of ToUnicode, but it is not | 
 | 292 |    * necessary.) The AllowUnassigned flag is used in [NAMEPREP]. | 
 | 293 |    */ | 
 | 294 |   do | 
 | 295 |     { | 
 | 296 |       char *newp = realloc (utf8in, utf8len + addlen); | 
 | 297 |       if (newp == NULL) | 
 | 298 | 	{ | 
 | 299 | 	  free (utf8in); | 
 | 300 | 	  return IDNA_MALLOC_ERROR; | 
 | 301 | 	} | 
 | 302 |       utf8in = newp; | 
 | 303 |       if (flags & IDNA_ALLOW_UNASSIGNED) | 
 | 304 | 	rc = stringprep_nameprep (utf8in, utf8len + addlen); | 
 | 305 |       else | 
 | 306 | 	rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen); | 
 | 307 |       addlen += 1; | 
 | 308 |     } | 
 | 309 |   while (rc == STRINGPREP_TOO_SMALL_BUFFER); | 
 | 310 |  | 
 | 311 |   if (rc != STRINGPREP_OK) | 
 | 312 |     { | 
 | 313 |       free (utf8in); | 
 | 314 |       return IDNA_STRINGPREP_ERROR; | 
 | 315 |     } | 
 | 316 |  | 
 | 317 |   /* 3. Verify that the sequence begins with the ACE prefix, and save a | 
 | 318 |    * copy of the sequence. | 
 | 319 |    */ | 
 | 320 |  | 
 | 321 | step3: | 
 | 322 |   if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0) | 
 | 323 |     { | 
 | 324 |       free (utf8in); | 
 | 325 |       return IDNA_NO_ACE_PREFIX; | 
 | 326 |     } | 
 | 327 |  | 
 | 328 |   /* 4. Remove the ACE prefix. | 
 | 329 |    */ | 
 | 330 |  | 
 | 331 |   memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)], | 
 | 332 | 	   strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1); | 
 | 333 |  | 
 | 334 |   /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE] | 
 | 335 |    * and fail if there is an error. Save a copy of the result of | 
 | 336 |    * this step. | 
 | 337 |    */ | 
 | 338 |  | 
 | 339 |   (*outlen)--;			/* reserve one for the zero */ | 
 | 340 |  | 
 | 341 |   rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL); | 
 | 342 |   if (rc != PUNYCODE_SUCCESS) | 
 | 343 |     { | 
 | 344 |       free (utf8in); | 
 | 345 |       return IDNA_PUNYCODE_ERROR; | 
 | 346 |     } | 
 | 347 |  | 
 | 348 |   out[*outlen] = 0;		/* add zero */ | 
 | 349 |  | 
 | 350 |   /* 6. Apply ToASCII. | 
 | 351 |    */ | 
 | 352 |  | 
 | 353 |   rc = idna_to_ascii_4i (out, *outlen, tmpout, flags); | 
 | 354 |   if (rc != IDNA_SUCCESS) | 
 | 355 |     { | 
 | 356 |       free (utf8in); | 
 | 357 |       return rc; | 
 | 358 |     } | 
 | 359 |  | 
 | 360 |   /* 7. Verify that the result of step 6 matches the saved copy from | 
 | 361 |    * step 3, using a case-insensitive ASCII comparison. | 
 | 362 |    */ | 
 | 363 |  | 
 | 364 |   if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0) | 
 | 365 |     { | 
 | 366 |       free (utf8in); | 
 | 367 |       return IDNA_ROUNDTRIP_VERIFY_ERROR; | 
 | 368 |     } | 
 | 369 |  | 
 | 370 |   /* 8. Return the saved copy from step 5. | 
 | 371 |    */ | 
 | 372 |  | 
 | 373 |   free (utf8in); | 
 | 374 |   return IDNA_SUCCESS; | 
 | 375 | } | 
 | 376 |  | 
 | 377 | /** | 
 | 378 |  * idna_to_unicode_44i | 
 | 379 |  * @in: input array with unicode code points. | 
 | 380 |  * @inlen: length of input array with unicode code points. | 
 | 381 |  * @out: output array with unicode code points. | 
 | 382 |  * @outlen: on input, maximum size of output array with unicode code points, | 
 | 383 |  *          on exit, actual size of output array with unicode code points. | 
 | 384 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 385 |  * | 
 | 386 |  * The ToUnicode operation takes a sequence of Unicode code points | 
 | 387 |  * that make up one label and returns a sequence of Unicode code | 
 | 388 |  * points. If the input sequence is a label in ACE form, then the | 
 | 389 |  * result is an equivalent internationalized label that is not in ACE | 
 | 390 |  * form, otherwise the original sequence is returned unaltered. | 
 | 391 |  * | 
 | 392 |  * ToUnicode never fails. If any step fails, then the original input | 
 | 393 |  * sequence is returned immediately in that step. | 
 | 394 |  * | 
 | 395 |  * The Punycode decoder can never output more code points than it | 
 | 396 |  * inputs, but Nameprep can, and therefore ToUnicode can.  Note that | 
 | 397 |  * the number of octets needed to represent a sequence of code points | 
 | 398 |  * depends on the particular character encoding used. | 
 | 399 |  * | 
 | 400 |  * The inputs to ToUnicode are a sequence of code points, the | 
 | 401 |  * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of | 
 | 402 |  * ToUnicode is always a sequence of Unicode code points. | 
 | 403 |  * | 
 | 404 |  * Return value: Returns error condition, but it must only be used for | 
 | 405 |  *               debugging purposes.  The output buffer is always | 
 | 406 |  *               guaranteed to contain the correct data according to | 
 | 407 |  *               the specification (sans malloc induced errors).  NB! | 
 | 408 |  *               This means that you normally ignore the return code | 
 | 409 |  *               from this function, as checking it means breaking the | 
 | 410 |  *               standard. | 
 | 411 |  */ | 
 | 412 | int | 
 | 413 | idna_to_unicode_44i (const uint32_t * in, size_t inlen, | 
 | 414 | 		     uint32_t * out, size_t * outlen, int flags) | 
 | 415 | { | 
 | 416 |   int rc; | 
 | 417 |   size_t outlensave = *outlen; | 
 | 418 |   char *p; | 
 | 419 |  | 
 | 420 |   p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL); | 
 | 421 |   if (p == NULL) | 
 | 422 |     return IDNA_MALLOC_ERROR; | 
 | 423 |  | 
 | 424 |   rc = idna_to_unicode_internal (p, out, outlen, flags); | 
 | 425 |   if (rc != IDNA_SUCCESS) | 
 | 426 |     { | 
 | 427 |       memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ? | 
 | 428 | 					 inlen : outlensave)); | 
 | 429 |       *outlen = inlen; | 
 | 430 |     } | 
 | 431 |  | 
 | 432 |   /* p is freed in idna_to_unicode_internal.  */ | 
 | 433 |  | 
 | 434 |   return rc; | 
 | 435 | } | 
 | 436 |  | 
 | 437 | /* Wrappers that handle several labels */ | 
 | 438 |  | 
 | 439 | /** | 
 | 440 |  * idna_to_ascii_4z: | 
 | 441 |  * @input: zero terminated input Unicode string. | 
 | 442 |  * @output: pointer to newly allocated output string. | 
 | 443 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 444 |  * | 
 | 445 |  * Convert UCS-4 domain name to ASCII string.  The domain name may | 
 | 446 |  * contain several labels, separated by dots.  The output buffer must | 
 | 447 |  * be deallocated by the caller. | 
 | 448 |  * | 
 | 449 |  * Return value: Returns IDNA_SUCCESS on success, or error code. | 
 | 450 |  **/ | 
 | 451 | int | 
 | 452 | idna_to_ascii_4z (const uint32_t * input, char **output, int flags) | 
 | 453 | { | 
 | 454 |   const uint32_t *start = input; | 
 | 455 |   const uint32_t *end = input; | 
 | 456 |   char buf[64]; | 
 | 457 |   char *out = NULL; | 
 | 458 |   int rc; | 
 | 459 |  | 
 | 460 |   /* 1) Whenever dots are used as label separators, the following | 
 | 461 |      characters MUST be recognized as dots: U+002E (full stop), | 
 | 462 |      U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), | 
 | 463 |      U+FF61 (halfwidth ideographic full stop). */ | 
 | 464 |  | 
 | 465 |   if (input[0] == 0) | 
 | 466 |     { | 
 | 467 |       /* Handle implicit zero-length root label. */ | 
 | 468 |       *output = malloc (1); | 
 | 469 |       if (!*output) | 
 | 470 | 	return IDNA_MALLOC_ERROR; | 
 | 471 |       strcpy (*output, ""); | 
 | 472 |       return IDNA_SUCCESS; | 
 | 473 |     } | 
 | 474 |  | 
 | 475 |   if (DOTP (input[0]) && input[1] == 0) | 
 | 476 |     { | 
 | 477 |       /* Handle explicit zero-length root label. */ | 
 | 478 |       *output = malloc (2); | 
 | 479 |       if (!*output) | 
 | 480 | 	return IDNA_MALLOC_ERROR; | 
 | 481 |       strcpy (*output, "."); | 
 | 482 |       return IDNA_SUCCESS; | 
 | 483 |     } | 
 | 484 |  | 
 | 485 |   *output = NULL; | 
 | 486 |   do | 
 | 487 |     { | 
 | 488 |       end = start; | 
 | 489 |  | 
 | 490 |       for (; *end && !DOTP (*end); end++) | 
 | 491 | 	; | 
 | 492 |  | 
 | 493 |       if (*end == '\0' && start == end) | 
 | 494 | 	{ | 
 | 495 | 	  /* Handle explicit zero-length root label. */ | 
 | 496 | 	  buf[0] = '\0'; | 
 | 497 | 	} | 
 | 498 |       else | 
 | 499 | 	{ | 
 | 500 | 	  rc = idna_to_ascii_4i (start, end - start, buf, flags); | 
 | 501 | 	  if (rc != IDNA_SUCCESS) | 
 | 502 | 	    return rc; | 
 | 503 | 	} | 
 | 504 |  | 
 | 505 |       if (out) | 
 | 506 | 	{ | 
 | 507 | 	  char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1); | 
 | 508 | 	  if (!newp) | 
 | 509 | 	    { | 
 | 510 | 	      free (out); | 
 | 511 | 	      return IDNA_MALLOC_ERROR; | 
 | 512 | 	    } | 
 | 513 | 	  out = newp; | 
 | 514 | 	  strcat (out, "."); | 
 | 515 | 	  strcat (out, buf); | 
 | 516 | 	} | 
 | 517 |       else | 
 | 518 | 	{ | 
 | 519 | 	  out = (char *) malloc (strlen (buf) + 1); | 
 | 520 | 	  if (!out) | 
 | 521 | 	    return IDNA_MALLOC_ERROR; | 
 | 522 | 	  strcpy (out, buf); | 
 | 523 | 	} | 
 | 524 |  | 
 | 525 |       start = end + 1; | 
 | 526 |     } | 
 | 527 |   while (*end); | 
 | 528 |  | 
 | 529 |   *output = out; | 
 | 530 |  | 
 | 531 |   return IDNA_SUCCESS; | 
 | 532 | } | 
 | 533 |  | 
 | 534 | /** | 
 | 535 |  * idna_to_ascii_8z: | 
 | 536 |  * @input: zero terminated input UTF-8 string. | 
 | 537 |  * @output: pointer to newly allocated output string. | 
 | 538 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 539 |  * | 
 | 540 |  * Convert UTF-8 domain name to ASCII string.  The domain name may | 
 | 541 |  * contain several labels, separated by dots.  The output buffer must | 
 | 542 |  * be deallocated by the caller. | 
 | 543 |  * | 
 | 544 |  * Return value: Returns IDNA_SUCCESS on success, or error code. | 
 | 545 |  **/ | 
 | 546 | int | 
 | 547 | idna_to_ascii_8z (const char *input, char **output, int flags) | 
 | 548 | { | 
 | 549 |   uint32_t *ucs4; | 
 | 550 |   size_t ucs4len; | 
 | 551 |   int rc; | 
 | 552 |  | 
 | 553 |   ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); | 
 | 554 |   if (!ucs4) | 
 | 555 |     return IDNA_ICONV_ERROR; | 
 | 556 |  | 
 | 557 |   rc = idna_to_ascii_4z (ucs4, output, flags); | 
 | 558 |  | 
 | 559 |   free (ucs4); | 
 | 560 |  | 
 | 561 |   return rc; | 
 | 562 |  | 
 | 563 | } | 
 | 564 |  | 
 | 565 | /** | 
 | 566 |  * idna_to_ascii_lz: | 
 | 567 |  * @input: zero terminated input UTF-8 string. | 
 | 568 |  * @output: pointer to newly allocated output string. | 
 | 569 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 570 |  * | 
 | 571 |  * Convert domain name in the locale's encoding to ASCII string.  The | 
 | 572 |  * domain name may contain several labels, separated by dots.  The | 
 | 573 |  * output buffer must be deallocated by the caller. | 
 | 574 |  * | 
 | 575 |  * Return value: Returns IDNA_SUCCESS on success, or error code. | 
 | 576 |  **/ | 
 | 577 | int | 
 | 578 | idna_to_ascii_lz (const char *input, char **output, int flags) | 
 | 579 | { | 
 | 580 |   char *utf8; | 
 | 581 |   int rc; | 
 | 582 |  | 
 | 583 |   utf8 = stringprep_locale_to_utf8 (input); | 
 | 584 |   if (!utf8) | 
 | 585 |     return IDNA_ICONV_ERROR; | 
 | 586 |  | 
 | 587 |   rc = idna_to_ascii_8z (utf8, output, flags); | 
 | 588 |  | 
 | 589 |   free (utf8); | 
 | 590 |  | 
 | 591 |   return rc; | 
 | 592 | } | 
 | 593 |  | 
 | 594 | /** | 
 | 595 |  * idna_to_unicode_4z4z: | 
 | 596 |  * @input: zero-terminated Unicode string. | 
 | 597 |  * @output: pointer to newly allocated output Unicode string. | 
 | 598 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 599 |  * | 
 | 600 |  * Convert possibly ACE encoded domain name in UCS-4 format into a | 
 | 601 |  * UCS-4 string.  The domain name may contain several labels, | 
 | 602 |  * separated by dots.  The output buffer must be deallocated by the | 
 | 603 |  * caller. | 
 | 604 |  * | 
 | 605 |  * Return value: Returns IDNA_SUCCESS on success, or error code. | 
 | 606 |  **/ | 
 | 607 | int | 
 | 608 | idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags) | 
 | 609 | { | 
 | 610 |   const uint32_t *start = input; | 
 | 611 |   const uint32_t *end = input; | 
 | 612 |   uint32_t *buf; | 
 | 613 |   size_t buflen; | 
 | 614 |   uint32_t *out = NULL; | 
 | 615 |   size_t outlen = 0; | 
 | 616 |  | 
 | 617 |   *output = NULL; | 
 | 618 |  | 
 | 619 |   do | 
 | 620 |     { | 
 | 621 |       end = start; | 
 | 622 |  | 
 | 623 |       for (; *end && !DOTP (*end); end++) | 
 | 624 | 	; | 
 | 625 |  | 
 | 626 |       buflen = end - start; | 
 | 627 |       buf = malloc (sizeof (buf[0]) * (buflen + 1)); | 
 | 628 |       if (!buf) | 
 | 629 | 	return IDNA_MALLOC_ERROR; | 
 | 630 |  | 
 | 631 |       idna_to_unicode_44i (start, end - start, buf, &buflen, flags); | 
 | 632 |       /* don't check return value as per specification! */ | 
 | 633 |  | 
 | 634 |       if (out) | 
 | 635 | 	{ | 
 | 636 | 	  uint32_t *newp = realloc (out, | 
 | 637 | 				    sizeof (out[0]) | 
 | 638 | 				    * (outlen + 1 + buflen + 1)); | 
 | 639 | 	  if (!newp) | 
 | 640 | 	    { | 
 | 641 | 	      free (buf); | 
 | 642 | 	      free (out); | 
 | 643 | 	      return IDNA_MALLOC_ERROR; | 
 | 644 | 	    } | 
 | 645 | 	  out = newp; | 
 | 646 | 	  out[outlen++] = 0x002E;	/* '.' (full stop) */ | 
 | 647 | 	  memcpy (out + outlen, buf, sizeof (buf[0]) * buflen); | 
 | 648 | 	  outlen += buflen; | 
 | 649 | 	  out[outlen] = 0x0; | 
 | 650 | 	  free (buf); | 
 | 651 | 	} | 
 | 652 |       else | 
 | 653 | 	{ | 
 | 654 | 	  out = buf; | 
 | 655 | 	  outlen = buflen; | 
 | 656 | 	  out[outlen] = 0x0; | 
 | 657 | 	} | 
 | 658 |  | 
 | 659 |       start = end + 1; | 
 | 660 |     } | 
 | 661 |   while (*end); | 
 | 662 |  | 
 | 663 |   *output = out; | 
 | 664 |  | 
 | 665 |   return IDNA_SUCCESS; | 
 | 666 | } | 
 | 667 |  | 
 | 668 | /** | 
 | 669 |  * idna_to_unicode_8z4z: | 
 | 670 |  * @input: zero-terminated UTF-8 string. | 
 | 671 |  * @output: pointer to newly allocated output Unicode string. | 
 | 672 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 673 |  * | 
 | 674 |  * Convert possibly ACE encoded domain name in UTF-8 format into a | 
 | 675 |  * UCS-4 string.  The domain name may contain several labels, | 
 | 676 |  * separated by dots.  The output buffer must be deallocated by the | 
 | 677 |  * caller. | 
 | 678 |  * | 
 | 679 |  * Return value: Returns IDNA_SUCCESS on success, or error code. | 
 | 680 |  **/ | 
 | 681 | int | 
 | 682 | idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags) | 
 | 683 | { | 
 | 684 |   uint32_t *ucs4; | 
 | 685 |   size_t ucs4len; | 
 | 686 |   int rc; | 
 | 687 |  | 
 | 688 |   ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); | 
 | 689 |   if (!ucs4) | 
 | 690 |     return IDNA_ICONV_ERROR; | 
 | 691 |  | 
 | 692 |   rc = idna_to_unicode_4z4z (ucs4, output, flags); | 
 | 693 |   free (ucs4); | 
 | 694 |  | 
 | 695 |   return rc; | 
 | 696 | } | 
 | 697 |  | 
 | 698 | /** | 
 | 699 |  * idna_to_unicode_8z8z: | 
 | 700 |  * @input: zero-terminated UTF-8 string. | 
 | 701 |  * @output: pointer to newly allocated output UTF-8 string. | 
 | 702 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 703 |  * | 
 | 704 |  * Convert possibly ACE encoded domain name in UTF-8 format into a | 
 | 705 |  * UTF-8 string.  The domain name may contain several labels, | 
 | 706 |  * separated by dots.  The output buffer must be deallocated by the | 
 | 707 |  * caller. | 
 | 708 |  * | 
 | 709 |  * Return value: Returns IDNA_SUCCESS on success, or error code. | 
 | 710 |  **/ | 
 | 711 | int | 
 | 712 | idna_to_unicode_8z8z (const char *input, char **output, int flags) | 
 | 713 | { | 
 | 714 |   uint32_t *ucs4; | 
 | 715 |   int rc; | 
 | 716 |  | 
 | 717 |   rc = idna_to_unicode_8z4z (input, &ucs4, flags); | 
 | 718 |   *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL); | 
 | 719 |   free (ucs4); | 
 | 720 |  | 
 | 721 |   if (!*output) | 
 | 722 |     return IDNA_ICONV_ERROR; | 
 | 723 |  | 
 | 724 |   return rc; | 
 | 725 | } | 
 | 726 |  | 
 | 727 | /** | 
 | 728 |  * idna_to_unicode_8zlz: | 
 | 729 |  * @input: zero-terminated UTF-8 string. | 
 | 730 |  * @output: pointer to newly allocated output string encoded in the | 
 | 731 |  *   current locale's character set. | 
 | 732 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 733 |  * | 
 | 734 |  * Convert possibly ACE encoded domain name in UTF-8 format into a | 
 | 735 |  * string encoded in the current locale's character set.  The domain | 
 | 736 |  * name may contain several labels, separated by dots.  The output | 
 | 737 |  * buffer must be deallocated by the caller. | 
 | 738 |  * | 
 | 739 |  * Return value: Returns IDNA_SUCCESS on success, or error code. | 
 | 740 |  **/ | 
 | 741 | int | 
 | 742 | idna_to_unicode_8zlz (const char *input, char **output, int flags) | 
 | 743 | { | 
 | 744 |   char *utf8; | 
 | 745 |   int rc; | 
 | 746 |  | 
 | 747 |   rc = idna_to_unicode_8z8z (input, &utf8, flags); | 
 | 748 |   *output = stringprep_utf8_to_locale (utf8); | 
 | 749 |   free (utf8); | 
 | 750 |  | 
 | 751 |   if (!*output) | 
 | 752 |     return IDNA_ICONV_ERROR; | 
 | 753 |  | 
 | 754 |   return rc; | 
 | 755 | } | 
 | 756 |  | 
 | 757 | /** | 
 | 758 |  * idna_to_unicode_lzlz: | 
 | 759 |  * @input: zero-terminated string encoded in the current locale's | 
 | 760 |  *   character set. | 
 | 761 |  * @output: pointer to newly allocated output string encoded in the | 
 | 762 |  *   current locale's character set. | 
 | 763 |  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
 | 764 |  * | 
 | 765 |  * Convert possibly ACE encoded domain name in the locale's character | 
 | 766 |  * set into a string encoded in the current locale's character set. | 
 | 767 |  * The domain name may contain several labels, separated by dots.  The | 
 | 768 |  * output buffer must be deallocated by the caller. | 
 | 769 |  * | 
 | 770 |  * Return value: Returns IDNA_SUCCESS on success, or error code. | 
 | 771 |  **/ | 
 | 772 | int | 
 | 773 | idna_to_unicode_lzlz (const char *input, char **output, int flags) | 
 | 774 | { | 
 | 775 |   char *utf8; | 
 | 776 |   int rc; | 
 | 777 |  | 
 | 778 |   utf8 = stringprep_locale_to_utf8 (input); | 
 | 779 |   if (!utf8) | 
 | 780 |     return IDNA_ICONV_ERROR; | 
 | 781 |  | 
 | 782 |   rc = idna_to_unicode_8zlz (utf8, output, flags); | 
 | 783 |   free (utf8); | 
 | 784 |  | 
 | 785 |   return rc; | 
 | 786 | } | 
 | 787 |  | 
 | 788 | /** | 
 | 789 |  * IDNA_ACE_PREFIX | 
 | 790 |  * | 
 | 791 |  * The IANA allocated prefix to use for IDNA. "xn--" | 
 | 792 |  */ | 
 | 793 |  | 
 | 794 | /** | 
 | 795 |  * Idna_rc: | 
 | 796 |  * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to | 
 | 797 |  *   always be zero, the remaining ones are only guaranteed to hold | 
 | 798 |  *   non-zero values, for logical comparison purposes. | 
 | 799 |  * @IDNA_STRINGPREP_ERROR:  Error during string preparation. | 
 | 800 |  * @IDNA_PUNYCODE_ERROR: Error during punycode operation. | 
 | 801 |  * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that | 
 | 802 |  *   the string contains non-LDH ASCII characters. | 
 | 803 |  * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that | 
 | 804 |  *   the string contains a leading or trailing hyphen-minus (U+002D). | 
 | 805 |  * @IDNA_INVALID_LENGTH: The final output string is not within the | 
 | 806 |  *   (inclusive) range 1 to 63 characters. | 
 | 807 |  * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix | 
 | 808 |  *   (for ToUnicode). | 
 | 809 |  * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output | 
 | 810 |  *   string does not equal the input. | 
 | 811 |  * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for | 
 | 812 |  *   ToASCII). | 
 | 813 |  * @IDNA_ICONV_ERROR: Could not convert string in locale encoding. | 
 | 814 |  * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a | 
 | 815 |  *   fatal error). | 
 | 816 |  * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used | 
 | 817 |  *   internally in libc). | 
 | 818 |  * | 
 | 819 |  * Enumerated return codes of idna_to_ascii_4i(), | 
 | 820 |  * idna_to_unicode_44i() functions (and functions derived from those | 
 | 821 |  * functions).  The value 0 is guaranteed to always correspond to | 
 | 822 |  * success. | 
 | 823 |  */ | 
 | 824 |  | 
 | 825 |  | 
 | 826 | /** | 
 | 827 |  * Idna_flags: | 
 | 828 |  * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned | 
 | 829 |  *   Unicode code points. | 
 | 830 |  * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3 | 
 | 831 |  *   rules (i.e., normal host name rules). | 
 | 832 |  * | 
 | 833 |  * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc. | 
 | 834 |  */ |