| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* idna.c	Convert to or from IDN strings. | 
|  | 2 | * Copyright (C) 2002, 2003, 2004, 2011  Simon Josefsson | 
|  | 3 | * | 
|  | 4 | * This file is part of GNU Libidn. | 
|  | 5 | * | 
|  | 6 | * GNU Libidn is free software; you can redistribute it and/or | 
|  | 7 | * modify it under the terms of the GNU Lesser General Public | 
|  | 8 | * License as published by the Free Software Foundation; either | 
|  | 9 | * version 2.1 of the License, or (at your option) any later version. | 
|  | 10 | * | 
|  | 11 | * GNU Libidn is distributed in the hope that it will be useful, | 
|  | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 14 | * Lesser General Public License for more details. | 
|  | 15 | * | 
|  | 16 | * You should have received a copy of the GNU Lesser General Public | 
|  | 17 | * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. | 
|  | 18 | */ | 
|  | 19 |  | 
|  | 20 | #if HAVE_CONFIG_H | 
|  | 21 | # include "config.h" | 
|  | 22 | #endif | 
|  | 23 |  | 
|  | 24 | #include <stdlib.h> | 
|  | 25 | #include <string.h> | 
|  | 26 | #include <stringprep.h> | 
|  | 27 | #include <punycode.h> | 
|  | 28 | #include <stdint.h> | 
|  | 29 |  | 
|  | 30 | #include "idna.h" | 
|  | 31 |  | 
/* Non-zero when code point C is one of the four label separators this
   file treats as a dot: U+002E, U+3002, U+FF0E or U+FF61.  C may be
   evaluated up to four times, so pass an expression without side
   effects.  */
#define DOTP(c)                                 \
  ((c) == 0x002E                                \
   || (c) == 0x3002                             \
   || (c) == 0xFF0E                             \
   || (c) == 0xFF61)
|  | 34 |  | 
|  | 35 | /* Core functions */ | 
|  | 36 |  | 
|  | 37 | /** | 
|  | 38 | * idna_to_ascii_4i | 
|  | 39 | * @in: input array with unicode code points. | 
|  | 40 | * @inlen: length of input array with unicode code points. | 
|  | 41 | * @out: output zero terminated string that must have room for at | 
|  | 42 | *       least 63 characters plus the terminating zero. | 
|  | 43 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 44 | * | 
|  | 45 | * The ToASCII operation takes a sequence of Unicode code points that make | 
|  | 46 | * up one label and transforms it into a sequence of code points in the | 
|  | 47 | * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the | 
|  | 48 | * resulting sequence are equivalent labels. | 
|  | 49 | * | 
|  | 50 | * It is important to note that the ToASCII operation can fail. ToASCII | 
|  | 51 | * fails if any step of it fails. If any step of the ToASCII operation | 
|  | 52 | * fails on any label in a domain name, that domain name MUST NOT be used | 
|  | 53 | * as an internationalized domain name. The method for deadling with this | 
|  | 54 | * failure is application-specific. | 
|  | 55 | * | 
|  | 56 | * The inputs to ToASCII are a sequence of code points, the AllowUnassigned | 
|  | 57 | * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a | 
|  | 58 | * sequence of ASCII code points or a failure condition. | 
|  | 59 | * | 
|  | 60 | * ToASCII never alters a sequence of code points that are all in the ASCII | 
|  | 61 | * range to begin with (although it could fail). Applying the ToASCII | 
|  | 62 | * operation multiple times has exactly the same effect as applying it just | 
|  | 63 | * once. | 
|  | 64 | * | 
|  | 65 | * Return value: Returns 0 on success, or an error code. | 
|  | 66 | */ | 
|  | 67 | int | 
|  | 68 | idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags) | 
|  | 69 | { | 
|  | 70 | size_t len, outlen; | 
|  | 71 | uint32_t *src;		/* XXX don't need to copy data? */ | 
|  | 72 | int rc; | 
|  | 73 |  | 
|  | 74 | /* | 
|  | 75 | * ToASCII consists of the following steps: | 
|  | 76 | * | 
|  | 77 | * 1. If all code points in the sequence are in the ASCII range (0..7F) | 
|  | 78 | * then skip to step 3. | 
|  | 79 | */ | 
|  | 80 |  | 
|  | 81 | { | 
|  | 82 | size_t i; | 
|  | 83 | int inasciirange; | 
|  | 84 |  | 
|  | 85 | inasciirange = 1; | 
|  | 86 | for (i = 0; i < inlen; i++) | 
|  | 87 | if (in[i] > 0x7F) | 
|  | 88 | inasciirange = 0; | 
|  | 89 | if (inasciirange) | 
|  | 90 | { | 
|  | 91 | src = malloc (sizeof (in[0]) * (inlen + 1)); | 
|  | 92 | if (src == NULL) | 
|  | 93 | return IDNA_MALLOC_ERROR; | 
|  | 94 |  | 
|  | 95 | memcpy (src, in, sizeof (in[0]) * inlen); | 
|  | 96 | src[inlen] = 0; | 
|  | 97 |  | 
|  | 98 | goto step3; | 
|  | 99 | } | 
|  | 100 | } | 
|  | 101 |  | 
|  | 102 | /* | 
|  | 103 | * 2. Perform the steps specified in [NAMEPREP] and fail if there is | 
|  | 104 | * an error. The AllowUnassigned flag is used in [NAMEPREP]. | 
|  | 105 | */ | 
|  | 106 |  | 
|  | 107 | { | 
|  | 108 | char *p; | 
|  | 109 |  | 
|  | 110 | p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL); | 
|  | 111 | if (p == NULL) | 
|  | 112 | return IDNA_MALLOC_ERROR; | 
|  | 113 |  | 
|  | 114 | len = strlen (p); | 
|  | 115 | do | 
|  | 116 | { | 
|  | 117 | char *newp; | 
|  | 118 |  | 
|  | 119 | len = 2 * len + 10;	/* XXX better guess? */ | 
|  | 120 | newp = realloc (p, len); | 
|  | 121 | if (newp == NULL) | 
|  | 122 | { | 
|  | 123 | free (p); | 
|  | 124 | return IDNA_MALLOC_ERROR; | 
|  | 125 | } | 
|  | 126 | p = newp; | 
|  | 127 |  | 
|  | 128 | if (flags & IDNA_ALLOW_UNASSIGNED) | 
|  | 129 | rc = stringprep_nameprep (p, len); | 
|  | 130 | else | 
|  | 131 | rc = stringprep_nameprep_no_unassigned (p, len); | 
|  | 132 | } | 
|  | 133 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); | 
|  | 134 |  | 
|  | 135 | if (rc != STRINGPREP_OK) | 
|  | 136 | { | 
|  | 137 | free (p); | 
|  | 138 | return IDNA_STRINGPREP_ERROR; | 
|  | 139 | } | 
|  | 140 |  | 
|  | 141 | src = stringprep_utf8_to_ucs4 (p, -1, NULL); | 
|  | 142 |  | 
|  | 143 | free (p); | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 | step3: | 
|  | 147 | /* | 
|  | 148 | * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks: | 
|  | 149 | * | 
|  | 150 | * (a) Verify the absence of non-LDH ASCII code points; that is, | 
|  | 151 | * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. | 
|  | 152 | * | 
|  | 153 | * (b) Verify the absence of leading and trailing hyphen-minus; | 
|  | 154 | * that is, the absence of U+002D at the beginning and end of | 
|  | 155 | * the sequence. | 
|  | 156 | */ | 
|  | 157 |  | 
|  | 158 | if (flags & IDNA_USE_STD3_ASCII_RULES) | 
|  | 159 | { | 
|  | 160 | size_t i; | 
|  | 161 |  | 
|  | 162 | for (i = 0; src[i]; i++) | 
|  | 163 | if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F || | 
|  | 164 | (src[i] >= 0x3A && src[i] <= 0x40) || | 
|  | 165 | (src[i] >= 0x5B && src[i] <= 0x60) || | 
|  | 166 | (src[i] >= 0x7B && src[i] <= 0x7F)) | 
|  | 167 | { | 
|  | 168 | free (src); | 
|  | 169 | return IDNA_CONTAINS_NON_LDH; | 
|  | 170 | } | 
|  | 171 |  | 
|  | 172 | if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D)) | 
|  | 173 | { | 
|  | 174 | free (src); | 
|  | 175 | return IDNA_CONTAINS_MINUS; | 
|  | 176 | } | 
|  | 177 | } | 
|  | 178 |  | 
|  | 179 | /* | 
|  | 180 | * 4. If all code points in the sequence are in the ASCII range | 
|  | 181 | * (0..7F), then skip to step 8. | 
|  | 182 | */ | 
|  | 183 |  | 
|  | 184 | { | 
|  | 185 | size_t i; | 
|  | 186 | int inasciirange; | 
|  | 187 |  | 
|  | 188 | inasciirange = 1; | 
|  | 189 | for (i = 0; src[i]; i++) | 
|  | 190 | { | 
|  | 191 | if (src[i] > 0x7F) | 
|  | 192 | inasciirange = 0; | 
|  | 193 | /* copy string to output buffer if we are about to skip to step8 */ | 
|  | 194 | if (i < 64) | 
|  | 195 | out[i] = src[i]; | 
|  | 196 | } | 
|  | 197 | if (i < 64) | 
|  | 198 | out[i] = '\0'; | 
|  | 199 | if (inasciirange) | 
|  | 200 | goto step8; | 
|  | 201 | } | 
|  | 202 |  | 
|  | 203 | /* | 
|  | 204 | * 5. Verify that the sequence does NOT begin with the ACE prefix. | 
|  | 205 | * | 
|  | 206 | */ | 
|  | 207 |  | 
|  | 208 | { | 
|  | 209 | size_t i; | 
|  | 210 | int match; | 
|  | 211 |  | 
|  | 212 | match = 1; | 
|  | 213 | for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++) | 
|  | 214 | if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i]) | 
|  | 215 | match = 0; | 
|  | 216 | if (match) | 
|  | 217 | { | 
|  | 218 | free (src); | 
|  | 219 | return IDNA_CONTAINS_ACE_PREFIX; | 
|  | 220 | } | 
|  | 221 | } | 
|  | 222 |  | 
|  | 223 | /* | 
|  | 224 | * 6. Encode the sequence using the encoding algorithm in [PUNYCODE] | 
|  | 225 | * and fail if there is an error. | 
|  | 226 | */ | 
|  | 227 | for (len = 0; src[len]; len++) | 
|  | 228 | ; | 
|  | 229 | src[len] = '\0'; | 
|  | 230 | outlen = 63 - strlen (IDNA_ACE_PREFIX); | 
|  | 231 | rc = punycode_encode (len, src, NULL, | 
|  | 232 | &outlen, &out[strlen (IDNA_ACE_PREFIX)]); | 
|  | 233 | if (rc != PUNYCODE_SUCCESS) | 
|  | 234 | { | 
|  | 235 | free (src); | 
|  | 236 | return IDNA_PUNYCODE_ERROR; | 
|  | 237 | } | 
|  | 238 | out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0'; | 
|  | 239 |  | 
|  | 240 | /* | 
|  | 241 | * 7. Prepend the ACE prefix. | 
|  | 242 | */ | 
|  | 243 |  | 
|  | 244 | memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX)); | 
|  | 245 |  | 
|  | 246 | /* | 
|  | 247 | * 8. Verify that the number of code points is in the range 1 to 63 | 
|  | 248 | * inclusive (0 is excluded). | 
|  | 249 | */ | 
|  | 250 |  | 
|  | 251 | step8: | 
|  | 252 | free (src); | 
|  | 253 | if (strlen (out) < 1 || strlen (out) > 63) | 
|  | 254 | return IDNA_INVALID_LENGTH; | 
|  | 255 |  | 
|  | 256 | return IDNA_SUCCESS; | 
|  | 257 | } | 
|  | 258 |  | 
|  | 259 | /* ToUnicode().  May realloc() utf8in. */ | 
|  | 260 | static int | 
|  | 261 | idna_to_unicode_internal (char *utf8in, | 
|  | 262 | uint32_t * out, size_t * outlen, int flags) | 
|  | 263 | { | 
|  | 264 | int rc; | 
|  | 265 | char tmpout[64]; | 
|  | 266 | size_t utf8len = strlen (utf8in) + 1; | 
|  | 267 | size_t addlen = 0; | 
|  | 268 |  | 
|  | 269 | /* | 
|  | 270 | * ToUnicode consists of the following steps: | 
|  | 271 | * | 
|  | 272 | * 1. If the sequence contains any code points outside the ASCII range | 
|  | 273 | * (0..7F) then proceed to step 2, otherwise skip to step 3. | 
|  | 274 | */ | 
|  | 275 |  | 
|  | 276 | { | 
|  | 277 | size_t i; | 
|  | 278 | int inasciirange; | 
|  | 279 |  | 
|  | 280 | inasciirange = 1; | 
|  | 281 | for (i = 0; utf8in[i]; i++) | 
|  | 282 | if (utf8in[i] & ~0x7F) | 
|  | 283 | inasciirange = 0; | 
|  | 284 | if (inasciirange) | 
|  | 285 | goto step3; | 
|  | 286 | } | 
|  | 287 |  | 
|  | 288 | /* | 
|  | 289 | * 2. Perform the steps specified in [NAMEPREP] and fail if there is an | 
|  | 290 | * error. (If step 3 of ToASCII is also performed here, it will not | 
|  | 291 | * affect the overall behavior of ToUnicode, but it is not | 
|  | 292 | * necessary.) The AllowUnassigned flag is used in [NAMEPREP]. | 
|  | 293 | */ | 
|  | 294 | do | 
|  | 295 | { | 
|  | 296 | char *newp = realloc (utf8in, utf8len + addlen); | 
|  | 297 | if (newp == NULL) | 
|  | 298 | { | 
|  | 299 | free (utf8in); | 
|  | 300 | return IDNA_MALLOC_ERROR; | 
|  | 301 | } | 
|  | 302 | utf8in = newp; | 
|  | 303 | if (flags & IDNA_ALLOW_UNASSIGNED) | 
|  | 304 | rc = stringprep_nameprep (utf8in, utf8len + addlen); | 
|  | 305 | else | 
|  | 306 | rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen); | 
|  | 307 | addlen += 1; | 
|  | 308 | } | 
|  | 309 | while (rc == STRINGPREP_TOO_SMALL_BUFFER); | 
|  | 310 |  | 
|  | 311 | if (rc != STRINGPREP_OK) | 
|  | 312 | { | 
|  | 313 | free (utf8in); | 
|  | 314 | return IDNA_STRINGPREP_ERROR; | 
|  | 315 | } | 
|  | 316 |  | 
|  | 317 | /* 3. Verify that the sequence begins with the ACE prefix, and save a | 
|  | 318 | * copy of the sequence. | 
|  | 319 | */ | 
|  | 320 |  | 
|  | 321 | step3: | 
|  | 322 | if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0) | 
|  | 323 | { | 
|  | 324 | free (utf8in); | 
|  | 325 | return IDNA_NO_ACE_PREFIX; | 
|  | 326 | } | 
|  | 327 |  | 
|  | 328 | /* 4. Remove the ACE prefix. | 
|  | 329 | */ | 
|  | 330 |  | 
|  | 331 | memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)], | 
|  | 332 | strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1); | 
|  | 333 |  | 
|  | 334 | /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE] | 
|  | 335 | * and fail if there is an error. Save a copy of the result of | 
|  | 336 | * this step. | 
|  | 337 | */ | 
|  | 338 |  | 
|  | 339 | (*outlen)--;			/* reserve one for the zero */ | 
|  | 340 |  | 
|  | 341 | rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL); | 
|  | 342 | if (rc != PUNYCODE_SUCCESS) | 
|  | 343 | { | 
|  | 344 | free (utf8in); | 
|  | 345 | return IDNA_PUNYCODE_ERROR; | 
|  | 346 | } | 
|  | 347 |  | 
|  | 348 | out[*outlen] = 0;		/* add zero */ | 
|  | 349 |  | 
|  | 350 | /* 6. Apply ToASCII. | 
|  | 351 | */ | 
|  | 352 |  | 
|  | 353 | rc = idna_to_ascii_4i (out, *outlen, tmpout, flags); | 
|  | 354 | if (rc != IDNA_SUCCESS) | 
|  | 355 | { | 
|  | 356 | free (utf8in); | 
|  | 357 | return rc; | 
|  | 358 | } | 
|  | 359 |  | 
|  | 360 | /* 7. Verify that the result of step 6 matches the saved copy from | 
|  | 361 | * step 3, using a case-insensitive ASCII comparison. | 
|  | 362 | */ | 
|  | 363 |  | 
|  | 364 | if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0) | 
|  | 365 | { | 
|  | 366 | free (utf8in); | 
|  | 367 | return IDNA_ROUNDTRIP_VERIFY_ERROR; | 
|  | 368 | } | 
|  | 369 |  | 
|  | 370 | /* 8. Return the saved copy from step 5. | 
|  | 371 | */ | 
|  | 372 |  | 
|  | 373 | free (utf8in); | 
|  | 374 | return IDNA_SUCCESS; | 
|  | 375 | } | 
|  | 376 |  | 
|  | 377 | /** | 
|  | 378 | * idna_to_unicode_44i | 
|  | 379 | * @in: input array with unicode code points. | 
|  | 380 | * @inlen: length of input array with unicode code points. | 
|  | 381 | * @out: output array with unicode code points. | 
|  | 382 | * @outlen: on input, maximum size of output array with unicode code points, | 
|  | 383 | *          on exit, actual size of output array with unicode code points. | 
|  | 384 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 385 | * | 
|  | 386 | * The ToUnicode operation takes a sequence of Unicode code points | 
|  | 387 | * that make up one label and returns a sequence of Unicode code | 
|  | 388 | * points. If the input sequence is a label in ACE form, then the | 
|  | 389 | * result is an equivalent internationalized label that is not in ACE | 
|  | 390 | * form, otherwise the original sequence is returned unaltered. | 
|  | 391 | * | 
|  | 392 | * ToUnicode never fails. If any step fails, then the original input | 
|  | 393 | * sequence is returned immediately in that step. | 
|  | 394 | * | 
|  | 395 | * The Punycode decoder can never output more code points than it | 
|  | 396 | * inputs, but Nameprep can, and therefore ToUnicode can.  Note that | 
|  | 397 | * the number of octets needed to represent a sequence of code points | 
|  | 398 | * depends on the particular character encoding used. | 
|  | 399 | * | 
|  | 400 | * The inputs to ToUnicode are a sequence of code points, the | 
|  | 401 | * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of | 
|  | 402 | * ToUnicode is always a sequence of Unicode code points. | 
|  | 403 | * | 
|  | 404 | * Return value: Returns error condition, but it must only be used for | 
|  | 405 | *               debugging purposes.  The output buffer is always | 
|  | 406 | *               guaranteed to contain the correct data according to | 
|  | 407 | *               the specification (sans malloc induced errors).  NB! | 
|  | 408 | *               This means that you normally ignore the return code | 
|  | 409 | *               from this function, as checking it means breaking the | 
|  | 410 | *               standard. | 
|  | 411 | */ | 
|  | 412 | int | 
|  | 413 | idna_to_unicode_44i (const uint32_t * in, size_t inlen, | 
|  | 414 | uint32_t * out, size_t * outlen, int flags) | 
|  | 415 | { | 
|  | 416 | int rc; | 
|  | 417 | size_t outlensave = *outlen; | 
|  | 418 | char *p; | 
|  | 419 |  | 
|  | 420 | p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL); | 
|  | 421 | if (p == NULL) | 
|  | 422 | return IDNA_MALLOC_ERROR; | 
|  | 423 |  | 
|  | 424 | rc = idna_to_unicode_internal (p, out, outlen, flags); | 
|  | 425 | if (rc != IDNA_SUCCESS) | 
|  | 426 | { | 
|  | 427 | memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ? | 
|  | 428 | inlen : outlensave)); | 
|  | 429 | *outlen = inlen; | 
|  | 430 | } | 
|  | 431 |  | 
|  | 432 | /* p is freed in idna_to_unicode_internal.  */ | 
|  | 433 |  | 
|  | 434 | return rc; | 
|  | 435 | } | 
|  | 436 |  | 
|  | 437 | /* Wrappers that handle several labels */ | 
|  | 438 |  | 
|  | 439 | /** | 
|  | 440 | * idna_to_ascii_4z: | 
|  | 441 | * @input: zero terminated input Unicode string. | 
|  | 442 | * @output: pointer to newly allocated output string. | 
|  | 443 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 444 | * | 
|  | 445 | * Convert UCS-4 domain name to ASCII string.  The domain name may | 
|  | 446 | * contain several labels, separated by dots.  The output buffer must | 
|  | 447 | * be deallocated by the caller. | 
|  | 448 | * | 
|  | 449 | * Return value: Returns IDNA_SUCCESS on success, or error code. | 
|  | 450 | **/ | 
|  | 451 | int | 
|  | 452 | idna_to_ascii_4z (const uint32_t * input, char **output, int flags) | 
|  | 453 | { | 
|  | 454 | const uint32_t *start = input; | 
|  | 455 | const uint32_t *end = input; | 
|  | 456 | char buf[64]; | 
|  | 457 | char *out = NULL; | 
|  | 458 | int rc; | 
|  | 459 |  | 
|  | 460 | /* 1) Whenever dots are used as label separators, the following | 
|  | 461 | characters MUST be recognized as dots: U+002E (full stop), | 
|  | 462 | U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), | 
|  | 463 | U+FF61 (halfwidth ideographic full stop). */ | 
|  | 464 |  | 
|  | 465 | if (input[0] == 0) | 
|  | 466 | { | 
|  | 467 | /* Handle implicit zero-length root label. */ | 
|  | 468 | *output = malloc (1); | 
|  | 469 | if (!*output) | 
|  | 470 | return IDNA_MALLOC_ERROR; | 
|  | 471 | strcpy (*output, ""); | 
|  | 472 | return IDNA_SUCCESS; | 
|  | 473 | } | 
|  | 474 |  | 
|  | 475 | if (DOTP (input[0]) && input[1] == 0) | 
|  | 476 | { | 
|  | 477 | /* Handle explicit zero-length root label. */ | 
|  | 478 | *output = malloc (2); | 
|  | 479 | if (!*output) | 
|  | 480 | return IDNA_MALLOC_ERROR; | 
|  | 481 | strcpy (*output, "."); | 
|  | 482 | return IDNA_SUCCESS; | 
|  | 483 | } | 
|  | 484 |  | 
|  | 485 | *output = NULL; | 
|  | 486 | do | 
|  | 487 | { | 
|  | 488 | end = start; | 
|  | 489 |  | 
|  | 490 | for (; *end && !DOTP (*end); end++) | 
|  | 491 | ; | 
|  | 492 |  | 
|  | 493 | if (*end == '\0' && start == end) | 
|  | 494 | { | 
|  | 495 | /* Handle explicit zero-length root label. */ | 
|  | 496 | buf[0] = '\0'; | 
|  | 497 | } | 
|  | 498 | else | 
|  | 499 | { | 
|  | 500 | rc = idna_to_ascii_4i (start, end - start, buf, flags); | 
|  | 501 | if (rc != IDNA_SUCCESS) | 
|  | 502 | return rc; | 
|  | 503 | } | 
|  | 504 |  | 
|  | 505 | if (out) | 
|  | 506 | { | 
|  | 507 | char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1); | 
|  | 508 | if (!newp) | 
|  | 509 | { | 
|  | 510 | free (out); | 
|  | 511 | return IDNA_MALLOC_ERROR; | 
|  | 512 | } | 
|  | 513 | out = newp; | 
|  | 514 | strcat (out, "."); | 
|  | 515 | strcat (out, buf); | 
|  | 516 | } | 
|  | 517 | else | 
|  | 518 | { | 
|  | 519 | out = (char *) malloc (strlen (buf) + 1); | 
|  | 520 | if (!out) | 
|  | 521 | return IDNA_MALLOC_ERROR; | 
|  | 522 | strcpy (out, buf); | 
|  | 523 | } | 
|  | 524 |  | 
|  | 525 | start = end + 1; | 
|  | 526 | } | 
|  | 527 | while (*end); | 
|  | 528 |  | 
|  | 529 | *output = out; | 
|  | 530 |  | 
|  | 531 | return IDNA_SUCCESS; | 
|  | 532 | } | 
|  | 533 |  | 
|  | 534 | /** | 
|  | 535 | * idna_to_ascii_8z: | 
|  | 536 | * @input: zero terminated input UTF-8 string. | 
|  | 537 | * @output: pointer to newly allocated output string. | 
|  | 538 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 539 | * | 
|  | 540 | * Convert UTF-8 domain name to ASCII string.  The domain name may | 
|  | 541 | * contain several labels, separated by dots.  The output buffer must | 
|  | 542 | * be deallocated by the caller. | 
|  | 543 | * | 
|  | 544 | * Return value: Returns IDNA_SUCCESS on success, or error code. | 
|  | 545 | **/ | 
|  | 546 | int | 
|  | 547 | idna_to_ascii_8z (const char *input, char **output, int flags) | 
|  | 548 | { | 
|  | 549 | uint32_t *ucs4; | 
|  | 550 | size_t ucs4len; | 
|  | 551 | int rc; | 
|  | 552 |  | 
|  | 553 | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); | 
|  | 554 | if (!ucs4) | 
|  | 555 | return IDNA_ICONV_ERROR; | 
|  | 556 |  | 
|  | 557 | rc = idna_to_ascii_4z (ucs4, output, flags); | 
|  | 558 |  | 
|  | 559 | free (ucs4); | 
|  | 560 |  | 
|  | 561 | return rc; | 
|  | 562 |  | 
|  | 563 | } | 
|  | 564 |  | 
|  | 565 | /** | 
|  | 566 | * idna_to_ascii_lz: | 
|  | 567 | * @input: zero terminated input UTF-8 string. | 
|  | 568 | * @output: pointer to newly allocated output string. | 
|  | 569 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 570 | * | 
|  | 571 | * Convert domain name in the locale's encoding to ASCII string.  The | 
|  | 572 | * domain name may contain several labels, separated by dots.  The | 
|  | 573 | * output buffer must be deallocated by the caller. | 
|  | 574 | * | 
|  | 575 | * Return value: Returns IDNA_SUCCESS on success, or error code. | 
|  | 576 | **/ | 
|  | 577 | int | 
|  | 578 | idna_to_ascii_lz (const char *input, char **output, int flags) | 
|  | 579 | { | 
|  | 580 | char *utf8; | 
|  | 581 | int rc; | 
|  | 582 |  | 
|  | 583 | utf8 = stringprep_locale_to_utf8 (input); | 
|  | 584 | if (!utf8) | 
|  | 585 | return IDNA_ICONV_ERROR; | 
|  | 586 |  | 
|  | 587 | rc = idna_to_ascii_8z (utf8, output, flags); | 
|  | 588 |  | 
|  | 589 | free (utf8); | 
|  | 590 |  | 
|  | 591 | return rc; | 
|  | 592 | } | 
|  | 593 |  | 
|  | 594 | /** | 
|  | 595 | * idna_to_unicode_4z4z: | 
|  | 596 | * @input: zero-terminated Unicode string. | 
|  | 597 | * @output: pointer to newly allocated output Unicode string. | 
|  | 598 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 599 | * | 
|  | 600 | * Convert possibly ACE encoded domain name in UCS-4 format into a | 
|  | 601 | * UCS-4 string.  The domain name may contain several labels, | 
|  | 602 | * separated by dots.  The output buffer must be deallocated by the | 
|  | 603 | * caller. | 
|  | 604 | * | 
|  | 605 | * Return value: Returns IDNA_SUCCESS on success, or error code. | 
|  | 606 | **/ | 
|  | 607 | int | 
|  | 608 | idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags) | 
|  | 609 | { | 
|  | 610 | const uint32_t *start = input; | 
|  | 611 | const uint32_t *end = input; | 
|  | 612 | uint32_t *buf; | 
|  | 613 | size_t buflen; | 
|  | 614 | uint32_t *out = NULL; | 
|  | 615 | size_t outlen = 0; | 
|  | 616 |  | 
|  | 617 | *output = NULL; | 
|  | 618 |  | 
|  | 619 | do | 
|  | 620 | { | 
|  | 621 | end = start; | 
|  | 622 |  | 
|  | 623 | for (; *end && !DOTP (*end); end++) | 
|  | 624 | ; | 
|  | 625 |  | 
|  | 626 | buflen = end - start; | 
|  | 627 | buf = malloc (sizeof (buf[0]) * (buflen + 1)); | 
|  | 628 | if (!buf) | 
|  | 629 | return IDNA_MALLOC_ERROR; | 
|  | 630 |  | 
|  | 631 | idna_to_unicode_44i (start, end - start, buf, &buflen, flags); | 
|  | 632 | /* don't check return value as per specification! */ | 
|  | 633 |  | 
|  | 634 | if (out) | 
|  | 635 | { | 
|  | 636 | uint32_t *newp = realloc (out, | 
|  | 637 | sizeof (out[0]) | 
|  | 638 | * (outlen + 1 + buflen + 1)); | 
|  | 639 | if (!newp) | 
|  | 640 | { | 
|  | 641 | free (buf); | 
|  | 642 | free (out); | 
|  | 643 | return IDNA_MALLOC_ERROR; | 
|  | 644 | } | 
|  | 645 | out = newp; | 
|  | 646 | out[outlen++] = 0x002E;	/* '.' (full stop) */ | 
|  | 647 | memcpy (out + outlen, buf, sizeof (buf[0]) * buflen); | 
|  | 648 | outlen += buflen; | 
|  | 649 | out[outlen] = 0x0; | 
|  | 650 | free (buf); | 
|  | 651 | } | 
|  | 652 | else | 
|  | 653 | { | 
|  | 654 | out = buf; | 
|  | 655 | outlen = buflen; | 
|  | 656 | out[outlen] = 0x0; | 
|  | 657 | } | 
|  | 658 |  | 
|  | 659 | start = end + 1; | 
|  | 660 | } | 
|  | 661 | while (*end); | 
|  | 662 |  | 
|  | 663 | *output = out; | 
|  | 664 |  | 
|  | 665 | return IDNA_SUCCESS; | 
|  | 666 | } | 
|  | 667 |  | 
|  | 668 | /** | 
|  | 669 | * idna_to_unicode_8z4z: | 
|  | 670 | * @input: zero-terminated UTF-8 string. | 
|  | 671 | * @output: pointer to newly allocated output Unicode string. | 
|  | 672 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 673 | * | 
|  | 674 | * Convert possibly ACE encoded domain name in UTF-8 format into a | 
|  | 675 | * UCS-4 string.  The domain name may contain several labels, | 
|  | 676 | * separated by dots.  The output buffer must be deallocated by the | 
|  | 677 | * caller. | 
|  | 678 | * | 
|  | 679 | * Return value: Returns IDNA_SUCCESS on success, or error code. | 
|  | 680 | **/ | 
|  | 681 | int | 
|  | 682 | idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags) | 
|  | 683 | { | 
|  | 684 | uint32_t *ucs4; | 
|  | 685 | size_t ucs4len; | 
|  | 686 | int rc; | 
|  | 687 |  | 
|  | 688 | ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len); | 
|  | 689 | if (!ucs4) | 
|  | 690 | return IDNA_ICONV_ERROR; | 
|  | 691 |  | 
|  | 692 | rc = idna_to_unicode_4z4z (ucs4, output, flags); | 
|  | 693 | free (ucs4); | 
|  | 694 |  | 
|  | 695 | return rc; | 
|  | 696 | } | 
|  | 697 |  | 
|  | 698 | /** | 
|  | 699 | * idna_to_unicode_8z8z: | 
|  | 700 | * @input: zero-terminated UTF-8 string. | 
|  | 701 | * @output: pointer to newly allocated output UTF-8 string. | 
|  | 702 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 703 | * | 
|  | 704 | * Convert possibly ACE encoded domain name in UTF-8 format into a | 
|  | 705 | * UTF-8 string.  The domain name may contain several labels, | 
|  | 706 | * separated by dots.  The output buffer must be deallocated by the | 
|  | 707 | * caller. | 
|  | 708 | * | 
|  | 709 | * Return value: Returns IDNA_SUCCESS on success, or error code. | 
|  | 710 | **/ | 
|  | 711 | int | 
|  | 712 | idna_to_unicode_8z8z (const char *input, char **output, int flags) | 
|  | 713 | { | 
|  | 714 | uint32_t *ucs4; | 
|  | 715 | int rc; | 
|  | 716 |  | 
|  | 717 | rc = idna_to_unicode_8z4z (input, &ucs4, flags); | 
|  | 718 | *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL); | 
|  | 719 | free (ucs4); | 
|  | 720 |  | 
|  | 721 | if (!*output) | 
|  | 722 | return IDNA_ICONV_ERROR; | 
|  | 723 |  | 
|  | 724 | return rc; | 
|  | 725 | } | 
|  | 726 |  | 
|  | 727 | /** | 
|  | 728 | * idna_to_unicode_8zlz: | 
|  | 729 | * @input: zero-terminated UTF-8 string. | 
|  | 730 | * @output: pointer to newly allocated output string encoded in the | 
|  | 731 | *   current locale's character set. | 
|  | 732 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 733 | * | 
|  | 734 | * Convert possibly ACE encoded domain name in UTF-8 format into a | 
|  | 735 | * string encoded in the current locale's character set.  The domain | 
|  | 736 | * name may contain several labels, separated by dots.  The output | 
|  | 737 | * buffer must be deallocated by the caller. | 
|  | 738 | * | 
|  | 739 | * Return value: Returns IDNA_SUCCESS on success, or error code. | 
|  | 740 | **/ | 
|  | 741 | int | 
|  | 742 | idna_to_unicode_8zlz (const char *input, char **output, int flags) | 
|  | 743 | { | 
|  | 744 | char *utf8; | 
|  | 745 | int rc; | 
|  | 746 |  | 
|  | 747 | rc = idna_to_unicode_8z8z (input, &utf8, flags); | 
|  | 748 | *output = stringprep_utf8_to_locale (utf8); | 
|  | 749 | free (utf8); | 
|  | 750 |  | 
|  | 751 | if (!*output) | 
|  | 752 | return IDNA_ICONV_ERROR; | 
|  | 753 |  | 
|  | 754 | return rc; | 
|  | 755 | } | 
|  | 756 |  | 
|  | 757 | /** | 
|  | 758 | * idna_to_unicode_lzlz: | 
|  | 759 | * @input: zero-terminated string encoded in the current locale's | 
|  | 760 | *   character set. | 
|  | 761 | * @output: pointer to newly allocated output string encoded in the | 
|  | 762 | *   current locale's character set. | 
|  | 763 | * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES. | 
|  | 764 | * | 
|  | 765 | * Convert possibly ACE encoded domain name in the locale's character | 
|  | 766 | * set into a string encoded in the current locale's character set. | 
|  | 767 | * The domain name may contain several labels, separated by dots.  The | 
|  | 768 | * output buffer must be deallocated by the caller. | 
|  | 769 | * | 
|  | 770 | * Return value: Returns IDNA_SUCCESS on success, or error code. | 
|  | 771 | **/ | 
|  | 772 | int | 
|  | 773 | idna_to_unicode_lzlz (const char *input, char **output, int flags) | 
|  | 774 | { | 
|  | 775 | char *utf8; | 
|  | 776 | int rc; | 
|  | 777 |  | 
|  | 778 | utf8 = stringprep_locale_to_utf8 (input); | 
|  | 779 | if (!utf8) | 
|  | 780 | return IDNA_ICONV_ERROR; | 
|  | 781 |  | 
|  | 782 | rc = idna_to_unicode_8zlz (utf8, output, flags); | 
|  | 783 | free (utf8); | 
|  | 784 |  | 
|  | 785 | return rc; | 
|  | 786 | } | 
|  | 787 |  | 
|  | 788 | /** | 
|  | 789 | * IDNA_ACE_PREFIX | 
|  | 790 | * | 
|  | 791 | * The IANA allocated prefix to use for IDNA. "xn--" | 
|  | 792 | */ | 
|  | 793 |  | 
|  | 794 | /** | 
|  | 795 | * Idna_rc: | 
|  | 796 | * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to | 
|  | 797 | *   always be zero, the remaining ones are only guaranteed to hold | 
|  | 798 | *   non-zero values, for logical comparison purposes. | 
|  | 799 | * @IDNA_STRINGPREP_ERROR:  Error during string preparation. | 
|  | 800 | * @IDNA_PUNYCODE_ERROR: Error during punycode operation. | 
|  | 801 | * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that | 
|  | 802 | *   the string contains non-LDH ASCII characters. | 
|  | 803 | * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that | 
|  | 804 | *   the string contains a leading or trailing hyphen-minus (U+002D). | 
|  | 805 | * @IDNA_INVALID_LENGTH: The final output string is not within the | 
|  | 806 | *   (inclusive) range 1 to 63 characters. | 
|  | 807 | * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix | 
|  | 808 | *   (for ToUnicode). | 
|  | 809 | * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output | 
|  | 810 | *   string does not equal the input. | 
|  | 811 | * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for | 
|  | 812 | *   ToASCII). | 
|  | 813 | * @IDNA_ICONV_ERROR: Could not convert string in locale encoding. | 
|  | 814 | * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a | 
|  | 815 | *   fatal error). | 
|  | 816 | * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used | 
|  | 817 | *   internally in libc). | 
|  | 818 | * | 
|  | 819 | * Enumerated return codes of idna_to_ascii_4i(), | 
|  | 820 | * idna_to_unicode_44i() functions (and functions derived from those | 
|  | 821 | * functions).  The value 0 is guaranteed to always correspond to | 
|  | 822 | * success. | 
|  | 823 | */ | 
|  | 824 |  | 
|  | 825 |  | 
|  | 826 | /** | 
|  | 827 | * Idna_flags: | 
|  | 828 | * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned | 
|  | 829 | *   Unicode code points. | 
|  | 830 | * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3 | 
|  | 831 | *   rules (i.e., normal host name rules). | 
|  | 832 | * | 
|  | 833 | * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc. | 
|  | 834 | */ |