| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* punycode.h	Declarations for punycode functions. | 
|  | 2 | * Copyright (C) 2002, 2003  Simon Josefsson | 
|  | 3 | * | 
|  | 4 | * This file is part of GNU Libidn. | 
|  | 5 | * | 
|  | 6 | * GNU Libidn is free software; you can redistribute it and/or | 
|  | 7 | * modify it under the terms of the GNU Lesser General Public | 
|  | 8 | * License as published by the Free Software Foundation; either | 
|  | 9 | * version 2.1 of the License, or (at your option) any later version. | 
|  | 10 | * | 
|  | 11 | * GNU Libidn is distributed in the hope that it will be useful, | 
|  | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 14 | * Lesser General Public License for more details. | 
|  | 15 | * | 
|  | 16 | * You should have received a copy of the GNU Lesser General Public | 
|  | 17 | * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. | 
|  | 18 | */ | 
|  | 19 |  | 
|  | 20 | /* | 
|  | 21 | * This file is derived from RFC 3492bis written by Adam M. Costello. | 
|  | 22 | * | 
|  | 23 | * Disclaimer and license: Regarding this entire document or any | 
|  | 24 | * portion of it (including the pseudocode and C code), the author | 
|  | 25 | * makes no guarantees and is not responsible for any damage resulting | 
|  | 26 | * from its use.  The author grants irrevocable permission to anyone | 
|  | 27 | * to use, modify, and distribute it in any way that does not diminish | 
|  | 28 | * the rights of anyone else to use, modify, and distribute it, | 
|  | 29 | * provided that redistributed derivative works do not contain | 
|  | 30 | * misleading author or version information.  Derivative works need | 
|  | 31 | * not be licensed under similar terms. | 
|  | 32 | * | 
|  | 33 | * Copyright (C) The Internet Society (2003).  All Rights Reserved. | 
|  | 34 | * | 
|  | 35 | * This document and translations of it may be copied and furnished to | 
|  | 36 | * others, and derivative works that comment on or otherwise explain it | 
|  | 37 | * or assist in its implementation may be prepared, copied, published | 
|  | 38 | * and distributed, in whole or in part, without restriction of any | 
|  | 39 | * kind, provided that the above copyright notice and this paragraph are | 
|  | 40 | * included on all such copies and derivative works.  However, this | 
|  | 41 | * document itself may not be modified in any way, such as by removing | 
|  | 42 | * the copyright notice or references to the Internet Society or other | 
|  | 43 | * Internet organizations, except as needed for the purpose of | 
|  | 44 | * developing Internet standards in which case the procedures for | 
|  | 45 | * copyrights defined in the Internet Standards process must be | 
|  | 46 | * followed, or as required to translate it into languages other than | 
|  | 47 | * English. | 
|  | 48 | * | 
|  | 49 | * The limited permissions granted above are perpetual and will not be | 
|  | 50 | * revoked by the Internet Society or its successors or assigns. | 
|  | 51 | * | 
|  | 52 | * This document and the information contained herein is provided on an | 
|  | 53 | * "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING | 
|  | 54 | * TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING | 
|  | 55 | * BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION | 
|  | 56 | * HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF | 
|  | 57 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. | 
|  | 58 | */ | 
|  | 59 |  | 
|  | 60 | #ifndef _PUNYCODE_H | 
|  | 61 | #define _PUNYCODE_H | 
|  | 62 |  | 
|  | 63 | #ifdef __cplusplus | 
|  | 64 | extern "C" | 
|  | 65 | { | 
|  | 66 | #endif | 
|  | 67 |  | 
|  | 68 | #include <stddef.h>		/* size_t */ | 
|  | 69 | #include <stdint.h>		/* uint32_t */ | 
|  | 70 |  | 
/*
 * Status codes reported by punycode_encode() and punycode_decode().
 * Both functions are declared to return int, so every code is given
 * an explicit numeric value here.
 */
enum punycode_status
{
  punycode_success = 0,		/* No error.                               */
  punycode_bad_input = 1,	/* Input is invalid.                       */
  punycode_big_output = 2,	/* Output would exceed the space provided. */
  punycode_overflow = 3		/* Wider integers needed to process input. */
};
|  | 78 |  | 
|  | 79 | typedef enum | 
|  | 80 | { | 
|  | 81 | PUNYCODE_SUCCESS = punycode_success, | 
|  | 82 | PUNYCODE_BAD_INPUT = punycode_bad_input, | 
|  | 83 | PUNYCODE_BIG_OUTPUT = punycode_big_output, | 
|  | 84 | PUNYCODE_OVERFLOW = punycode_overflow | 
|  | 85 | } Punycode_status; | 
|  | 86 |  | 
/*
 * Element type of the code point arrays taken by punycode_encode() and
 * filled in by punycode_decode().  It needs to be unsigned and at least
 * 26 bits wide.
 */
typedef uint32_t punycode_uint;
|  | 91 |  | 
|  | 92 | extern int punycode_encode (size_t input_length, | 
|  | 93 | const punycode_uint input[], | 
|  | 94 | const unsigned char case_flags[], | 
|  | 95 | size_t * output_length, char output[]); | 
|  | 96 |  | 
|  | 97 | /* | 
|  | 98 | punycode_encode() converts a sequence of code points (presumed to be | 
|  | 99 | Unicode code points) to Punycode. | 
|  | 100 |  | 
|  | 101 | Input arguments (to be supplied by the caller): | 
|  | 102 |  | 
|  | 103 | input_length | 
|  | 104 | The number of code points in the input array and the number | 
|  | 105 | of flags in the case_flags array. | 
|  | 106 |  | 
|  | 107 | input | 
|  | 108 | An array of code points.  They are presumed to be Unicode | 
|  | 109 | code points, but that is not strictly REQUIRED.  The | 
|  | 110 | array contains code points, not code units.  UTF-16 uses | 
|  | 111 | code units D800 through DFFF to refer to code points | 
|  | 112 | 10000..10FFFF.  The code points D800..DFFF do not occur in | 
|  | 113 | any valid Unicode string.  The code points that can occur in | 
|  | 114 | Unicode strings (0..D7FF and E000..10FFFF) are also called | 
|  | 115 | Unicode scalar values. | 
|  | 116 |  | 
|  | 117 | case_flags | 
|  | 118 | A null pointer or an array of boolean values parallel to | 
|  | 119 | the input array.  Nonzero (true, flagged) suggests that the | 
|  | 120 | corresponding Unicode character be forced to uppercase after | 
|  | 121 | being decoded (if possible), and zero (false, unflagged) | 
|  | 122 | suggests that it be forced to lowercase (if possible). | 
|  | 123 | ASCII code points (0..7F) are encoded literally, except that | 
|  | 124 | ASCII letters are forced to uppercase or lowercase according | 
|  | 125 | to the corresponding case flags.  If case_flags is a null | 
|  | 126 | pointer then ASCII letters are left as they are, and other | 
|  | 127 | code points are treated as unflagged. | 
|  | 128 |  | 
|  | 129 | Output arguments (to be filled in by the function): | 
|  | 130 |  | 
|  | 131 | output | 
|  | 132 | An array of ASCII code points.  It is *not* null-terminated; | 
|  | 133 | it will contain zeros if and only if the input contains | 
|  | 134 | zeros.  (Of course the caller can leave room for a | 
|  | 135 | terminator and add one if needed.) | 
|  | 136 |  | 
|  | 137 | Input/output arguments (to be supplied by the caller and overwritten | 
|  | 138 | by the function): | 
|  | 139 |  | 
|  | 140 | output_length | 
|  | 141 | The caller passes in the maximum number of ASCII code points | 
|  | 142 | that it can receive.  On successful return it will contain | 
|  | 143 | the number of ASCII code points actually output. | 
|  | 144 |  | 
    Return value:

        Can be any of the punycode_status values defined above except
        punycode_bad_input.  If not punycode_success, then output_length
        and output might contain garbage.
*/
|  | 151 |  | 
|  | 152 | extern int punycode_decode (size_t input_length, | 
|  | 153 | const char input[], | 
|  | 154 | size_t * output_length, | 
|  | 155 | punycode_uint output[], | 
|  | 156 | unsigned char case_flags[]); | 
|  | 157 |  | 
|  | 158 | /* | 
|  | 159 | punycode_decode() converts Punycode to a sequence of code points | 
|  | 160 | (presumed to be Unicode code points). | 
|  | 161 |  | 
|  | 162 | Input arguments (to be supplied by the caller): | 
|  | 163 |  | 
|  | 164 | input_length | 
|  | 165 | The number of ASCII code points in the input array. | 
|  | 166 |  | 
|  | 167 | input | 
|  | 168 | An array of ASCII code points (0..7F). | 
|  | 169 |  | 
|  | 170 | Output arguments (to be filled in by the function): | 
|  | 171 |  | 
|  | 172 | output | 
|  | 173 | An array of code points like the input argument of | 
|  | 174 | punycode_encode() (see above). | 
|  | 175 |  | 
|  | 176 | case_flags | 
|  | 177 | A null pointer (if the flags are not needed by the caller) | 
|  | 178 | or an array of boolean values parallel to the output array. | 
|  | 179 | Nonzero (true, flagged) suggests that the corresponding | 
|  | 180 | Unicode character be forced to uppercase by the caller (if | 
|  | 181 | possible), and zero (false, unflagged) suggests that it | 
|  | 182 | be forced to lowercase (if possible).  ASCII code points | 
|  | 183 | (0..7F) are output already in the proper case, but their | 
|  | 184 | flags will be set appropriately so that applying the flags | 
|  | 185 | would be harmless. | 
|  | 186 |  | 
|  | 187 | Input/output arguments (to be supplied by the caller and overwritten | 
|  | 188 | by the function): | 
|  | 189 |  | 
|  | 190 | output_length | 
|  | 191 | The caller passes in the maximum number of code points | 
|  | 192 | that it can receive into the output array (which is also | 
|  | 193 | the maximum number of flags that it can receive into the | 
|  | 194 | case_flags array, if case_flags is not a null pointer).  On | 
|  | 195 | successful return it will contain the number of code points | 
|  | 196 | actually output (which is also the number of flags actually | 
|  | 197 | output, if case_flags is not a null pointer).  The decoder | 
|  | 198 | will never need to output more code points than the number | 
|  | 199 | of ASCII code points in the input, because of the way the | 
|  | 200 | encoding is defined.  The number of code points output | 
|  | 201 | cannot exceed the maximum possible value of a punycode_uint, | 
|  | 202 | even if the supplied output_length is greater than that. | 
|  | 203 |  | 
|  | 204 | Return value: | 
|  | 205 |  | 
|  | 206 | Can be any of the punycode_status values defined above.  If not | 
|  | 207 | punycode_success, then output_length, output, and case_flags | 
|  | 208 | might contain garbage. | 
|  | 209 | */ | 
|  | 210 |  | 
|  | 211 | #ifdef __cplusplus | 
|  | 212 | } | 
|  | 213 | #endif | 
|  | 214 | #endif				/* _PUNYCODE_H */ |