| 1 | /* punycode.h Declarations for punycode functions. |
| 2 | * Copyright (C) 2002, 2003 Simon Josefsson |
| 3 | * |
| 4 | * This file is part of GNU Libidn. |
| 5 | * |
| 6 | * GNU Libidn is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * GNU Libidn is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. |
| 18 | */ |
| 19 | |
| 20 | /* |
| 21 | * This file is derived from RFC 3492bis written by Adam M. Costello. |
| 22 | * |
| 23 | * Disclaimer and license: Regarding this entire document or any |
| 24 | * portion of it (including the pseudocode and C code), the author |
| 25 | * makes no guarantees and is not responsible for any damage resulting |
| 26 | * from its use. The author grants irrevocable permission to anyone |
| 27 | * to use, modify, and distribute it in any way that does not diminish |
| 28 | * the rights of anyone else to use, modify, and distribute it, |
| 29 | * provided that redistributed derivative works do not contain |
| 30 | * misleading author or version information. Derivative works need |
| 31 | * not be licensed under similar terms. |
| 32 | * |
| 33 | * Copyright (C) The Internet Society (2003). All Rights Reserved. |
| 34 | * |
| 35 | * This document and translations of it may be copied and furnished to |
| 36 | * others, and derivative works that comment on or otherwise explain it |
| 37 | * or assist in its implementation may be prepared, copied, published |
| 38 | * and distributed, in whole or in part, without restriction of any |
| 39 | * kind, provided that the above copyright notice and this paragraph are |
| 40 | * included on all such copies and derivative works. However, this |
| 41 | * document itself may not be modified in any way, such as by removing |
| 42 | * the copyright notice or references to the Internet Society or other |
| 43 | * Internet organizations, except as needed for the purpose of |
| 44 | * developing Internet standards in which case the procedures for |
| 45 | * copyrights defined in the Internet Standards process must be |
| 46 | * followed, or as required to translate it into languages other than |
| 47 | * English. |
| 48 | * |
| 49 | * The limited permissions granted above are perpetual and will not be |
| 50 | * revoked by the Internet Society or its successors or assigns. |
| 51 | * |
| 52 | * This document and the information contained herein is provided on an |
| 53 | * "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING |
| 54 | * TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING |
| 55 | * BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION |
| 56 | * HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF |
| 57 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. |
| 58 | */ |
| 59 | |
| 60 | #ifndef _PUNYCODE_H |
| 61 | #define _PUNYCODE_H |
| 62 | |
/* The standard headers are included before the linkage block is opened,
   so that their declarations are never nested inside extern "C" when
   this header is compiled as C++. */
#include <stddef.h>             /* size_t */
#include <stdint.h>             /* uint32_t */

#ifdef __cplusplus
extern "C"
{
#endif
| 70 | |
/* Status codes returned by punycode_encode() and punycode_decode(). */
enum punycode_status
{
  punycode_success = 0,         /* Successful return. */
  punycode_bad_input = 1,       /* Input is invalid. */
  punycode_big_output = 2,      /* Output would exceed the space provided. */
  punycode_overflow = 3         /* Wider integers needed to process input. */
};
| 78 | |
| 79 | typedef enum |
| 80 | { |
| 81 | PUNYCODE_SUCCESS = punycode_success, |
| 82 | PUNYCODE_BAD_INPUT = punycode_bad_input, |
| 83 | PUNYCODE_BIG_OUTPUT = punycode_big_output, |
| 84 | PUNYCODE_OVERFLOW = punycode_overflow |
| 85 | } Punycode_status; |
| 86 | |
/* Element type of the code point arrays taken by punycode_encode() and
   filled in by punycode_decode().  punycode_uint needs to be unsigned
   and needs to be at least 26 bits wide; uint32_t meets both
   requirements. */
typedef uint32_t punycode_uint;
| 91 | |
| 92 | extern int punycode_encode (size_t input_length, |
| 93 | const punycode_uint input[], |
| 94 | const unsigned char case_flags[], |
| 95 | size_t * output_length, char output[]); |
| 96 | |
| 97 | /* |
| 98 | punycode_encode() converts a sequence of code points (presumed to be |
| 99 | Unicode code points) to Punycode. |
| 100 | |
| 101 | Input arguments (to be supplied by the caller): |
| 102 | |
| 103 | input_length |
| 104 | The number of code points in the input array and the number |
| 105 | of flags in the case_flags array. |
| 106 | |
| 107 | input |
| 108 | An array of code points. They are presumed to be Unicode |
| 109 | code points, but that is not strictly REQUIRED. The |
| 110 | array contains code points, not code units. UTF-16 uses |
| 111 | code units D800 through DFFF to refer to code points |
| 112 | 10000..10FFFF. The code points D800..DFFF do not occur in |
| 113 | any valid Unicode string. The code points that can occur in |
| 114 | Unicode strings (0..D7FF and E000..10FFFF) are also called |
| 115 | Unicode scalar values. |
| 116 | |
| 117 | case_flags |
| 118 | A null pointer or an array of boolean values parallel to |
| 119 | the input array. Nonzero (true, flagged) suggests that the |
| 120 | corresponding Unicode character be forced to uppercase after |
| 121 | being decoded (if possible), and zero (false, unflagged) |
| 122 | suggests that it be forced to lowercase (if possible). |
| 123 | ASCII code points (0..7F) are encoded literally, except that |
| 124 | ASCII letters are forced to uppercase or lowercase according |
| 125 | to the corresponding case flags. If case_flags is a null |
| 126 | pointer then ASCII letters are left as they are, and other |
| 127 | code points are treated as unflagged. |
| 128 | |
| 129 | Output arguments (to be filled in by the function): |
| 130 | |
| 131 | output |
| 132 | An array of ASCII code points. It is *not* null-terminated; |
| 133 | it will contain zeros if and only if the input contains |
| 134 | zeros. (Of course the caller can leave room for a |
| 135 | terminator and add one if needed.) |
| 136 | |
| 137 | Input/output arguments (to be supplied by the caller and overwritten |
| 138 | by the function): |
| 139 | |
| 140 | output_length |
| 141 | The caller passes in the maximum number of ASCII code points |
| 142 | that it can receive. On successful return it will contain |
| 143 | the number of ASCII code points actually output. |
| 144 | |
| 145 | Return value: |
| 146 | |
| 147 | Can be any of the punycode_status values defined above except |
| 148 | punycode_bad_input. If not punycode_success, then output_length |
| 149 | and output might contain garbage. |
| 150 | */ |
| 151 | |
| 152 | extern int punycode_decode (size_t input_length, |
| 153 | const char input[], |
| 154 | size_t * output_length, |
| 155 | punycode_uint output[], |
| 156 | unsigned char case_flags[]); |
| 157 | |
| 158 | /* |
| 159 | punycode_decode() converts Punycode to a sequence of code points |
| 160 | (presumed to be Unicode code points). |
| 161 | |
| 162 | Input arguments (to be supplied by the caller): |
| 163 | |
| 164 | input_length |
| 165 | The number of ASCII code points in the input array. |
| 166 | |
| 167 | input |
| 168 | An array of ASCII code points (0..7F). |
| 169 | |
| 170 | Output arguments (to be filled in by the function): |
| 171 | |
| 172 | output |
| 173 | An array of code points like the input argument of |
| 174 | punycode_encode() (see above). |
| 175 | |
| 176 | case_flags |
| 177 | A null pointer (if the flags are not needed by the caller) |
| 178 | or an array of boolean values parallel to the output array. |
| 179 | Nonzero (true, flagged) suggests that the corresponding |
| 180 | Unicode character be forced to uppercase by the caller (if |
| 181 | possible), and zero (false, unflagged) suggests that it |
| 182 | be forced to lowercase (if possible). ASCII code points |
| 183 | (0..7F) are output already in the proper case, but their |
| 184 | flags will be set appropriately so that applying the flags |
| 185 | would be harmless. |
| 186 | |
| 187 | Input/output arguments (to be supplied by the caller and overwritten |
| 188 | by the function): |
| 189 | |
| 190 | output_length |
| 191 | The caller passes in the maximum number of code points |
| 192 | that it can receive into the output array (which is also |
| 193 | the maximum number of flags that it can receive into the |
| 194 | case_flags array, if case_flags is not a null pointer). On |
| 195 | successful return it will contain the number of code points |
| 196 | actually output (which is also the number of flags actually |
| 197 | output, if case_flags is not a null pointer). The decoder |
| 198 | will never need to output more code points than the number |
| 199 | of ASCII code points in the input, because of the way the |
| 200 | encoding is defined. The number of code points output |
| 201 | cannot exceed the maximum possible value of a punycode_uint, |
| 202 | even if the supplied output_length is greater than that. |
| 203 | |
| 204 | Return value: |
| 205 | |
| 206 | Can be any of the punycode_status values defined above. If not |
| 207 | punycode_success, then output_length, output, and case_flags |
| 208 | might contain garbage. |
| 209 | */ |
| 210 | |
| 211 | #ifdef __cplusplus |
| 212 | } |
| 213 | #endif |
| 214 | #endif /* _PUNYCODE_H */ |