rjw | 2e8229f | 2022-02-15 21:08:12 +0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 1996, 1998 by Internet Software Consortium. |
| 3 | * |
| 4 | * Permission to use, copy, modify, and distribute this software for any |
| 5 | * purpose with or without fee is hereby granted, provided that the above |
| 6 | * copyright notice and this permission notice appear in all copies. |
| 7 | * |
| 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS |
| 9 | * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES |
| 10 | * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE |
| 11 | * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL |
| 12 | * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR |
| 13 | * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS |
| 14 | * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
| 15 | * SOFTWARE. |
| 16 | */ |
| 17 | |
| 18 | /* |
| 19 | * Portions Copyright (c) 1995 by International Business Machines, Inc. |
| 20 | * |
| 21 | * International Business Machines, Inc. (hereinafter called IBM) grants |
| 22 | * permission under its copyrights to use, copy, modify, and distribute this |
| 23 | * Software with or without fee, provided that the above copyright notice and |
| 24 | * all paragraphs of this notice appear in all copies, and that the name of IBM |
| 25 | * not be used in connection with the marketing of any product incorporating |
| 26 | * the Software or modifications thereof, without specific, written prior |
| 27 | * permission. |
| 28 | * |
| 29 | * To the extent it has a right to do so, IBM grants an immunity from suit |
| 30 | * under its patents, if any, for the use, sale or manufacture of products to |
| 31 | * the extent that such products are used for performing Domain Name System |
| 32 | * dynamic updates in TCP/IP networks by means of the Software. No immunity is |
| 33 | * granted for any product per se or for any other function of any product. |
| 34 | * |
| 35 | * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES, |
| 36 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
| 37 | * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, |
| 38 | * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING |
| 39 | * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN |
| 40 | * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES. |
| 41 | */ |
| 42 | |
| 43 | #include <sys/types.h> |
| 44 | #include <sys/param.h> |
| 45 | #include <sys/socket.h> |
| 46 | |
| 47 | #include <netinet/in.h> |
| 48 | #include <arpa/inet.h> |
| 49 | |
| 50 | #include <ctype.h> |
| 51 | #include <stdio.h> |
| 52 | #include <stdlib.h> |
| 53 | #include <string.h> |
| 54 | |
/*
 * Abort the program when Cond is false.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement: a bare "if" expansion would capture the "else" of an enclosing
 * if/else written without braces.
 */
#define Assert(Cond) do { if (!(Cond)) abort(); } while (0)
| 56 | |
/* The base64 alphabet; a character's index in this string is its 6-bit value. */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";

/* Padding character used to fill out an incomplete final group. */
static const char Pad64 = '=';
| 60 | |
| 61 | /* (From RFC1521 and draft-ietf-dnssec-secext-03.txt) |
| 62 | The following encoding technique is taken from RFC 1521 by Borenstein |
| 63 | and Freed. It is reproduced here in a slightly edited form for |
| 64 | convenience. |
| 65 | |
| 66 | A 65-character subset of US-ASCII is used, enabling 6 bits to be |
| 67 | represented per printable character. (The extra 65th character, "=", |
| 68 | is used to signify a special processing function.) |
| 69 | |
| 70 | The encoding process represents 24-bit groups of input bits as output |
| 71 | strings of 4 encoded characters. Proceeding from left to right, a |
| 72 | 24-bit input group is formed by concatenating 3 8-bit input groups. |
| 73 | These 24 bits are then treated as 4 concatenated 6-bit groups, each |
| 74 | of which is translated into a single digit in the base64 alphabet. |
| 75 | |
| 76 | Each 6-bit group is used as an index into an array of 64 printable |
| 77 | characters. The character referenced by the index is placed in the |
| 78 | output string. |
| 79 | |
| 80 | Table 1: The Base64 Alphabet |
| 81 | |
| 82 | Value Encoding Value Encoding Value Encoding Value Encoding |
| 83 | 0 A 17 R 34 i 51 z |
| 84 | 1 B 18 S 35 j 52 0 |
| 85 | 2 C 19 T 36 k 53 1 |
| 86 | 3 D 20 U 37 l 54 2 |
| 87 | 4 E 21 V 38 m 55 3 |
| 88 | 5 F 22 W 39 n 56 4 |
| 89 | 6 G 23 X 40 o 57 5 |
| 90 | 7 H 24 Y 41 p 58 6 |
| 91 | 8 I 25 Z 42 q 59 7 |
| 92 | 9 J 26 a 43 r 60 8 |
| 93 | 10 K 27 b 44 s 61 9 |
| 94 | 11 L 28 c 45 t 62 + |
| 95 | 12 M 29 d 46 u 63 / |
| 96 | 13 N 30 e 47 v |
| 97 | 14 O 31 f 48 w (pad) = |
| 98 | 15 P 32 g 49 x |
| 99 | 16 Q 33 h 50 y |
| 100 | |
| 101 | Special processing is performed if fewer than 24 bits are available |
| 102 | at the end of the data being encoded. A full encoding quantum is |
| 103 | always completed at the end of a quantity. When fewer than 24 input |
| 104 | bits are available in an input group, zero bits are added (on the |
| 105 | right) to form an integral number of 6-bit groups. Padding at the |
| 106 | end of the data is performed using the '=' character. |
| 107 | |
| 108 | Since all base64 input is an integral number of octets, only the |
| 109 | following cases can arise: |
| 110 | |
| 111 | (1) the final quantum of encoding input is an integral |
| 112 | multiple of 24 bits; here, the final unit of encoded |
| 113 | output will be an integral multiple of 4 characters |
| 114 | with no "=" padding, |
| 115 | (2) the final quantum of encoding input is exactly 8 bits; |
| 116 | here, the final unit of encoded output will be two |
| 117 | characters followed by two "=" padding characters, or |
| 118 | (3) the final quantum of encoding input is exactly 16 bits; |
| 119 | here, the final unit of encoded output will be three |
| 120 | characters followed by one "=" padding character. |
| 121 | */ |
| 122 | |
| 123 | /* skips all whitespace anywhere. |
| 124 | converts characters, four at a time, starting at (or after) |
| 125 | src from base - 64 numbers into three 8 bit bytes in the target area. |
| 126 | it returns the number of data bytes stored at the target, or -1 on error. |
| 127 | */ |
| 128 | |
/*
 * Marker values stored in b64rmap[] for characters that are not base64
 * digits.  Real digits map to 0..63, so any entry >= b64rmap_special
 * needs special handling by the decoders.
 */
static const uint8_t b64rmap_special = 0xF0;
static const uint8_t b64rmap_end     = 0xFD;	/* NUL or pad: stop parsing */
static const uint8_t b64rmap_space   = 0xFE;	/* whitespace: skipped */
static const uint8_t b64rmap_invalid = 0xFF;	/* anything else: error */

/* Reverse lookup table, indexed by character code (0..255). */
static uint8_t b64rmap[256];

/* Becomes non-zero once b64rmap[] has been filled in. */
static int b64rmap_initialized;
| 136 | |
| 137 | /** |
| 138 | * Initializing the reverse map is not thread safe. |
| 139 | * Which is fine for NSD. For now... |
| 140 | **/ |
| 141 | static void |
| 142 | b64_initialize_rmap () |
| 143 | { |
| 144 | int i; |
| 145 | char ch; |
| 146 | |
| 147 | /* Null: end of string, stop parsing */ |
| 148 | b64rmap[0] = b64rmap_end; |
| 149 | |
| 150 | for (i = 1; i < 256; ++i) { |
| 151 | ch = (char)i; |
| 152 | /* Whitespaces */ |
| 153 | if (isspace(ch)) |
| 154 | b64rmap[i] = b64rmap_space; |
| 155 | /* Padding: stop parsing */ |
| 156 | else if (ch == Pad64) |
| 157 | b64rmap[i] = b64rmap_end; |
| 158 | /* Non-base64 char */ |
| 159 | else |
| 160 | b64rmap[i] = b64rmap_invalid; |
| 161 | } |
| 162 | |
| 163 | /* Fill reverse mapping for base64 chars */ |
| 164 | for (i = 0; Base64[i] != '\0'; ++i) |
| 165 | b64rmap[(uint8_t)Base64[i]] = i; |
| 166 | |
| 167 | b64rmap_initialized = 1; |
| 168 | } |
| 169 | |
| 170 | static int |
| 171 | b64_pton_do(char const *src, uint8_t *target, size_t targsize) |
| 172 | { |
| 173 | int tarindex, state, ch; |
| 174 | uint8_t ofs; |
| 175 | |
| 176 | state = 0; |
| 177 | tarindex = 0; |
| 178 | |
| 179 | while (1) |
| 180 | { |
| 181 | ch = *src++; |
| 182 | ofs = b64rmap[ch]; |
| 183 | |
| 184 | if (ofs >= b64rmap_special) { |
| 185 | /* Ignore whitespaces */ |
| 186 | if (ofs == b64rmap_space) |
| 187 | continue; |
| 188 | /* End of base64 characters */ |
| 189 | if (ofs == b64rmap_end) |
| 190 | break; |
| 191 | /* A non-base64 character. */ |
| 192 | return (-1); |
| 193 | } |
| 194 | |
| 195 | switch (state) { |
| 196 | case 0: |
| 197 | if ((size_t)tarindex >= targsize) |
| 198 | return (-1); |
| 199 | target[tarindex] = ofs << 2; |
| 200 | state = 1; |
| 201 | break; |
| 202 | case 1: |
| 203 | if ((size_t)tarindex + 1 >= targsize) |
| 204 | return (-1); |
| 205 | target[tarindex] |= ofs >> 4; |
| 206 | target[tarindex+1] = (ofs & 0x0f) |
| 207 | << 4 ; |
| 208 | tarindex++; |
| 209 | state = 2; |
| 210 | break; |
| 211 | case 2: |
| 212 | if ((size_t)tarindex + 1 >= targsize) |
| 213 | return (-1); |
| 214 | target[tarindex] |= ofs >> 2; |
| 215 | target[tarindex+1] = (ofs & 0x03) |
| 216 | << 6; |
| 217 | tarindex++; |
| 218 | state = 3; |
| 219 | break; |
| 220 | case 3: |
| 221 | if ((size_t)tarindex >= targsize) |
| 222 | return (-1); |
| 223 | target[tarindex] |= ofs; |
| 224 | tarindex++; |
| 225 | state = 0; |
| 226 | break; |
| 227 | default: |
| 228 | abort(); |
| 229 | } |
| 230 | } |
| 231 | |
| 232 | /* |
| 233 | * We are done decoding Base-64 chars. Let's see if we ended |
| 234 | * on a byte boundary, and/or with erroneous trailing characters. |
| 235 | */ |
| 236 | |
| 237 | if (ch == Pad64) { /* We got a pad char. */ |
| 238 | ch = *src++; /* Skip it, get next. */ |
| 239 | switch (state) { |
| 240 | case 0: /* Invalid = in first position */ |
| 241 | case 1: /* Invalid = in second position */ |
| 242 | return (-1); |
| 243 | |
| 244 | case 2: /* Valid, means one byte of info */ |
| 245 | /* Skip any number of spaces. */ |
| 246 | for ((void)NULL; ch != '\0'; ch = *src++) |
| 247 | if (b64rmap[ch] != b64rmap_space) |
| 248 | break; |
| 249 | /* Make sure there is another trailing = sign. */ |
| 250 | if (ch != Pad64) |
| 251 | return (-1); |
| 252 | ch = *src++; /* Skip the = */ |
| 253 | /* Fall through to "single trailing =" case. */ |
| 254 | /* FALLTHROUGH */ |
| 255 | |
| 256 | case 3: /* Valid, means two bytes of info */ |
| 257 | /* |
| 258 | * We know this char is an =. Is there anything but |
| 259 | * whitespace after it? |
| 260 | */ |
| 261 | for ((void)NULL; ch != '\0'; ch = *src++) |
| 262 | if (b64rmap[ch] != b64rmap_space) |
| 263 | return (-1); |
| 264 | |
| 265 | /* |
| 266 | * Now make sure for cases 2 and 3 that the "extra" |
| 267 | * bits that slopped past the last full byte were |
| 268 | * zeros. If we don't check them, they become a |
| 269 | * subliminal channel. |
| 270 | */ |
| 271 | if (target[tarindex] != 0) |
| 272 | return (-1); |
| 273 | } |
| 274 | } else { |
| 275 | /* |
| 276 | * We ended by seeing the end of the string. Make sure we |
| 277 | * have no partial bytes lying around. |
| 278 | */ |
| 279 | if (state != 0) |
| 280 | return (-1); |
| 281 | } |
| 282 | |
| 283 | return (tarindex); |
| 284 | } |
| 285 | |
| 286 | |
| 287 | static int |
| 288 | b64_pton_len(char const *src) |
| 289 | { |
| 290 | int tarindex, state, ch; |
| 291 | uint8_t ofs; |
| 292 | |
| 293 | state = 0; |
| 294 | tarindex = 0; |
| 295 | |
| 296 | while (1) |
| 297 | { |
| 298 | ch = *src++; |
| 299 | ofs = b64rmap[ch]; |
| 300 | |
| 301 | if (ofs >= b64rmap_special) { |
| 302 | /* Ignore whitespaces */ |
| 303 | if (ofs == b64rmap_space) |
| 304 | continue; |
| 305 | /* End of base64 characters */ |
| 306 | if (ofs == b64rmap_end) |
| 307 | break; |
| 308 | /* A non-base64 character. */ |
| 309 | return (-1); |
| 310 | } |
| 311 | |
| 312 | switch (state) { |
| 313 | case 0: |
| 314 | state = 1; |
| 315 | break; |
| 316 | case 1: |
| 317 | tarindex++; |
| 318 | state = 2; |
| 319 | break; |
| 320 | case 2: |
| 321 | tarindex++; |
| 322 | state = 3; |
| 323 | break; |
| 324 | case 3: |
| 325 | tarindex++; |
| 326 | state = 0; |
| 327 | break; |
| 328 | default: |
| 329 | abort(); |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | /* |
| 334 | * We are done decoding Base-64 chars. Let's see if we ended |
| 335 | * on a byte boundary, and/or with erroneous trailing characters. |
| 336 | */ |
| 337 | |
| 338 | if (ch == Pad64) { /* We got a pad char. */ |
| 339 | ch = *src++; /* Skip it, get next. */ |
| 340 | switch (state) { |
| 341 | case 0: /* Invalid = in first position */ |
| 342 | case 1: /* Invalid = in second position */ |
| 343 | return (-1); |
| 344 | |
| 345 | case 2: /* Valid, means one byte of info */ |
| 346 | /* Skip any number of spaces. */ |
| 347 | for ((void)NULL; ch != '\0'; ch = *src++) |
| 348 | if (b64rmap[ch] != b64rmap_space) |
| 349 | break; |
| 350 | /* Make sure there is another trailing = sign. */ |
| 351 | if (ch != Pad64) |
| 352 | return (-1); |
| 353 | ch = *src++; /* Skip the = */ |
| 354 | /* Fall through to "single trailing =" case. */ |
| 355 | /* FALLTHROUGH */ |
| 356 | |
| 357 | case 3: /* Valid, means two bytes of info */ |
| 358 | /* |
| 359 | * We know this char is an =. Is there anything but |
| 360 | * whitespace after it? |
| 361 | */ |
| 362 | for ((void)NULL; ch != '\0'; ch = *src++) |
| 363 | if (b64rmap[ch] != b64rmap_space) |
| 364 | return (-1); |
| 365 | |
| 366 | } |
| 367 | } else { |
| 368 | /* |
| 369 | * We ended by seeing the end of the string. Make sure we |
| 370 | * have no partial bytes lying around. |
| 371 | */ |
| 372 | if (state != 0) |
| 373 | return (-1); |
| 374 | } |
| 375 | |
| 376 | return (tarindex); |
| 377 | } |
| 378 | |
| 379 | |
| 380 | int |
| 381 | b64_pton(char const *src, uint8_t *target, size_t targsize) |
| 382 | { |
| 383 | if (!b64rmap_initialized) |
| 384 | b64_initialize_rmap (); |
| 385 | |
| 386 | if (target) |
| 387 | return b64_pton_do (src, target, targsize); |
| 388 | else |
| 389 | return b64_pton_len (src); |
| 390 | } |