/* Access functions for GB2312 conversion.
   Copyright (C) 1998-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
 | 19 |  | 
 | 20 | #ifndef _GB2312_H | 
 | 21 | #define _GB2312_H	1 | 
 | 22 |  | 
 | 23 | #include <gconv.h> | 
 | 24 | #include <stdint.h> | 
 | 25 | #include <assert.h> | 
 | 26 |  | 
 | 27 | /* Conversion table.  */ | 
 | 28 | extern const uint16_t __gb2312_to_ucs[]; | 
 | 29 |  | 
 | 30 |  | 
 | 31 | static inline uint32_t | 
 | 32 | __attribute ((always_inline)) | 
 | 33 | gb2312_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) | 
 | 34 | { | 
 | 35 |   unsigned char ch = *(*s); | 
 | 36 |   unsigned char ch2; | 
 | 37 |   int idx; | 
 | 38 |  | 
 | 39 |   if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) > 0x77) | 
 | 40 |     return __UNKNOWN_10646_CHAR; | 
 | 41 |  | 
 | 42 |   if (avail < 2) | 
 | 43 |     return 0; | 
 | 44 |  | 
 | 45 |   ch2 = (*s)[1]; | 
 | 46 |   if ((ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f) | 
 | 47 |     return __UNKNOWN_10646_CHAR; | 
 | 48 |  | 
 | 49 |   idx = (ch - 0x21 - offset) * 94 + (ch2 - 0x21 - offset); | 
 | 50 |   if (idx > 0x1ff1) | 
 | 51 |     return __UNKNOWN_10646_CHAR; | 
 | 52 |  | 
 | 53 |   (*s) += 2; | 
 | 54 |  | 
 | 55 |   return __gb2312_to_ucs[idx] ?: ((*s) -= 2, __UNKNOWN_10646_CHAR); | 
 | 56 | } | 
 | 57 |  | 
 | 58 |  | 
 | 59 | extern const char __gb2312_from_ucs4_tab1[][2]; | 
 | 60 | extern const char __gb2312_from_ucs4_tab2[][2]; | 
 | 61 | extern const char __gb2312_from_ucs4_tab3[][2]; | 
 | 62 | extern const char __gb2312_from_ucs4_tab4[][2]; | 
 | 63 | extern const char __gb2312_from_ucs4_tab5[][2]; | 
 | 64 | extern const char __gb2312_from_ucs4_tab6[][2]; | 
 | 65 | extern const char __gb2312_from_ucs4_tab7[][2]; | 
 | 66 | extern const char __gb2312_from_ucs4_tab8[][2]; | 
 | 67 | extern const char __gb2312_from_ucs4_tab9[][2]; | 
 | 68 |  | 
 | 69 | static inline size_t | 
 | 70 | __attribute ((always_inline)) | 
 | 71 | ucs4_to_gb2312 (uint32_t wch, unsigned char *s, size_t avail) | 
 | 72 | { | 
 | 73 |   unsigned int ch = (unsigned int) wch; | 
 | 74 |   char buf[2]; | 
 | 75 |   const char *cp = buf; | 
 | 76 |  | 
 | 77 |   switch (ch) | 
 | 78 |     { | 
 | 79 |     case 0xa4 ... 0x101: | 
 | 80 |       cp = __gb2312_from_ucs4_tab1[ch - 0xa4]; | 
 | 81 |       break; | 
 | 82 |     case 0x113: | 
 | 83 |       cp = "\x28\x25"; | 
 | 84 |       break; | 
 | 85 |     case 0x11b: | 
 | 86 |       cp = "\x28\x27"; | 
 | 87 |       break; | 
 | 88 |     case 0x12b: | 
 | 89 |       cp = "\x28\x29"; | 
 | 90 |       break; | 
 | 91 |     case 0x14d: | 
 | 92 |       cp = "\x28\x2d"; | 
 | 93 |       break; | 
 | 94 |     case 0x16b: | 
 | 95 |       cp = "\x28\x31"; | 
 | 96 |       break; | 
 | 97 |     case 0x1ce: | 
 | 98 |       cp = "\x28\x23"; | 
 | 99 |       break; | 
 | 100 |     case 0x1d0: | 
 | 101 |       cp = "\x28\x2b"; | 
 | 102 |       break; | 
 | 103 |     case 0x1d2: | 
 | 104 |       cp = "\x28\x2f"; | 
 | 105 |       break; | 
 | 106 |     case 0x1d4: | 
 | 107 |       cp = "\x28\x33"; | 
 | 108 |       break; | 
 | 109 |     case 0x1d6: | 
 | 110 |       cp = "\x28\x35"; | 
 | 111 |       break; | 
 | 112 |     case 0x1d8: | 
 | 113 |       cp = "\x28\x36"; | 
 | 114 |       break; | 
 | 115 |     case 0x1da: | 
 | 116 |       cp = "\x28\x37"; | 
 | 117 |       break; | 
 | 118 |     case 0x1dc: | 
 | 119 |       cp = "\x28\x38"; | 
 | 120 |       break; | 
 | 121 |     case 0x2c7: | 
 | 122 |       cp = "\x21\x26"; | 
 | 123 |       break; | 
 | 124 |     case 0x2c9: | 
 | 125 |       cp = "\x21\x25"; | 
 | 126 |       break; | 
 | 127 |     case 0x391 ... 0x3c9: | 
 | 128 |       cp = __gb2312_from_ucs4_tab2[ch - 0x391]; | 
 | 129 |       break; | 
 | 130 |     case 0x401 ... 0x451: | 
 | 131 |       cp = __gb2312_from_ucs4_tab3[ch - 0x401]; | 
 | 132 |       break; | 
 | 133 |     case 0x2015 ... 0x203b: | 
 | 134 |       cp = __gb2312_from_ucs4_tab4[ch - 0x2015]; | 
 | 135 |       break; | 
 | 136 |     case 0x2103 ... 0x22a5: | 
 | 137 |       cp = __gb2312_from_ucs4_tab5[ch - 0x2103]; | 
 | 138 |       break; | 
 | 139 |     case 0x2312: | 
 | 140 |       cp = "\x21\x50"; | 
 | 141 |       break; | 
 | 142 |     case 0x2460 ... 0x249b: | 
 | 143 |       cp = __gb2312_from_ucs4_tab6[ch - 0x2460]; | 
 | 144 |       break; | 
 | 145 |     case 0x2500 ... 0x254b: | 
 | 146 |       buf[0] = '\x29'; | 
 | 147 |       buf[1] = '\x24' + (ch % 256); | 
 | 148 |       break; | 
 | 149 |     case 0x25a0: | 
 | 150 |       cp = "\x21\x76"; | 
 | 151 |       break; | 
 | 152 |     case 0x25a1: | 
 | 153 |       cp = "\x21\x75"; | 
 | 154 |       break; | 
 | 155 |     case 0x25b2: | 
 | 156 |       cp = "\x21\x78"; | 
 | 157 |       break; | 
 | 158 |     case 0x25b3: | 
 | 159 |       cp = "\x21\x77"; | 
 | 160 |       break; | 
 | 161 |     case 0x25c6: | 
 | 162 |       cp = "\x21\x74"; | 
 | 163 |       break; | 
 | 164 |     case 0x25c7: | 
 | 165 |       cp = "\x21\x73"; | 
 | 166 |       break; | 
 | 167 |     case 0x25cb: | 
 | 168 |       cp = "\x21\x70"; | 
 | 169 |       break; | 
 | 170 |     case 0x25ce: | 
 | 171 |       cp = "\x21\x72"; | 
 | 172 |       break; | 
 | 173 |     case 0x25cf: | 
 | 174 |       cp = "\x21\x71"; | 
 | 175 |       break; | 
 | 176 |     case 0x2605: | 
 | 177 |       cp = "\x21\x6f"; | 
 | 178 |       break; | 
 | 179 |     case 0x2606: | 
 | 180 |       cp = "\x21\x6e"; | 
 | 181 |       break; | 
 | 182 |     case 0x2640: | 
 | 183 |       cp = "\x21\x62"; | 
 | 184 |       break; | 
 | 185 |     case 0x2642: | 
 | 186 |       cp = "\x21\x61"; | 
 | 187 |       break; | 
 | 188 |     case 0x3000 ... 0x3129: | 
 | 189 |       cp = __gb2312_from_ucs4_tab7[ch - 0x3000]; | 
 | 190 |       break; | 
 | 191 |     case 0x3220 ... 0x3229: | 
 | 192 |       buf[0] = '\x22'; | 
 | 193 |       buf[1] = '\x65' + (ch - 0x3220); | 
 | 194 |       break; | 
 | 195 |     case 0x4e00 ... 0x9fa0: | 
 | 196 |       cp = __gb2312_from_ucs4_tab8[ch - 0x4e00]; | 
 | 197 |       break; | 
 | 198 |     case 0xff01 ... 0xff5e: | 
 | 199 |       cp = __gb2312_from_ucs4_tab9[ch - 0xff01]; | 
 | 200 |       break; | 
 | 201 |     case 0xffe0: | 
 | 202 |       cp = "\x21\x69"; | 
 | 203 |       break; | 
 | 204 |     case 0xffe1: | 
 | 205 |       cp = "\x21\x6a"; | 
 | 206 |       break; | 
 | 207 |     case 0xffe3: | 
 | 208 |       cp = "\x23\x7e"; | 
 | 209 |       break; | 
 | 210 |     case 0xffe5: | 
 | 211 |       cp = "\x23\x24"; | 
 | 212 |       break; | 
 | 213 |     default: | 
 | 214 |       return __UNKNOWN_10646_CHAR; | 
 | 215 |     } | 
 | 216 |  | 
 | 217 |   if (cp[0] == '\0') | 
 | 218 |     return __UNKNOWN_10646_CHAR; | 
 | 219 |  | 
 | 220 |   assert (cp[1] != '\0'); | 
 | 221 |  | 
 | 222 |   if (avail < 2) | 
 | 223 |     return 0; | 
 | 224 |  | 
 | 225 |   s[0] = cp[0]; | 
 | 226 |   s[1] = cp[1]; | 
 | 227 |  | 
 | 228 |   return 2; | 
 | 229 | } | 
 | 230 |  | 
 | 231 | #endif	/* gb2312.h */ |