/* Conversion module for ISO-2022-CN-EXT.
   Copyright (C) 2000-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
|  | 19 |  | 
#include <dlfcn.h>
#include <gconv.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "gb2312.h"
#include "iso-ir-165.h"
#include "cns11643.h"
#include "cns11643l1.h"
#include "cns11643l2.h"

#include <assert.h>
|  | 32 |  | 
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC	0x1b

/* We have the single-byte shift-in (SI) and shift-out (SO) characters,
   and the two-byte single shift sequences SS2 (ESC N) and SS3 (ESC O),
   each of which selects the SS2/SS3 designation for the next two bytes
   only.  */
#define SI	0x0f
#define SO	0x0e
#define SS2_0	ESC
#define SS2_1	0x4e
#define SS3_0	ESC
#define SS3_1	0x4f
|  | 45 |  | 
/* Definitions used in the body of the `gconv' function.  They are
   consumed by the <iconv/loop.c> and <iconv/skeleton.c> templates that
   are included further down.  */
#define CHARSET_NAME		"ISO-2022-CN-EXT//"
#define DEFINE_INIT		1
#define DEFINE_FINI		1
#define ONE_DIRECTION		0
#define FROM_LOOP		from_iso2022cn_ext_loop
#define TO_LOOP			to_iso2022cn_ext_loop

/* Input/output sizes (in bytes) per character.  The ISO-2022-CN-EXT
   side uses between one byte (ASCII) and four bytes (escape sequence or
   single shift plus two bytes); the UCS4 side always uses four.  */
#define FROM_LOOP_MIN_NEEDED_FROM	1
#define FROM_LOOP_MAX_NEEDED_FROM	4
#define FROM_LOOP_MIN_NEEDED_TO		4
#define FROM_LOOP_MAX_NEEDED_TO		4
#define TO_LOOP_MIN_NEEDED_FROM		4
#define TO_LOOP_MAX_NEEDED_FROM		4
#define TO_LOOP_MIN_NEEDED_TO		1
#define TO_LOOP_MAX_NEEDED_TO		6

/* Both loops work on the `__count' member of the conversion state;
   SAVE_SET is the scratch copy used by SAVE_RESET_STATE.  */
#define PREPARE_LOOP \
  int save_set;								      \
  int *setp = &data->__statep->__count;
#define EXTRA_LOOP_ARGS		, setp
|  | 65 |  | 
|  | 66 |  | 
/* The charsets GB/T 12345-90, GB 7589-87, GB/T 13131-9X, GB 7590-87,
   and GB/T 13132-9X are not registered to the best of my knowledge and
   therefore have no escape sequence assigned.  We cannot handle them
   for this reason.  A value of '\0' for the final byte tells the
   conversion code that the corresponding designation does not exist.  */
#define X12345	'\0'
#define X7589	'\0'
#define X13131	'\0'
#define X7590	'\0'
#define X13132	'\0'
|  | 76 |  | 
|  | 77 |  | 
/* The COUNT element of the state keeps track of the currently selected
   character set and of the designations seen (or emitted) so far.  The
   value is stored shifted left by three bits.  After shifting it back
   the layout is:

     bits  0..2   set selected by SO		(SO_mask)
     bits  3..4   set reachable through SS2	(SS2_mask)
     bits  5..7   set reachable through SS3	(SS3_mask)
     bits  8..10  designation announced for SO	(SO_ann)
     bits 11..12  designation announced for SS2	(SS2_ann)
     bits 13..15  designation announced for SS3	(SS3_ann)

   Every `*_ann' value equals the matching `*_set' value shifted left by
   eight bits; the conversion code relies on this.  */
enum
{
  ASCII_set = 0,
  GB2312_set,
  GB12345_set,
  CNS11643_1_set,
  ISO_IR_165_set,
  SO_mask = 7,

  GB7589_set = 1 << 3,
  GB13131_set = 2 << 3,
  CNS11643_2_set = 3 << 3,
  SS2_mask = 3 << 3,

  GB7590_set = 1 << 5,
  GB13132_set = 2 << 5,
  CNS11643_3_set = 3 << 5,
  CNS11643_4_set = 4 << 5,
  CNS11643_5_set = 5 << 5,
  CNS11643_6_set = 6 << 5,
  CNS11643_7_set = 7 << 5,
  SS3_mask = 7 << 5,

  GB2312_ann = 1 << 8,
  GB12345_ann = 2 << 8,
  CNS11643_1_ann = 3 << 8,
  ISO_IR_165_ann = 4 << 8,
  SO_ann = 7 << 8,

  GB7589_ann = 1 << 11,
  GB13131_ann = 2 << 11,
  CNS11643_2_ann = 3 << 11,
  SS2_ann = 3 << 11,

  GB7590_ann = 1 << 13,
  GB13132_ann = 2 << 13,
  CNS11643_3_ann = 3 << 13,
  CNS11643_4_ann = 4 << 13,
  CNS11643_5_ann = 5 << 13,
  CNS11643_6_ann = 6 << 13,
  CNS11643_7_ann = 7 << 13,
  SS3_ann = 7 << 13
};

/* All bits describing the currently selected sets (as opposed to the
   announcements).  */
#define CURRENT_MASK (SO_mask | SS2_mask | SS3_mask)
|  | 125 |  | 
|  | 126 |  | 
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The macro expects `data', `outbuf', `outend', `status' and
   `irreversible' to be in scope at the point of expansion.  When
   decoding, the state is simply cleared.  When encoding, a single SI
   byte is written first; if there is no room for it STATUS is set to
   __GCONV_FULL_OUTPUT and the state is left untouched so that the flush
   can be retried.  */
#define EMIT_SHIFT_TO_INIT \
  if (data->__statep->__count >> 3 != ASCII_set)			      \
    {									      \
      if (FROM_DIRECTION)						      \
	/* It's easy, we don't have to emit anything, we just reset the     \
	   state for the input.  */					      \
	data->__statep->__count = ASCII_set << 3;			      \
      else								      \
	{								      \
	  /* We are not in the initial state.  To switch back we have	      \
	     to emit `SI'.  */						      \
	  if (__glibc_unlikely (outbuf == outend))			      \
	    /* We don't have enough room in the output buffer.  */	      \
	    status = __GCONV_FULL_OUTPUT;				      \
	  else								      \
	    {								      \
	      /* Write out the shift sequence.  */			      \
	      *outbuf++ = SI;						      \
	      if (data->__flags & __GCONV_IS_LAST)			      \
		*irreversible += 1;					      \
	      data->__statep->__count = ASCII_set << 3;		      \
	    }								      \
	}								      \
    }
|  | 154 |  | 
|  | 155 |  | 
/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  With a non-zero argument the current state
   (*SETP) is copied to SAVE_SET; with a zero argument it is copied
   back.  */
#define SAVE_RESET_STATE(Save) \
  if (Save)								      \
    save_set = *setp;							      \
  else									      \
    *setp = save_set
|  | 163 |  | 
|  | 164 |  | 
|  | 165 | /* First define the conversion function from ISO-2022-CN to UCS4.  */ | 
|  | 166 | #define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM | 
|  | 167 | #define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM | 
|  | 168 | #define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO | 
|  | 169 | #define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO | 
|  | 170 | #define LOOPFCT			FROM_LOOP | 
|  | 171 | #define BODY \ | 
|  | 172 | {									      \ | 
|  | 173 | uint32_t ch = *inptr;						      \ | 
|  | 174 | \ | 
|  | 175 | /* This is a 7bit character set, disallow all 8bit characters.  */	      \ | 
|  | 176 | if (ch > 0x7f)							      \ | 
|  | 177 | STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \ | 
|  | 178 | \ | 
|  | 179 | /* Recognize escape sequences.  */					      \ | 
|  | 180 | if (ch == ESC)							      \ | 
|  | 181 | {									      \ | 
|  | 182 | /* There are three kinds of escape sequences we have to handle:	      \ | 
|  | 183 | - those announcing the use of GB and CNS characters on the	      \ | 
|  | 184 | line; we can simply ignore them				      \ | 
|  | 185 | - the initial byte of the SS2 sequence.			      \ | 
|  | 186 | - the initial byte of the SS3 sequence.			      \ | 
|  | 187 | */								      \ | 
|  | 188 | if (inptr + 2 > inend						      \ | 
|  | 189 | || (inptr[1] == '$'						      \ | 
|  | 190 | && (inptr + 3 > inend					      \ | 
|  | 191 | || (inptr[2] == ')' && inptr + 4 > inend)		      \ | 
|  | 192 | || (inptr[2] == '*' && inptr + 4 > inend)		      \ | 
|  | 193 | || (inptr[2] == '+' && inptr + 4 > inend)))		      \ | 
|  | 194 | || (inptr[1] == SS2_1 && inptr + 4 > inend)			      \ | 
|  | 195 | || (inptr[1] == SS3_1 && inptr + 4 > inend))		      \ | 
|  | 196 | {								      \ | 
|  | 197 | result = __GCONV_INCOMPLETE_INPUT;				      \ | 
|  | 198 | break;							      \ | 
|  | 199 | }								      \ | 
|  | 200 | if (inptr[1] == '$'						      \ | 
|  | 201 | && ((inptr[2] == ')'					      \ | 
|  | 202 | && (inptr[3] == 'A'					      \ | 
|  | 203 | || (X12345 != '\0' && inptr[3] == X12345)		      \ | 
|  | 204 | || inptr[3] == 'E' || inptr[3] == 'G'))		      \ | 
|  | 205 | || (inptr[2] == '*'					      \ | 
|  | 206 | && ((X7589 != '\0' && inptr[3] == X7589)		      \ | 
|  | 207 | || (X13131 != '\0' && inptr[3] == X13131)	      \ | 
|  | 208 | || inptr[3] == 'H'))				      \ | 
|  | 209 | || (inptr[2] == '+'					      \ | 
|  | 210 | && ((X7590 != '\0' && inptr[3] == X7590)		      \ | 
|  | 211 | || (X13132 != '\0' && inptr[3] == X13132)	      \ | 
|  | 212 | || inptr[3] == 'I' || inptr[3] == 'J'		      \ | 
|  | 213 | || inptr[3] == 'K' || inptr[3] == 'L'		      \ | 
|  | 214 | || inptr[3] == 'M'))))				      \ | 
|  | 215 | {								      \ | 
|  | 216 | /* OK, we accept those character sets.  */			      \ | 
|  | 217 | if (inptr[3] == 'A')					      \ | 
|  | 218 | ann = (ann & ~SO_ann) | GB2312_ann;			      \ | 
|  | 219 | else if (inptr[3] == 'G')					      \ | 
|  | 220 | ann = (ann & ~SO_ann) | CNS11643_1_ann;			      \ | 
|  | 221 | else if (inptr[3] == 'E')					      \ | 
|  | 222 | ann = (ann & ~SO_ann) | ISO_IR_165_ann;			      \ | 
|  | 223 | else if (X12345 != '\0' && inptr[3] == X12345)		      \ | 
|  | 224 | ann = (ann & ~SO_ann) | GB12345_ann;			      \ | 
|  | 225 | else if (inptr[3] == 'H')					      \ | 
|  | 226 | ann = (ann & ~SS2_ann) | CNS11643_2_ann;			      \ | 
|  | 227 | else if (X7589 != '\0' && inptr[3] == X7589)		      \ | 
|  | 228 | ann = (ann & ~SS2_ann) | GB7589_ann;			      \ | 
|  | 229 | else if (X13131 != '\0' && inptr[3] == X13131)		      \ | 
|  | 230 | ann = (ann & ~SS2_ann) | GB13131_ann;			      \ | 
|  | 231 | else if (inptr[3] == 'I')					      \ | 
|  | 232 | ann = (ann & ~SS3_ann) | CNS11643_3_ann;			      \ | 
|  | 233 | else if (inptr[3] == 'J')					      \ | 
|  | 234 | ann = (ann & ~SS3_ann) | CNS11643_4_ann;			      \ | 
|  | 235 | else if (inptr[3] == 'K')					      \ | 
|  | 236 | ann = (ann & ~SS3_ann) | CNS11643_5_ann;			      \ | 
|  | 237 | else if (inptr[3] == 'L')					      \ | 
|  | 238 | ann = (ann & ~SS3_ann) | CNS11643_6_ann;			      \ | 
|  | 239 | else if (inptr[3] == 'M')					      \ | 
|  | 240 | ann = (ann & ~SS3_ann) | CNS11643_7_ann;			      \ | 
|  | 241 | else if (X7590 != '\0' && inptr[3] == X7590)		      \ | 
|  | 242 | ann = (ann & ~SS3_ann) | GB7590_ann;			      \ | 
|  | 243 | else if (X13132 != '\0' && inptr[3] == X13132)		      \ | 
|  | 244 | ann = (ann & ~SS3_ann) | GB13132_ann;			      \ | 
|  | 245 | inptr += 4;							      \ | 
|  | 246 | continue;							      \ | 
|  | 247 | }								      \ | 
|  | 248 | }									      \ | 
|  | 249 | else if (ch == SO)							      \ | 
|  | 250 | {									      \ | 
|  | 251 | /* Switch to use GB2312, GB12345, CNS 11643 plane 1, or ISO-IR-165,   \ | 
|  | 252 | depending on which S0 designation came last.  The only problem     \ | 
|  | 253 | is what to do with faulty input files where no designator came.    \ | 
|  | 254 | XXX For now I'll default to use GB2312.  If this is not the	      \ | 
|  | 255 | best behavior (e.g., we should flag an error) let me know.  */     \ | 
|  | 256 | ++inptr;							      \ | 
|  | 257 | if ((ann & SO_ann) != 0)					      \ | 
|  | 258 | switch (ann & SO_ann)						      \ | 
|  | 259 | {								      \ | 
|  | 260 | case GB2312_ann:						      \ | 
|  | 261 | set = GB2312_set;						      \ | 
|  | 262 | break;							      \ | 
|  | 263 | case GB12345_ann:						      \ | 
|  | 264 | set = GB12345_set;					      \ | 
|  | 265 | break;							      \ | 
|  | 266 | case CNS11643_1_ann:					      \ | 
|  | 267 | set = CNS11643_1_set;					      \ | 
|  | 268 | break;							      \ | 
|  | 269 | case ISO_IR_165_ann:					      \ | 
|  | 270 | set = ISO_IR_165_set;					      \ | 
|  | 271 | break;							      \ | 
|  | 272 | default:							      \ | 
|  | 273 | abort ();							      \ | 
|  | 274 | }								      \ | 
|  | 275 | else								      \ | 
|  | 276 | {								      \ | 
|  | 277 | STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \ | 
|  | 278 | }								      \ | 
|  | 279 | continue;							      \ | 
|  | 280 | }									      \ | 
|  | 281 | else if (ch == SI)							      \ | 
|  | 282 | {									      \ | 
|  | 283 | /* Switch to use ASCII.  */					      \ | 
|  | 284 | ++inptr;							      \ | 
|  | 285 | set = ASCII_set;						      \ | 
|  | 286 | continue;							      \ | 
|  | 287 | }									      \ | 
|  | 288 | \ | 
|  | 289 | if (ch == ESC && inptr[1] == SS2_1)					      \ | 
|  | 290 | {									      \ | 
|  | 291 | /* This is a character from CNS 11643 plane 2.			      \ | 
|  | 292 | XXX We could test here whether the use of this character	      \ | 
|  | 293 | set was announced.						      \ | 
|  | 294 | XXX Currently GB7589 and GB13131 are not supported.  */	      \ | 
|  | 295 | inptr += 2;							      \ | 
|  | 296 | ch = cns11643l2_to_ucs4 (&inptr, 2, 0);				      \ | 
|  | 297 | if (ch == __UNKNOWN_10646_CHAR)					      \ | 
|  | 298 | STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \ | 
|  | 299 | }									      \ | 
|  | 300 | /* Note that we can assume here that at least 4 bytes are available if    \ | 
|  | 301 | the first byte is ESC since otherwise the first if would have been     \ | 
|  | 302 | true.  */							      \ | 
|  | 303 | else if (ch == ESC && inptr[1] == SS3_1)				      \ | 
|  | 304 | {									      \ | 
|  | 305 | /* This is a character from CNS 11643 plane 3 or higher.	      \ | 
|  | 306 | XXX Currently GB7590 and GB13132 are not supported.  */	      \ | 
|  | 307 | unsigned char buf[3];						      \ | 
|  | 308 | const unsigned char *tmp = buf;					      \ | 
|  | 309 | \ | 
|  | 310 | buf[1] = inptr[2];						      \ | 
|  | 311 | buf[2] = inptr[3];						      \ | 
|  | 312 | switch (ann & SS3_ann)						      \ | 
|  | 313 | {								      \ | 
|  | 314 | case CNS11643_3_ann:						      \ | 
|  | 315 | buf[0] = 0x23;						      \ | 
|  | 316 | ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
|  | 317 | break;							      \ | 
|  | 318 | case CNS11643_4_ann:						      \ | 
|  | 319 | buf[0] = 0x24;						      \ | 
|  | 320 | ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
|  | 321 | break;							      \ | 
|  | 322 | case CNS11643_5_ann:						      \ | 
|  | 323 | buf[0] = 0x25;						      \ | 
|  | 324 | ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
|  | 325 | break;							      \ | 
|  | 326 | case CNS11643_6_ann:						      \ | 
|  | 327 | buf[0] = 0x26;						      \ | 
|  | 328 | ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
|  | 329 | break;							      \ | 
|  | 330 | case CNS11643_7_ann:						      \ | 
|  | 331 | buf[0] = 0x27;						      \ | 
|  | 332 | ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
|  | 333 | break;							      \ | 
|  | 334 | default:							      \ | 
|  | 335 | /* XXX Currently GB7590 and GB13132 are not supported.  */	      \ | 
|  | 336 | ch = __UNKNOWN_10646_CHAR;					      \ | 
|  | 337 | break;							      \ | 
|  | 338 | }								      \ | 
|  | 339 | if (ch == __UNKNOWN_10646_CHAR)					      \ | 
|  | 340 | {								      \ | 
|  | 341 | STANDARD_FROM_LOOP_ERR_HANDLER (4);				      \ | 
|  | 342 | }								      \ | 
|  | 343 | assert (tmp == buf + 3);					      \ | 
|  | 344 | inptr += 4;							      \ | 
|  | 345 | }									      \ | 
|  | 346 | else if (set == ASCII_set)						      \ | 
|  | 347 | {									      \ | 
|  | 348 | /* Almost done, just advance the input pointer.  */		      \ | 
|  | 349 | ++inptr;							      \ | 
|  | 350 | }									      \ | 
|  | 351 | else								      \ | 
|  | 352 | {									      \ | 
|  | 353 | /* That's pretty easy, we have a dedicated functions for this.  */    \ | 
|  | 354 | if (inend - inptr < 2)						      \ | 
|  | 355 | {								      \ | 
|  | 356 | result = __GCONV_INCOMPLETE_INPUT;				      \ | 
|  | 357 | break;							      \ | 
|  | 358 | }								      \ | 
|  | 359 | if (set == GB2312_set)						      \ | 
|  | 360 | ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);		      \ | 
|  | 361 | else if (set == ISO_IR_165_set)					      \ | 
|  | 362 | ch = isoir165_to_ucs4 (&inptr, inend - inptr);		      \ | 
|  | 363 | else								      \ | 
|  | 364 | {								      \ | 
|  | 365 | assert (set == CNS11643_1_set);				      \ | 
|  | 366 | ch = cns11643l1_to_ucs4 (&inptr, inend - inptr, 0);		      \ | 
|  | 367 | }								      \ | 
|  | 368 | \ | 
|  | 369 | if (ch == 0)							      \ | 
|  | 370 | {								      \ | 
|  | 371 | result = __GCONV_INCOMPLETE_INPUT;				      \ | 
|  | 372 | break;							      \ | 
|  | 373 | }								      \ | 
|  | 374 | else if (ch == __UNKNOWN_10646_CHAR)				      \ | 
|  | 375 | {								      \ | 
|  | 376 | STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \ | 
|  | 377 | }								      \ | 
|  | 378 | }									      \ | 
|  | 379 | \ | 
|  | 380 | *((uint32_t *) outptr) = ch;					      \ | 
|  | 381 | outptr += sizeof (uint32_t);					      \ | 
|  | 382 | } | 
|  | 383 | #define EXTRA_LOOP_DECLS	, int *setp | 
|  | 384 | #define INIT_PARAMS		int set = (*setp >> 3) & CURRENT_MASK; \ | 
|  | 385 | int ann = (*setp >> 3) & ~CURRENT_MASK | 
|  | 386 | #define UPDATE_PARAMS		*setp = (set | ann) << 3 | 
|  | 387 | #define LOOP_NEED_FLAGS | 
|  | 388 | #include <iconv/loop.c> | 
|  | 389 |  | 
|  | 390 |  | 
|  | 391 | /* Next, define the other direction.  */ | 
|  | 392 | #define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM | 
|  | 393 | #define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM | 
|  | 394 | #define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO | 
|  | 395 | #define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO | 
|  | 396 | #define LOOPFCT			TO_LOOP | 
|  | 397 | #define BODY \ | 
|  | 398 | {									      \ | 
|  | 399 | uint32_t ch;							      \ | 
|  | 400 | size_t written = 0;							      \ | 
|  | 401 | \ | 
|  | 402 | ch = *((const uint32_t *) inptr);					      \ | 
|  | 403 | \ | 
|  | 404 | /* First see whether we can write the character using the currently	      \ | 
|  | 405 | selected character set.  */					      \ | 
|  | 406 | if (ch < 0x80)							      \ | 
|  | 407 | {									      \ | 
|  | 408 | if (set != ASCII_set)						      \ | 
|  | 409 | {								      \ | 
|  | 410 | *outptr++ = SI;						      \ | 
|  | 411 | set = ASCII_set;						      \ | 
|  | 412 | if (outptr == outend)					      \ | 
|  | 413 | {								      \ | 
|  | 414 | result = __GCONV_FULL_OUTPUT;				      \ | 
|  | 415 | break;							      \ | 
|  | 416 | }								      \ | 
|  | 417 | }								      \ | 
|  | 418 | \ | 
|  | 419 | *outptr++ = ch;							      \ | 
|  | 420 | written = 1;							      \ | 
|  | 421 | \ | 
|  | 422 | /* At the end of the line we have to clear the `ann' flags since      \ | 
|  | 423 | every line must contain this information again.  */		      \ | 
|  | 424 | if (ch == L'\n')						      \ | 
|  | 425 | ann = 0;							      \ | 
|  | 426 | }									      \ | 
|  | 427 | else								      \ | 
|  | 428 | {									      \ | 
|  | 429 | unsigned char buf[2] = { 0, 0 };				      \ | 
|  | 430 | int used;							      \ | 
|  | 431 | \ | 
|  | 432 | if (set == GB2312_set || ((ann & SO_ann) != CNS11643_1_ann	      \ | 
|  | 433 | && (ann & SO_ann) != ISO_IR_165_ann))	      \ | 
|  | 434 | {								      \ | 
|  | 435 | written = ucs4_to_gb2312 (ch, buf, 2);			      \ | 
|  | 436 | used = GB2312_set;						      \ | 
|  | 437 | }								      \ | 
|  | 438 | else if (set == ISO_IR_165_set || (ann & SO_ann) == ISO_IR_165_set)   \ | 
|  | 439 | {								      \ | 
|  | 440 | written = ucs4_to_isoir165 (ch, buf, 2);			      \ | 
|  | 441 | used = ISO_IR_165_set;					      \ | 
|  | 442 | }								      \ | 
|  | 443 | else								      \ | 
|  | 444 | {								      \ | 
|  | 445 | written = ucs4_to_cns11643l1 (ch, buf, 2);			      \ | 
|  | 446 | used = CNS11643_1_set;					      \ | 
|  | 447 | }								      \ | 
|  | 448 | \ | 
|  | 449 | if (written == __UNKNOWN_10646_CHAR)				      \ | 
|  | 450 | {								      \ | 
|  | 451 | /* Cannot convert it using the currently selected SO set.	      \ | 
|  | 452 | Next try the SS2 set.  */				      \ | 
|  | 453 | written = ucs4_to_cns11643l2 (ch, buf, 2);			      \ | 
|  | 454 | if (written != __UNKNOWN_10646_CHAR)			      \ | 
|  | 455 | /* Yep, that worked.  */					      \ | 
|  | 456 | used = CNS11643_2_set;					      \ | 
|  | 457 | else							      \ | 
|  | 458 | {								      \ | 
|  | 459 | unsigned char tmpbuf[3];				      \ | 
|  | 460 | \ | 
|  | 461 | switch (0)						      \ | 
|  | 462 | {							      \ | 
|  | 463 | default:						      \ | 
|  | 464 | /* Well, see whether we have to change the SO set.  */    \ | 
|  | 465 | \ | 
|  | 466 | if (used != GB2312_set)				      \ | 
|  | 467 | {							      \ | 
|  | 468 | written = ucs4_to_gb2312 (ch, buf, 2);		      \ | 
|  | 469 | if (written != __UNKNOWN_10646_CHAR)		      \ | 
|  | 470 | {						      \ | 
|  | 471 | used = GB2312_set;				      \ | 
|  | 472 | break;					      \ | 
|  | 473 | }						      \ | 
|  | 474 | }							      \ | 
|  | 475 | \ | 
|  | 476 | if (used != ISO_IR_165_set)				      \ | 
|  | 477 | {							      \ | 
|  | 478 | written = ucs4_to_isoir165 (ch, buf, 2);	      \ | 
|  | 479 | if (written != __UNKNOWN_10646_CHAR)		      \ | 
|  | 480 | {						      \ | 
|  | 481 | used = ISO_IR_165_set;			      \ | 
|  | 482 | break;					      \ | 
|  | 483 | }						      \ | 
|  | 484 | }							      \ | 
|  | 485 | \ | 
|  | 486 | if (used != CNS11643_1_set)				      \ | 
|  | 487 | {							      \ | 
|  | 488 | written = ucs4_to_cns11643l1 (ch, buf, 2);	      \ | 
|  | 489 | if (written != __UNKNOWN_10646_CHAR)		      \ | 
|  | 490 | {						      \ | 
|  | 491 | used = CNS11643_1_set;			      \ | 
|  | 492 | break;					      \ | 
|  | 493 | }						      \ | 
|  | 494 | }							      \ | 
|  | 495 | \ | 
|  | 496 | written = ucs4_to_cns11643 (ch, tmpbuf, 3);		      \ | 
|  | 497 | if (written == 3 && tmpbuf[0] >= 3 && tmpbuf[0] <= 7)     \ | 
|  | 498 | {							      \ | 
|  | 499 | buf[0] = tmpbuf[1];				      \ | 
|  | 500 | buf[1] = tmpbuf[2];				      \ | 
|  | 501 | switch (tmpbuf[0])				      \ | 
|  | 502 | {						      \ | 
|  | 503 | case 3:					      \ | 
|  | 504 | used = CNS11643_3_set;			      \ | 
|  | 505 | break;					      \ | 
|  | 506 | case 4:					      \ | 
|  | 507 | used = CNS11643_4_set;			      \ | 
|  | 508 | break;					      \ | 
|  | 509 | case 5:					      \ | 
|  | 510 | used = CNS11643_5_set;			      \ | 
|  | 511 | break;					      \ | 
|  | 512 | case 6:					      \ | 
|  | 513 | used = CNS11643_6_set;			      \ | 
|  | 514 | break;					      \ | 
|  | 515 | case 7:					      \ | 
|  | 516 | used = CNS11643_7_set;			      \ | 
|  | 517 | break;					      \ | 
|  | 518 | default:					      \ | 
|  | 519 | abort ();					      \ | 
|  | 520 | }						      \ | 
|  | 521 | written = 2;					      \ | 
|  | 522 | break;						      \ | 
|  | 523 | }							      \ | 
|  | 524 | \ | 
|  | 525 | /* XXX Currently GB7590 and GB13132 are not supported.  */\ | 
|  | 526 | \ | 
|  | 527 | /* Even this does not work.  Error.  */		      \ | 
|  | 528 | used = ASCII_set;					      \ | 
|  | 529 | }							      \ | 
|  | 530 | if (used == ASCII_set)					      \ | 
|  | 531 | {							      \ | 
|  | 532 | UNICODE_TAG_HANDLER (ch, 4);			      \ | 
|  | 533 | STANDARD_TO_LOOP_ERR_HANDLER (4);			      \ | 
|  | 534 | }							      \ | 
|  | 535 | }								      \ | 
|  | 536 | }								      \ | 
|  | 537 | assert (written == 2);						      \ | 
|  | 538 | \ | 
|  | 539 | /* See whether we have to emit an escape sequence.  */		      \ | 
|  | 540 | if (set != used)						      \ | 
|  | 541 | {								      \ | 
|  | 542 | /* First see whether we announced that we use this		      \ | 
|  | 543 | character set.  */					      \ | 
|  | 544 | if ((used & SO_mask) != 0 && (ann & SO_ann) != (used << 8))	      \ | 
|  | 545 | {								      \ | 
|  | 546 | const char *escseq;					      \ | 
|  | 547 | \ | 
|  | 548 | if (outptr + 4 > outend)				      \ | 
|  | 549 | {							      \ | 
|  | 550 | result = __GCONV_FULL_OUTPUT;			      \ | 
|  | 551 | break;						      \ | 
|  | 552 | }							      \ | 
|  | 553 | \ | 
|  | 554 | assert (used >= 1 && used <= 4);			      \ | 
|  | 555 | escseq = ")A\0\0)G)E" + (used - 1) * 2;			      \ | 
|  | 556 | *outptr++ = ESC;					      \ | 
|  | 557 | *outptr++ = '$';					      \ | 
|  | 558 | *outptr++ = *escseq++;					      \ | 
|  | 559 | *outptr++ = *escseq++;					      \ | 
|  | 560 | \ | 
|  | 561 | ann = (ann & ~SO_ann) | (used << 8);			      \ | 
|  | 562 | }								      \ | 
|  | 563 | else if ((used & SS2_mask) != 0 && (ann & SS2_ann) != (used << 8))\ | 
|  | 564 | {								      \ | 
|  | 565 | const char *escseq;					      \ | 
|  | 566 | \ | 
|  | 567 | assert (used == CNS11643_2_set); /* XXX */		      \ | 
|  | 568 | escseq = "*H";						      \ | 
|  | 569 | *outptr++ = ESC;					      \ | 
|  | 570 | *outptr++ = '$';					      \ | 
|  | 571 | *outptr++ = *escseq++;					      \ | 
|  | 572 | *outptr++ = *escseq++;					      \ | 
|  | 573 | \ | 
|  | 574 | ann = (ann & ~SS2_ann) | (used << 8);			      \ | 
|  | 575 | }								      \ | 
|  | 576 | else if ((used & SS3_mask) != 0 && (ann & SS3_ann) != (used << 8))\ | 
|  | 577 | {								      \ | 
|  | 578 | const char *escseq;					      \ | 
|  | 579 | \ | 
|  | 580 | assert ((used >> 5) >= 3 && (used >> 5) <= 7);		      \ | 
|  | 581 | escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2;		      \ | 
|  | 582 | *outptr++ = ESC;					      \ | 
|  | 583 | *outptr++ = '$';					      \ | 
|  | 584 | *outptr++ = *escseq++;					      \ | 
|  | 585 | *outptr++ = *escseq++;					      \ | 
|  | 586 | \ | 
|  | 587 | ann = (ann & ~SS3_ann) | (used << 8);			      \ | 
|  | 588 | }								      \ | 
|  | 589 | \ | 
|  | 590 | if (used == CNS11643_2_set)					      \ | 
|  | 591 | {								      \ | 
|  | 592 | if (outptr + 2 > outend)				      \ | 
|  | 593 | {							      \ | 
|  | 594 | result = __GCONV_FULL_OUTPUT;			      \ | 
|  | 595 | break;						      \ | 
|  | 596 | }							      \ | 
|  | 597 | *outptr++ = SS2_0;					      \ | 
|  | 598 | *outptr++ = SS2_1;					      \ | 
|  | 599 | }								      \ | 
|  | 600 | else if (used >= CNS11643_3_set && used <= CNS11643_7_set)	      \ | 
|  | 601 | {								      \ | 
|  | 602 | if (outptr + 2 > outend)				      \ | 
|  | 603 | {							      \ | 
|  | 604 | result = __GCONV_FULL_OUTPUT;			      \ | 
|  | 605 | break;						      \ | 
|  | 606 | }							      \ | 
|  | 607 | *outptr++ = SS3_0;					      \ | 
|  | 608 | *outptr++ = SS3_1;					      \ | 
|  | 609 | }								      \ | 
|  | 610 | else							      \ | 
|  | 611 | {								      \ | 
|  | 612 | /* We only have to emit something if currently ASCII is	      \ | 
|  | 613 | selected.  Otherwise we are switching within the	      \ | 
|  | 614 | SO charset.  */					      \ | 
|  | 615 | if (set == ASCII_set)					      \ | 
|  | 616 | {							      \ | 
|  | 617 | if (outptr + 1 > outend)				      \ | 
|  | 618 | {							      \ | 
|  | 619 | result = __GCONV_FULL_OUTPUT;			      \ | 
|  | 620 | break;						      \ | 
|  | 621 | }							      \ | 
|  | 622 | *outptr++ = SO;					      \ | 
|  | 623 | }							      \ | 
|  | 624 | }								      \ | 
|  | 625 | \ | 
|  | 626 | /* Always test the length here since we have used up all the      \ | 
|  | 627 | guaranteed output buffer slots.  */			      \ | 
|  | 628 | if (outptr + 2 > outend)					      \ | 
|  | 629 | {								      \ | 
|  | 630 | result = __GCONV_FULL_OUTPUT;				      \ | 
|  | 631 | break;							      \ | 
|  | 632 | }								      \ | 
|  | 633 | }								      \ | 
|  | 634 | else if (outptr + 2 > outend)					      \ | 
|  | 635 | {								      \ | 
|  | 636 | result = __GCONV_FULL_OUTPUT;				      \ | 
|  | 637 | break;							      \ | 
|  | 638 | }								      \ | 
|  | 639 | \ | 
|  | 640 | *outptr++ = buf[0];						      \ | 
|  | 641 | *outptr++ = buf[1];						      \ | 
|  | 642 | set = used;							      \ | 
|  | 643 | }									      \ | 
|  | 644 | \ | 
|  | 645 | /* Now that we wrote the output increment the input pointer.  */	      \ | 
|  | 646 | inptr += 4;								      \ | 
|  | 647 | } | 
|  | 648 | #define EXTRA_LOOP_DECLS	, int *setp | 
|  | 649 | #define INIT_PARAMS		int set = (*setp >> 3) & CURRENT_MASK; \ | 
|  | 650 | int ann = (*setp >> 3) & ~CURRENT_MASK | 
|  | 651 | #define REINIT_PARAMS		do					      \ | 
|  | 652 | {					      \ | 
|  | 653 | set = (*setp >> 3) & CURRENT_MASK;	      \ | 
|  | 654 | ann = (*setp >> 3) & ~CURRENT_MASK;	      \ | 
|  | 655 | }					      \ | 
|  | 656 | while (0) | 
|  | 657 | #define UPDATE_PARAMS		*setp = (set | ann) << 3 | 
|  | 658 | #define LOOP_NEED_FLAGS | 
|  | 659 | #include <iconv/loop.c> | 
|  | 660 |  | 
|  | 661 |  | 
|  | 662 | /* Now define the toplevel functions.  */ | 
|  | 663 | #include <iconv/skeleton.c> |