| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Conversion module for ISO-2022-CN-EXT. | 
 | 2 |    Copyright (C) 2000-2016 Free Software Foundation, Inc. | 
 | 3 |    This file is part of the GNU C Library. | 
 | 4 |    Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000. | 
 | 5 |  | 
 | 6 |    The GNU C Library is free software; you can redistribute it and/or | 
 | 7 |    modify it under the terms of the GNU Lesser General Public | 
 | 8 |    License as published by the Free Software Foundation; either | 
 | 9 |    version 2.1 of the License, or (at your option) any later version. | 
 | 10 |  | 
 | 11 |    The GNU C Library is distributed in the hope that it will be useful, | 
 | 12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
 | 14 |    Lesser General Public License for more details. | 
 | 15 |  | 
 | 16 |    You should have received a copy of the GNU Lesser General Public | 
 | 17 |    License along with the GNU C Library; if not, see | 
 | 18 |    <http://www.gnu.org/licenses/>.  */ | 
 | 19 |  | 
 | 20 | #include <dlfcn.h> | 
 | 21 | #include <gconv.h> | 
 | 22 | #include <stdint.h> | 
 | 23 | #include <stdlib.h> | 
 | 24 | #include <string.h> | 
 | 25 | #include "gb2312.h" | 
 | 26 | #include "iso-ir-165.h" | 
 | 27 | #include "cns11643.h" | 
 | 28 | #include "cns11643l1.h" | 
 | 29 | #include "cns11643l2.h" | 
 | 30 |  | 
 | 31 | #include <assert.h> | 
 | 32 |  | 
 | 33 | /* This makes obvious what everybody knows: 0x1b is the Esc character.  */ | 
 | 34 | #define ESC	0x1b | 
 | 35 |  | 
 | 36 | /* We have single-byte shift-in and shift-out sequences, and the single | 
 | 37 |    shift sequences SS2 and SS3 which replaces the SS2/SS3 designation for | 
 | 38 |    the next two bytes.  */ | 
 | 39 | #define SI	0x0f | 
 | 40 | #define SO	0x0e | 
 | 41 | #define SS2_0	ESC | 
 | 42 | #define SS2_1	0x4e | 
 | 43 | #define SS3_0	ESC | 
 | 44 | #define SS3_1	0x4f | 
 | 45 |  | 
 | 46 | /* Definitions used in the body of the `gconv' function.  */ | 
 | 47 | #define CHARSET_NAME		"ISO-2022-CN-EXT//" | 
 | 48 | #define DEFINE_INIT		1 | 
 | 49 | #define DEFINE_FINI		1 | 
 | 50 | #define ONE_DIRECTION		0 | 
 | 51 | #define FROM_LOOP		from_iso2022cn_ext_loop | 
 | 52 | #define TO_LOOP			to_iso2022cn_ext_loop | 
 | 53 | #define FROM_LOOP_MIN_NEEDED_FROM	1 | 
 | 54 | #define FROM_LOOP_MAX_NEEDED_FROM	4 | 
 | 55 | #define FROM_LOOP_MIN_NEEDED_TO		4 | 
 | 56 | #define FROM_LOOP_MAX_NEEDED_TO		4 | 
 | 57 | #define TO_LOOP_MIN_NEEDED_FROM		4 | 
 | 58 | #define TO_LOOP_MAX_NEEDED_FROM		4 | 
 | 59 | #define TO_LOOP_MIN_NEEDED_TO		1 | 
 | 60 | #define TO_LOOP_MAX_NEEDED_TO		6 | 
 | 61 | #define PREPARE_LOOP \ | 
 | 62 |   int save_set;								      \ | 
 | 63 |   int *setp = &data->__statep->__count; | 
 | 64 | #define EXTRA_LOOP_ARGS		, setp | 
 | 65 |  | 
 | 66 |  | 
 | 67 | /* The charsets GB/T 12345-90, GB 7589-87, GB/T 13131-9X, GB 7590-87, | 
 | 68 |    and GB/T 13132-9X are not registered to the best of my knowledge and | 
 | 69 |    therefore have no escape sequence assigned.  We cannot handle them | 
 | 70 |    for this reason.  Tell the implementation about this.  */ | 
 | 71 | #define X12345	'\0' | 
 | 72 | #define X7589	'\0' | 
 | 73 | #define X13131	'\0' | 
 | 74 | #define X7590	'\0' | 
 | 75 | #define X13132	'\0' | 
 | 76 |  | 
 | 77 |  | 
 | 78 | /* The COUNT element of the state keeps track of the currently selected | 
 | 79 |    character set.  The possible values are:  */ | 
 | 80 | enum | 
 | 81 | { | 
 | 82 |   ASCII_set = 0, | 
 | 83 |   GB2312_set, | 
 | 84 |   GB12345_set, | 
 | 85 |   CNS11643_1_set, | 
 | 86 |   ISO_IR_165_set, | 
 | 87 |   SO_mask = 7, | 
 | 88 |  | 
 | 89 |   GB7589_set = 1 << 3, | 
 | 90 |   GB13131_set = 2 << 3, | 
 | 91 |   CNS11643_2_set = 3 << 3, | 
 | 92 |   SS2_mask = 3 << 3, | 
 | 93 |  | 
 | 94 |   GB7590_set = 1 << 5, | 
 | 95 |   GB13132_set = 2 << 5, | 
 | 96 |   CNS11643_3_set = 3 << 5, | 
 | 97 |   CNS11643_4_set = 4 << 5, | 
 | 98 |   CNS11643_5_set = 5 << 5, | 
 | 99 |   CNS11643_6_set = 6 << 5, | 
 | 100 |   CNS11643_7_set = 7 << 5, | 
 | 101 |   SS3_mask = 7 << 5, | 
 | 102 |  | 
 | 103 | #define CURRENT_MASK (SO_mask | SS2_mask | SS3_mask) | 
 | 104 |  | 
 | 105 |   GB2312_ann = 1 << 8, | 
 | 106 |   GB12345_ann = 2 << 8, | 
 | 107 |   CNS11643_1_ann = 3 << 8, | 
 | 108 |   ISO_IR_165_ann = 4 << 8, | 
 | 109 |   SO_ann = 7 << 8, | 
 | 110 |  | 
 | 111 |   GB7589_ann = 1 << 11, | 
 | 112 |   GB13131_ann = 2 << 11, | 
 | 113 |   CNS11643_2_ann = 3 << 11, | 
 | 114 |   SS2_ann = 3 << 11, | 
 | 115 |  | 
 | 116 |   GB7590_ann = 1 << 13, | 
 | 117 |   GB13132_ann = 2 << 13, | 
 | 118 |   CNS11643_3_ann = 3 << 13, | 
 | 119 |   CNS11643_4_ann = 4 << 13, | 
 | 120 |   CNS11643_5_ann = 5 << 13, | 
 | 121 |   CNS11643_6_ann = 6 << 13, | 
 | 122 |   CNS11643_7_ann = 7 << 13, | 
 | 123 |   SS3_ann = 7 << 13 | 
 | 124 | }; | 
 | 125 |  | 
 | 126 |  | 
 | 127 | /* Since this is a stateful encoding we have to provide code which resets | 
 | 128 |    the output state to the initial state.  This has to be done during the | 
 | 129 |    flushing.  */ | 
 | 130 | #define EMIT_SHIFT_TO_INIT \ | 
 | 131 |   if (data->__statep->__count >> 3 != ASCII_set)			      \ | 
 | 132 |     {									      \ | 
 | 133 |       if (FROM_DIRECTION)						      \ | 
 | 134 | 	/* It's easy, we don't have to emit anything, we just reset the	      \ | 
 | 135 | 	   state for the input.  */					      \ | 
 | 136 | 	data->__statep->__count = ASCII_set << 3;			      \ | 
 | 137 |       else								      \ | 
 | 138 | 	{								      \ | 
 | 139 | 	  /* We are not in the initial state.  To switch back we have	      \ | 
 | 140 | 	     to emit `SI'.  */						      \ | 
 | 141 | 	  if (__glibc_unlikely (outbuf == outend))			      \ | 
 | 142 | 	    /* We don't have enough room in the output buffer.  */	      \ | 
 | 143 | 	    status = __GCONV_FULL_OUTPUT;				      \ | 
 | 144 | 	  else								      \ | 
 | 145 | 	    {								      \ | 
 | 146 | 	      /* Write out the shift sequence.  */			      \ | 
 | 147 | 	      *outbuf++ = SI;						      \ | 
 | 148 | 	      if (data->__flags & __GCONV_IS_LAST)			      \ | 
 | 149 | 		*irreversible += 1;					      \ | 
 | 150 | 	      data->__statep->__count = ASCII_set << 3;			      \ | 
 | 151 | 	    }								      \ | 
 | 152 | 	}								      \ | 
 | 153 |     } | 
 | 154 |  | 
 | 155 |  | 
 | 156 | /* Since we might have to reset input pointer we must be able to save | 
 | 157 |    and retore the state.  */ | 
 | 158 | #define SAVE_RESET_STATE(Save) \ | 
 | 159 |   if (Save)								      \ | 
 | 160 |     save_set = *setp;							      \ | 
 | 161 |   else									      \ | 
 | 162 |     *setp = save_set | 
 | 163 |  | 
 | 164 |  | 
 | 165 | /* First define the conversion function from ISO-2022-CN to UCS4.  */ | 
 | 166 | #define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM | 
 | 167 | #define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM | 
 | 168 | #define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO | 
 | 169 | #define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO | 
 | 170 | #define LOOPFCT			FROM_LOOP | 
 | 171 | #define BODY \ | 
 | 172 |   {									      \ | 
 | 173 |     uint32_t ch = *inptr;						      \ | 
 | 174 | 									      \ | 
 | 175 |     /* This is a 7bit character set, disallow all 8bit characters.  */	      \ | 
 | 176 |     if (ch > 0x7f)							      \ | 
 | 177 |       STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \ | 
 | 178 | 									      \ | 
 | 179 |     /* Recognize escape sequences.  */					      \ | 
 | 180 |     if (ch == ESC)							      \ | 
 | 181 |       {									      \ | 
 | 182 | 	/* There are three kinds of escape sequences we have to handle:	      \ | 
 | 183 | 	   - those announcing the use of GB and CNS characters on the	      \ | 
 | 184 | 	     line; we can simply ignore them				      \ | 
 | 185 | 	   - the initial byte of the SS2 sequence.			      \ | 
 | 186 | 	   - the initial byte of the SS3 sequence.			      \ | 
 | 187 | 	*/								      \ | 
 | 188 | 	if (inptr + 2 > inend						      \ | 
 | 189 | 	    || (inptr[1] == '$'						      \ | 
 | 190 | 		&& (inptr + 3 > inend					      \ | 
 | 191 | 		    || (inptr[2] == ')' && inptr + 4 > inend)		      \ | 
 | 192 | 		    || (inptr[2] == '*' && inptr + 4 > inend)		      \ | 
 | 193 | 		    || (inptr[2] == '+' && inptr + 4 > inend)))		      \ | 
 | 194 | 	    || (inptr[1] == SS2_1 && inptr + 4 > inend)			      \ | 
 | 195 | 	    || (inptr[1] == SS3_1 && inptr + 4 > inend))		      \ | 
 | 196 | 	  {								      \ | 
 | 197 | 	    result = __GCONV_INCOMPLETE_INPUT;				      \ | 
 | 198 | 	    break;							      \ | 
 | 199 | 	  }								      \ | 
 | 200 | 	if (inptr[1] == '$'						      \ | 
 | 201 | 	    && ((inptr[2] == ')'					      \ | 
 | 202 | 		 && (inptr[3] == 'A'					      \ | 
 | 203 | 		     || (X12345 != '\0' && inptr[3] == X12345)		      \ | 
 | 204 | 		     || inptr[3] == 'E' || inptr[3] == 'G'))		      \ | 
 | 205 | 		|| (inptr[2] == '*'					      \ | 
 | 206 | 		    && ((X7589 != '\0' && inptr[3] == X7589)		      \ | 
 | 207 | 			|| (X13131 != '\0' && inptr[3] == X13131)	      \ | 
 | 208 | 			|| inptr[3] == 'H'))				      \ | 
 | 209 | 		|| (inptr[2] == '+'					      \ | 
 | 210 | 		    && ((X7590 != '\0' && inptr[3] == X7590)		      \ | 
 | 211 | 			|| (X13132 != '\0' && inptr[3] == X13132)	      \ | 
 | 212 | 			|| inptr[3] == 'I' || inptr[3] == 'J'		      \ | 
 | 213 | 			|| inptr[3] == 'K' || inptr[3] == 'L'		      \ | 
 | 214 | 			|| inptr[3] == 'M'))))				      \ | 
 | 215 | 	  {								      \ | 
 | 216 | 	    /* OK, we accept those character sets.  */			      \ | 
 | 217 | 	    if (inptr[3] == 'A')					      \ | 
 | 218 | 	      ann = (ann & ~SO_ann) | GB2312_ann;			      \ | 
 | 219 | 	    else if (inptr[3] == 'G')					      \ | 
 | 220 | 	      ann = (ann & ~SO_ann) | CNS11643_1_ann;			      \ | 
 | 221 | 	    else if (inptr[3] == 'E')					      \ | 
 | 222 | 	      ann = (ann & ~SO_ann) | ISO_IR_165_ann;			      \ | 
 | 223 | 	    else if (X12345 != '\0' && inptr[3] == X12345)		      \ | 
 | 224 | 	      ann = (ann & ~SO_ann) | GB12345_ann;			      \ | 
 | 225 | 	    else if (inptr[3] == 'H')					      \ | 
 | 226 | 	      ann = (ann & ~SS2_ann) | CNS11643_2_ann;			      \ | 
 | 227 | 	    else if (X7589 != '\0' && inptr[3] == X7589)		      \ | 
 | 228 | 	      ann = (ann & ~SS2_ann) | GB7589_ann;			      \ | 
 | 229 | 	    else if (X13131 != '\0' && inptr[3] == X13131)		      \ | 
 | 230 | 	      ann = (ann & ~SS2_ann) | GB13131_ann;			      \ | 
 | 231 | 	    else if (inptr[3] == 'I')					      \ | 
 | 232 | 	      ann = (ann & ~SS3_ann) | CNS11643_3_ann;			      \ | 
 | 233 | 	    else if (inptr[3] == 'J')					      \ | 
 | 234 | 	      ann = (ann & ~SS3_ann) | CNS11643_4_ann;			      \ | 
 | 235 | 	    else if (inptr[3] == 'K')					      \ | 
 | 236 | 	      ann = (ann & ~SS3_ann) | CNS11643_5_ann;			      \ | 
 | 237 | 	    else if (inptr[3] == 'L')					      \ | 
 | 238 | 	      ann = (ann & ~SS3_ann) | CNS11643_6_ann;			      \ | 
 | 239 | 	    else if (inptr[3] == 'M')					      \ | 
 | 240 | 	      ann = (ann & ~SS3_ann) | CNS11643_7_ann;			      \ | 
 | 241 | 	    else if (X7590 != '\0' && inptr[3] == X7590)		      \ | 
 | 242 | 	      ann = (ann & ~SS3_ann) | GB7590_ann;			      \ | 
 | 243 | 	    else if (X13132 != '\0' && inptr[3] == X13132)		      \ | 
 | 244 | 	      ann = (ann & ~SS3_ann) | GB13132_ann;			      \ | 
 | 245 | 	    inptr += 4;							      \ | 
 | 246 | 	    continue;							      \ | 
 | 247 | 	  }								      \ | 
 | 248 |       }									      \ | 
 | 249 |     else if (ch == SO)							      \ | 
 | 250 |       {									      \ | 
 | 251 | 	/* Switch to use GB2312, GB12345, CNS 11643 plane 1, or ISO-IR-165,   \ | 
 | 252 | 	   depending on which S0 designation came last.  The only problem     \ | 
 | 253 | 	   is what to do with faulty input files where no designator came.    \ | 
 | 254 | 	   XXX For now I'll default to use GB2312.  If this is not the	      \ | 
 | 255 | 	   best behavior (e.g., we should flag an error) let me know.  */     \ | 
 | 256 | 	++inptr;							      \ | 
 | 257 | 	if ((ann & SO_ann) != 0)					      \ | 
 | 258 | 	  switch (ann & SO_ann)						      \ | 
 | 259 | 	    {								      \ | 
 | 260 | 	    case GB2312_ann:						      \ | 
 | 261 | 	      set = GB2312_set;						      \ | 
 | 262 | 	      break;							      \ | 
 | 263 | 	    case GB12345_ann:						      \ | 
 | 264 | 	      set = GB12345_set;					      \ | 
 | 265 | 	      break;							      \ | 
 | 266 | 	    case CNS11643_1_ann:					      \ | 
 | 267 | 	      set = CNS11643_1_set;					      \ | 
 | 268 | 	      break;							      \ | 
 | 269 | 	    case ISO_IR_165_ann:					      \ | 
 | 270 | 	      set = ISO_IR_165_set;					      \ | 
 | 271 | 	      break;							      \ | 
 | 272 | 	    default:							      \ | 
 | 273 | 	      abort ();							      \ | 
 | 274 | 	    }								      \ | 
 | 275 | 	else								      \ | 
 | 276 | 	  {								      \ | 
 | 277 | 	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \ | 
 | 278 | 	  }								      \ | 
 | 279 | 	continue;							      \ | 
 | 280 |       }									      \ | 
 | 281 |     else if (ch == SI)							      \ | 
 | 282 |       {									      \ | 
 | 283 | 	/* Switch to use ASCII.  */					      \ | 
 | 284 | 	++inptr;							      \ | 
 | 285 | 	set = ASCII_set;						      \ | 
 | 286 | 	continue;							      \ | 
 | 287 |       }									      \ | 
 | 288 | 									      \ | 
 | 289 |     if (ch == ESC && inptr[1] == SS2_1)					      \ | 
 | 290 |       {									      \ | 
 | 291 | 	/* This is a character from CNS 11643 plane 2.			      \ | 
 | 292 | 	   XXX We could test here whether the use of this character	      \ | 
 | 293 | 	   set was announced.						      \ | 
 | 294 | 	   XXX Currently GB7589 and GB13131 are not supported.  */	      \ | 
 | 295 | 	inptr += 2;							      \ | 
 | 296 | 	ch = cns11643l2_to_ucs4 (&inptr, 2, 0);				      \ | 
 | 297 | 	if (ch == __UNKNOWN_10646_CHAR)					      \ | 
 | 298 | 	  STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \ | 
 | 299 |       }									      \ | 
 | 300 |     /* Note that we can assume here that at least 4 bytes are available if    \ | 
 | 301 |        the first byte is ESC since otherwise the first if would have been     \ | 
 | 302 |        true.  */							      \ | 
 | 303 |     else if (ch == ESC && inptr[1] == SS3_1)				      \ | 
 | 304 |       {									      \ | 
 | 305 | 	/* This is a character from CNS 11643 plane 3 or higher.	      \ | 
 | 306 | 	   XXX Currently GB7590 and GB13132 are not supported.  */	      \ | 
 | 307 | 	unsigned char buf[3];						      \ | 
 | 308 | 	const unsigned char *tmp = buf;					      \ | 
 | 309 | 									      \ | 
 | 310 | 	buf[1] = inptr[2];						      \ | 
 | 311 | 	buf[2] = inptr[3];						      \ | 
 | 312 | 	switch (ann & SS3_ann)						      \ | 
 | 313 | 	  {								      \ | 
 | 314 | 	  case CNS11643_3_ann:						      \ | 
 | 315 | 	    buf[0] = 0x23;						      \ | 
 | 316 | 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
 | 317 | 	    break;							      \ | 
 | 318 | 	  case CNS11643_4_ann:						      \ | 
 | 319 | 	    buf[0] = 0x24;						      \ | 
 | 320 | 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
 | 321 | 	    break;							      \ | 
 | 322 | 	  case CNS11643_5_ann:						      \ | 
 | 323 | 	    buf[0] = 0x25;						      \ | 
 | 324 | 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
 | 325 | 	    break;							      \ | 
 | 326 | 	  case CNS11643_6_ann:						      \ | 
 | 327 | 	    buf[0] = 0x26;						      \ | 
 | 328 | 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
 | 329 | 	    break;							      \ | 
 | 330 | 	  case CNS11643_7_ann:						      \ | 
 | 331 | 	    buf[0] = 0x27;						      \ | 
 | 332 | 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \ | 
 | 333 | 	    break;							      \ | 
 | 334 | 	  default:							      \ | 
 | 335 | 	    /* XXX Currently GB7590 and GB13132 are not supported.  */	      \ | 
 | 336 | 	    ch = __UNKNOWN_10646_CHAR;					      \ | 
 | 337 | 	    break;							      \ | 
 | 338 | 	  }								      \ | 
 | 339 | 	if (ch == __UNKNOWN_10646_CHAR)					      \ | 
 | 340 | 	  {								      \ | 
 | 341 | 	    STANDARD_FROM_LOOP_ERR_HANDLER (4);				      \ | 
 | 342 | 	  }								      \ | 
 | 343 | 	assert (tmp == buf + 3);					      \ | 
 | 344 | 	inptr += 4;							      \ | 
 | 345 |       }									      \ | 
 | 346 |     else if (set == ASCII_set)						      \ | 
 | 347 |       {									      \ | 
 | 348 | 	/* Almost done, just advance the input pointer.  */		      \ | 
 | 349 | 	++inptr;							      \ | 
 | 350 |       }									      \ | 
 | 351 |     else								      \ | 
 | 352 |       {									      \ | 
 | 353 | 	/* That's pretty easy, we have a dedicated functions for this.  */    \ | 
 | 354 | 	if (inend - inptr < 2)						      \ | 
 | 355 | 	  {								      \ | 
 | 356 | 	    result = __GCONV_INCOMPLETE_INPUT;				      \ | 
 | 357 | 	    break;							      \ | 
 | 358 | 	  }								      \ | 
 | 359 | 	if (set == GB2312_set)						      \ | 
 | 360 | 	  ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);		      \ | 
 | 361 | 	else if (set == ISO_IR_165_set)					      \ | 
 | 362 | 	  ch = isoir165_to_ucs4 (&inptr, inend - inptr);		      \ | 
 | 363 | 	else								      \ | 
 | 364 | 	  {								      \ | 
 | 365 | 	    assert (set == CNS11643_1_set);				      \ | 
 | 366 | 	    ch = cns11643l1_to_ucs4 (&inptr, inend - inptr, 0);		      \ | 
 | 367 | 	  }								      \ | 
 | 368 | 									      \ | 
 | 369 | 	if (ch == 0)							      \ | 
 | 370 | 	  {								      \ | 
 | 371 | 	    result = __GCONV_INCOMPLETE_INPUT;				      \ | 
 | 372 | 	    break;							      \ | 
 | 373 | 	  }								      \ | 
 | 374 | 	else if (ch == __UNKNOWN_10646_CHAR)				      \ | 
 | 375 | 	  {								      \ | 
 | 376 | 	    STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \ | 
 | 377 | 	  }								      \ | 
 | 378 |       }									      \ | 
 | 379 | 									      \ | 
 | 380 |     *((uint32_t *) outptr) = ch;					      \ | 
 | 381 |     outptr += sizeof (uint32_t);					      \ | 
 | 382 |   } | 
 | 383 | #define EXTRA_LOOP_DECLS	, int *setp | 
 | 384 | #define INIT_PARAMS		int set = (*setp >> 3) & CURRENT_MASK; \ | 
 | 385 | 				int ann = (*setp >> 3) & ~CURRENT_MASK | 
 | 386 | #define UPDATE_PARAMS		*setp = (set | ann) << 3 | 
 | 387 | #define LOOP_NEED_FLAGS | 
 | 388 | #include <iconv/loop.c> | 
 | 389 |  | 
 | 390 |  | 
 | 391 | /* Next, define the other direction.  */ | 
 | 392 | #define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM | 
 | 393 | #define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM | 
 | 394 | #define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO | 
 | 395 | #define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO | 
 | 396 | #define LOOPFCT			TO_LOOP | 
 | 397 | #define BODY \ | 
 | 398 |   {									      \ | 
 | 399 |     uint32_t ch;							      \ | 
 | 400 |     size_t written = 0;							      \ | 
 | 401 | 									      \ | 
 | 402 |     ch = *((const uint32_t *) inptr);					      \ | 
 | 403 | 									      \ | 
 | 404 |     /* First see whether we can write the character using the currently	      \ | 
 | 405 |        selected character set.  */					      \ | 
 | 406 |     if (ch < 0x80)							      \ | 
 | 407 |       {									      \ | 
 | 408 | 	if (set != ASCII_set)						      \ | 
 | 409 | 	  {								      \ | 
 | 410 | 	    *outptr++ = SI;						      \ | 
 | 411 | 	    set = ASCII_set;						      \ | 
 | 412 | 	    if (outptr == outend)					      \ | 
 | 413 | 	      {								      \ | 
 | 414 | 		result = __GCONV_FULL_OUTPUT;				      \ | 
 | 415 | 		break;							      \ | 
 | 416 | 	      }								      \ | 
 | 417 | 	  }								      \ | 
 | 418 | 									      \ | 
 | 419 | 	*outptr++ = ch;							      \ | 
 | 420 | 	written = 1;							      \ | 
 | 421 | 									      \ | 
 | 422 | 	/* At the end of the line we have to clear the `ann' flags since      \ | 
 | 423 | 	   every line must contain this information again.  */		      \ | 
 | 424 | 	if (ch == L'\n')						      \ | 
 | 425 | 	  ann = 0;							      \ | 
 | 426 |       }									      \ | 
 | 427 |     else								      \ | 
 | 428 |       {									      \ | 
 | 429 | 	unsigned char buf[2] = { 0, 0 };				      \ | 
 | 430 | 	int used;							      \ | 
 | 431 | 									      \ | 
 | 432 | 	if (set == GB2312_set || ((ann & SO_ann) != CNS11643_1_ann	      \ | 
 | 433 | 				  && (ann & SO_ann) != ISO_IR_165_ann))	      \ | 
 | 434 | 	  {								      \ | 
 | 435 | 	    written = ucs4_to_gb2312 (ch, buf, 2);			      \ | 
 | 436 | 	    used = GB2312_set;						      \ | 
 | 437 | 	  }								      \ | 
 | 438 | 	else if (set == ISO_IR_165_set || (ann & SO_ann) == ISO_IR_165_set)   \ | 
 | 439 | 	  {								      \ | 
 | 440 | 	    written = ucs4_to_isoir165 (ch, buf, 2);			      \ | 
 | 441 | 	    used = ISO_IR_165_set;					      \ | 
 | 442 | 	  }								      \ | 
 | 443 | 	else								      \ | 
 | 444 | 	  {								      \ | 
 | 445 | 	    written = ucs4_to_cns11643l1 (ch, buf, 2);			      \ | 
 | 446 | 	    used = CNS11643_1_set;					      \ | 
 | 447 | 	  }								      \ | 
 | 448 | 									      \ | 
 | 449 | 	if (written == __UNKNOWN_10646_CHAR)				      \ | 
 | 450 | 	  {								      \ | 
 | 451 | 	    /* Cannot convert it using the currently selected SO set.	      \ | 
 | 452 | 	       Next try the SS2 set.  */				      \ | 
 | 453 | 	    written = ucs4_to_cns11643l2 (ch, buf, 2);			      \ | 
 | 454 | 	    if (written != __UNKNOWN_10646_CHAR)			      \ | 
 | 455 | 	      /* Yep, that worked.  */					      \ | 
 | 456 | 	      used = CNS11643_2_set;					      \ | 
 | 457 | 	    else							      \ | 
 | 458 | 	      {								      \ | 
 | 459 | 		unsigned char tmpbuf[3];				      \ | 
 | 460 | 									      \ | 
 | 461 | 		switch (0)						      \ | 
 | 462 | 		  {							      \ | 
 | 463 | 		  default:						      \ | 
 | 464 | 		    /* Well, see whether we have to change the SO set.  */    \ | 
 | 465 | 									      \ | 
 | 466 | 		    if (used != GB2312_set)				      \ | 
 | 467 | 		      {							      \ | 
 | 468 | 			written = ucs4_to_gb2312 (ch, buf, 2);		      \ | 
 | 469 | 			if (written != __UNKNOWN_10646_CHAR)		      \ | 
 | 470 | 			  {						      \ | 
 | 471 | 			    used = GB2312_set;				      \ | 
 | 472 | 			    break;					      \ | 
 | 473 | 			  }						      \ | 
 | 474 | 		      }							      \ | 
 | 475 | 									      \ | 
 | 476 | 		    if (used != ISO_IR_165_set)				      \ | 
 | 477 | 		      {							      \ | 
 | 478 | 			written = ucs4_to_isoir165 (ch, buf, 2);	      \ | 
 | 479 | 			if (written != __UNKNOWN_10646_CHAR)		      \ | 
 | 480 | 			  {						      \ | 
 | 481 | 			    used = ISO_IR_165_set;			      \ | 
 | 482 | 			    break;					      \ | 
 | 483 | 			  }						      \ | 
 | 484 | 		      }							      \ | 
 | 485 | 									      \ | 
 | 486 | 		    if (used != CNS11643_1_set)				      \ | 
 | 487 | 		      {							      \ | 
 | 488 | 			written = ucs4_to_cns11643l1 (ch, buf, 2);	      \ | 
 | 489 | 			if (written != __UNKNOWN_10646_CHAR)		      \ | 
 | 490 | 			  {						      \ | 
 | 491 | 			    used = CNS11643_1_set;			      \ | 
 | 492 | 			    break;					      \ | 
 | 493 | 			  }						      \ | 
 | 494 | 		      }							      \ | 
 | 495 | 									      \ | 
 | 496 | 		    written = ucs4_to_cns11643 (ch, tmpbuf, 3);		      \ | 
 | 497 | 		    if (written == 3 && tmpbuf[0] >= 3 && tmpbuf[0] <= 7)     \ | 
 | 498 | 		      {							      \ | 
 | 499 | 			buf[0] = tmpbuf[1];				      \ | 
 | 500 | 			buf[1] = tmpbuf[2];				      \ | 
 | 501 | 			switch (tmpbuf[0])				      \ | 
 | 502 | 			  {						      \ | 
 | 503 | 			  case 3:					      \ | 
 | 504 | 			    used = CNS11643_3_set;			      \ | 
 | 505 | 			    break;					      \ | 
 | 506 | 			  case 4:					      \ | 
 | 507 | 			    used = CNS11643_4_set;			      \ | 
 | 508 | 			    break;					      \ | 
 | 509 | 			  case 5:					      \ | 
 | 510 | 			    used = CNS11643_5_set;			      \ | 
 | 511 | 			    break;					      \ | 
 | 512 | 			  case 6:					      \ | 
 | 513 | 			    used = CNS11643_6_set;			      \ | 
 | 514 | 			    break;					      \ | 
 | 515 | 			  case 7:					      \ | 
 | 516 | 			    used = CNS11643_7_set;			      \ | 
 | 517 | 			    break;					      \ | 
 | 518 | 			  default:					      \ | 
 | 519 | 			    abort ();					      \ | 
 | 520 | 			  }						      \ | 
 | 521 | 			written = 2;					      \ | 
 | 522 | 			break;						      \ | 
 | 523 | 		      }							      \ | 
 | 524 | 									      \ | 
 | 525 | 		    /* XXX Currently GB7590 and GB13132 are not supported.  */\ | 
 | 526 | 									      \ | 
 | 527 | 		    /* Even this does not work.  Error.  */		      \ | 
 | 528 | 		    used = ASCII_set;					      \ | 
 | 529 | 		  }							      \ | 
 | 530 | 		if (used == ASCII_set)					      \ | 
 | 531 | 		  {							      \ | 
 | 532 | 		    UNICODE_TAG_HANDLER (ch, 4);			      \ | 
 | 533 | 		    STANDARD_TO_LOOP_ERR_HANDLER (4);			      \ | 
 | 534 | 		  }							      \ | 
 | 535 | 	      }								      \ | 
 | 536 | 	  }								      \ | 
 | 537 | 	assert (written == 2);						      \ | 
 | 538 | 									      \ | 
 | 539 | 	/* See whether we have to emit an escape sequence.  */		      \ | 
 | 540 | 	if (set != used)						      \ | 
 | 541 | 	  {								      \ | 
 | 542 | 	    /* First see whether we announced that we use this		      \ | 
 | 543 | 	       character set.  */					      \ | 
 | 544 | 	    if ((used & SO_mask) != 0 && (ann & SO_ann) != (used << 8))	      \ | 
 | 545 | 	      {								      \ | 
 | 546 | 		const char *escseq;					      \ | 
 | 547 | 									      \ | 
 | 548 | 		if (outptr + 4 > outend)				      \ | 
 | 549 | 		  {							      \ | 
 | 550 | 		    result = __GCONV_FULL_OUTPUT;			      \ | 
 | 551 | 		    break;						      \ | 
 | 552 | 		  }							      \ | 
 | 553 | 									      \ | 
 | 554 | 		assert (used >= 1 && used <= 4);			      \ | 
 | 555 | 		escseq = ")A\0\0)G)E" + (used - 1) * 2;			      \ | 
 | 556 | 		*outptr++ = ESC;					      \ | 
 | 557 | 		*outptr++ = '$';					      \ | 
 | 558 | 		*outptr++ = *escseq++;					      \ | 
 | 559 | 		*outptr++ = *escseq++;					      \ | 
 | 560 | 									      \ | 
 | 561 | 		ann = (ann & ~SO_ann) | (used << 8);			      \ | 
 | 562 | 	      }								      \ | 
 | 563 | 	    else if ((used & SS2_mask) != 0 && (ann & SS2_ann) != (used << 8))\ | 
 | 564 | 	      {								      \ | 
 | 565 | 		const char *escseq;					      \ | 
 | 566 | 									      \ | 
 | 567 | 		assert (used == CNS11643_2_set); /* XXX */		      \ | 
 | 568 | 		escseq = "*H";						      \ | 
 | 569 | 		*outptr++ = ESC;					      \ | 
 | 570 | 		*outptr++ = '$';					      \ | 
 | 571 | 		*outptr++ = *escseq++;					      \ | 
 | 572 | 		*outptr++ = *escseq++;					      \ | 
 | 573 | 									      \ | 
 | 574 | 		ann = (ann & ~SS2_ann) | (used << 8);			      \ | 
 | 575 | 	      }								      \ | 
 | 576 | 	    else if ((used & SS3_mask) != 0 && (ann & SS3_ann) != (used << 8))\ | 
 | 577 | 	      {								      \ | 
 | 578 | 		const char *escseq;					      \ | 
 | 579 | 									      \ | 
 | 580 | 		assert ((used >> 5) >= 3 && (used >> 5) <= 7);		      \ | 
 | 581 | 		escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2;		      \ | 
 | 582 | 		*outptr++ = ESC;					      \ | 
 | 583 | 		*outptr++ = '$';					      \ | 
 | 584 | 		*outptr++ = *escseq++;					      \ | 
 | 585 | 		*outptr++ = *escseq++;					      \ | 
 | 586 | 									      \ | 
 | 587 | 		ann = (ann & ~SS3_ann) | (used << 8);			      \ | 
 | 588 | 	      }								      \ | 
 | 589 | 									      \ | 
 | 590 | 	    if (used == CNS11643_2_set)					      \ | 
 | 591 | 	      {								      \ | 
 | 592 | 		if (outptr + 2 > outend)				      \ | 
 | 593 | 		  {							      \ | 
 | 594 | 		    result = __GCONV_FULL_OUTPUT;			      \ | 
 | 595 | 		    break;						      \ | 
 | 596 | 		  }							      \ | 
 | 597 | 		*outptr++ = SS2_0;					      \ | 
 | 598 | 		*outptr++ = SS2_1;					      \ | 
 | 599 | 	      }								      \ | 
 | 600 | 	    else if (used >= CNS11643_3_set && used <= CNS11643_7_set)	      \ | 
 | 601 | 	      {								      \ | 
 | 602 | 		if (outptr + 2 > outend)				      \ | 
 | 603 | 		  {							      \ | 
 | 604 | 		    result = __GCONV_FULL_OUTPUT;			      \ | 
 | 605 | 		    break;						      \ | 
 | 606 | 		  }							      \ | 
 | 607 | 		*outptr++ = SS3_0;					      \ | 
 | 608 | 		*outptr++ = SS3_1;					      \ | 
 | 609 | 	      }								      \ | 
 | 610 | 	    else							      \ | 
 | 611 | 	      {								      \ | 
 | 612 | 		/* We only have to emit something if currently ASCII is	      \ | 
 | 613 | 		   selected.  Otherwise we are switching within the	      \ | 
 | 614 | 		   SO charset.  */					      \ | 
 | 615 | 		if (set == ASCII_set)					      \ | 
 | 616 | 		  {							      \ | 
 | 617 | 		    if (outptr + 1 > outend)				      \ | 
 | 618 | 		      {							      \ | 
 | 619 | 			result = __GCONV_FULL_OUTPUT;			      \ | 
 | 620 | 			break;						      \ | 
 | 621 | 		      }							      \ | 
 | 622 | 		    *outptr++ = SO;					      \ | 
 | 623 | 		  }							      \ | 
 | 624 | 	      }								      \ | 
 | 625 | 									      \ | 
 | 626 | 	    /* Always test the length here since we have used up all the      \ | 
 | 627 | 	       guaranteed output buffer slots.  */			      \ | 
 | 628 | 	    if (outptr + 2 > outend)					      \ | 
 | 629 | 	      {								      \ | 
 | 630 | 		result = __GCONV_FULL_OUTPUT;				      \ | 
 | 631 | 		break;							      \ | 
 | 632 | 	      }								      \ | 
 | 633 | 	  }								      \ | 
 | 634 | 	else if (outptr + 2 > outend)					      \ | 
 | 635 | 	  {								      \ | 
 | 636 | 	    result = __GCONV_FULL_OUTPUT;				      \ | 
 | 637 | 	    break;							      \ | 
 | 638 | 	  }								      \ | 
 | 639 | 									      \ | 
 | 640 | 	*outptr++ = buf[0];						      \ | 
 | 641 | 	*outptr++ = buf[1];						      \ | 
 | 642 | 	set = used;							      \ | 
 | 643 |       }									      \ | 
 | 644 | 									      \ | 
 | 645 |     /* Now that we wrote the output increment the input pointer.  */	      \ | 
 | 646 |     inptr += 4;								      \ | 
 | 647 |   } | 
 | 648 | #define EXTRA_LOOP_DECLS	, int *setp | 
 | 649 | #define INIT_PARAMS		int set = (*setp >> 3) & CURRENT_MASK; \ | 
 | 650 | 				int ann = (*setp >> 3) & ~CURRENT_MASK | 
 | 651 | #define REINIT_PARAMS		do					      \ | 
 | 652 | 				  {					      \ | 
 | 653 | 				    set = (*setp >> 3) & CURRENT_MASK;	      \ | 
 | 654 | 				    ann = (*setp >> 3) & ~CURRENT_MASK;	      \ | 
 | 655 | 				  }					      \ | 
 | 656 | 				while (0) | 
 | 657 | #define UPDATE_PARAMS		*setp = (set | ann) << 3 | 
 | 658 | #define LOOP_NEED_FLAGS | 
 | 659 | #include <iconv/loop.c> | 
 | 660 |  | 
 | 661 |  | 
 | 662 | /* Now define the toplevel functions.  */ | 
 | 663 | #include <iconv/skeleton.c> |