| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Mapping tables from GBK to GB2312 and vice versa. | 
 | 2 |    Copyright (C) 1999-2016 Free Software Foundation, Inc. | 
 | 3 |    This file is part of the GNU C Library. | 
 | 4 |    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. | 
 | 5 |  | 
 | 6 |    The GNU C Library is free software; you can redistribute it and/or | 
 | 7 |    modify it under the terms of the GNU Lesser General Public | 
 | 8 |    License as published by the Free Software Foundation; either | 
 | 9 |    version 2.1 of the License, or (at your option) any later version. | 
 | 10 |  | 
 | 11 |    The GNU C Library is distributed in the hope that it will be useful, | 
 | 12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
 | 14 |    Lesser General Public License for more details. | 
 | 15 |  | 
 | 16 |    You should have received a copy of the GNU Lesser General Public | 
 | 17 |    License along with the GNU C Library; if not, see | 
 | 18 |    <http://www.gnu.org/licenses/>.  */ | 
 | 19 |  | 
 | 20 | #include <dlfcn.h> | 
 | 21 | #include <gconv.h> | 
 | 22 | #include <stdint.h> | 
 | 23 |  | 
 | 24 |  | 
 | 25 | /* Definitions used in the body of the `gconv' function.  */ /* They parameterize the generic code included further down from <iconv/loop.c> and <iconv/skeleton.c>.  */ | 
 | 26 | #define CHARSET_NAME		"GBK//" /* Name of the GBK side; NOTE(review): presumably matched by skeleton.c to pick the conversion direction -- confirm there.  */ | 
 | 27 | #define FROM_LOOP		from_gbk_to_gb /* Name given (via LOOPFCT) to the GBK -> GB2312 loop defined below.  */ | 
 | 28 | #define TO_LOOP			from_gb_to_gbk /* Name given (via LOOPFCT) to the GB2312 -> GBK loop defined below.  */ | 
 | 29 | #define DEFINE_INIT		1 /* NOTE(review): presumably requests the default init function from skeleton.c -- confirm there.  */ | 
 | 30 | #define DEFINE_FINI		1 /* NOTE(review): presumably requests the default cleanup function from skeleton.c -- confirm there.  */ | 
 | 31 | #define MIN_NEEDED_FROM		1 /* GBK side: shortest character is one byte (ASCII).  */ | 
 | 32 | #define MAX_NEEDED_FROM		2 /* GBK side: longest character is two bytes.  */ | 
 | 33 | #define MIN_NEEDED_TO		1 /* GB2312 side: shortest character is one byte (ASCII).  */ | 
 | 34 | #define MAX_NEEDED_TO		2 /* GB2312 side: longest character is two bytes.  */ | 
 | 35 | #define ONE_DIRECTION		0 /* Both FROM_LOOP and TO_LOOP are defined in this file.  */ | 
 | 36 |  | 
 | 37 |  | 
 /* First define the conversion function from GBK to GB2312.

    Bytes up to 0x7f are copied through unchanged.  Everything else is
    treated as a two-byte sequence which is only accepted if it also
    exists in GB2312.  Handed to STANDARD_TO_LOOP_ERR_HANDLER are:

      - every sequence below 0xA1A1 or above 0xF7FE (this covers the
        whole GBK-only range 0x8140 to 0xA0FE),
      - every sequence whose second byte is outside 0xA1 to 0xFE,
      - 0xA2A1 to 0xA2AA,
      - 0xA6E0 to 0xA6F5 (the individually listed GBK-only codes are
        0xA6E0 to 0xA6EB, 0xA6EE to 0xA6F2, 0xA6F4 and 0xA6F5; the code
        rejects the whole contiguous span),
      - 0xA8BB to 0xA8C0.

    Besides this there is an incompatibility in the mapping.  The
    Unicode tables say that 0xA1A4 maps in GB2312 to U30FB while in GBK
    it maps to U00B7.  Similarly, 0xA1AA maps in GB2312 to U2015 while in
    GBK it maps to U2014.  Since we are free to do whatever we want if a
    mapping is not available we do not flag this as an error but copy the
    two positions unchanged.  This means that the mapping

 	UCS4 -> GB2312 -> GBK -> UCS4

    might not produce identical text.

    The one translation that is performed is GBK 0xA844 (U2015) to
    GB2312 0xA1AA (U2015).  For it to take effect the validity test and
    the output bytes are both derived from the (possibly remapped) code
    value CH and not from the raw input bytes.

    Whenever the body leaves the loop early INPTR still points to the
    first byte of the sequence which has not been converted.  */
 #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
 #define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
 #define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
 #define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
 #define LOOPFCT			FROM_LOOP
 #define BODY \
   { \
     uint32_t ch = *inptr; \
 \
     if (ch <= 0x7f) \
       { \
 	/* Plain ASCII, one byte in and one byte out.  */ \
 	*outptr++ = (unsigned char) ch; \
 	++inptr; \
       } \
     else \
       { \
 	if (__glibc_unlikely (inptr + 1 >= inend)) \
 	  { \
 	    /* The second byte is not available.  INPTR stays on the \
 	       lead byte so the sequence can be retried later.  */ \
 	    result = __GCONV_INCOMPLETE_INPUT; \
 	    break; \
 	  } \
 \
 	if (__glibc_unlikely (outend - outptr < 2)) \
 	  { \
 	    /* We ran out of space.  */ \
 	    result = __GCONV_FULL_OUTPUT; \
 	    break; \
 	  } \
 \
 	ch = (ch << 8) | inptr[1]; \
 \
 	/* Map 0xA844 (U2015 in GBK) to 0xA1AA (U2015 in GB2312).  */ \
 	if (__glibc_unlikely (ch == 0xa844)) \
 	  ch = 0xa1aa; \
 \
 	/* Now determine whether the character is valid.  The second \
 	   byte is taken from CH: testing the raw input byte would \
 	   reject the remapped 0xA844 because of its trail byte 0x44.  */ \
 	if (__glibc_unlikely (ch < 0xa1a1 \
 			      || ch > 0xf7fe \
 			      || (ch & 0xff) < 0xa1 \
 			      /* 0xFF is never a valid second byte.  */ \
 			      || (ch & 0xff) > 0xfe \
 			      /* Now test the exceptions.  */ \
 			      || (ch >= 0xa2a1 && ch <= 0xa2aa) \
 			      || (ch >= 0xa6e0 && ch <= 0xa6f5) \
 			      || (ch >= 0xa8bb && ch <= 0xa8c0))) \
 	  { \
 	    /* One of the characters we cannot map.  */ \
 	    STANDARD_TO_LOOP_ERR_HANDLER (2); \
 	  } \
 \
 	/* Write the two bytes of CH and consume the input sequence.  */ \
 	*outptr++ = (unsigned char) (ch >> 8); \
 	*outptr++ = (unsigned char) (ch & 0xff); \
 	inptr += 2; \
       } \
   }
 #define LOOP_NEED_FLAGS
 | 120 | #include <iconv/loop.c> | 
 | 121 |  | 
 | 122 |  | 
 /* Next, define the other direction, GB2312 to GBK.

    No validation and no remapping is done here: a byte up to 0x7f is
    copied on its own, any other byte is copied together with the byte
    following it.  The only problematic positions are 0xA1A4 and 0xA1AA,
    for which GB2312 and GBK disagree about the Unicode value; nothing
    special is done for them.

    INPTR is only advanced once a complete character has been written.
    When the loop is left with __GCONV_INCOMPLETE_INPUT or
    __GCONV_FULL_OUTPUT it therefore still points to the lead byte, and a
    complete two-byte character at the very end of the input is
    converted instead of being reported as incomplete.  */
 #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
 #define MAX_NEEDED_INPUT	MAX_NEEDED_TO
 #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
 #define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
 #define LOOPFCT			TO_LOOP
 #define BODY \
   { \
     unsigned char ch = *inptr; \
 \
     if (ch <= 0x7f) \
       { \
 	/* Single byte character.  */ \
 	*outptr++ = ch; \
 	++inptr; \
       } \
     else \
       { \
 	if (__glibc_unlikely (inptr + 1 >= inend)) \
 	  { \
 	    /* The second byte is not available.  Nothing has been \
 	       consumed so far.  */ \
 	    result = __GCONV_INCOMPLETE_INPUT; \
 	    break; \
 	  } \
 \
 	if (__glibc_unlikely (outend - outptr < 2)) \
 	  { \
 	    /* We ran out of space.  */ \
 	    result = __GCONV_FULL_OUTPUT; \
 	    break; \
 	  } \
 \
 	/* Copy the two bytes.  */ \
 	*outptr++ = ch; \
 	*outptr++ = inptr[1]; \
 	inptr += 2; \
       } \
   }
 | 158 | #include <iconv/loop.c> | 
 | 159 |  | 
 | 160 |  | 
 | 161 | /* Now define the toplevel functions.  */ | 
 | 162 | #include <iconv/skeleton.c> |