Blame - ap/libc/glibc/glibc-2.22/iconvdata/iso-2022-jp.c - T106_DC

blob: d4cbabc5dfefd6c2ba39c38001df2aed3cba121b [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
				2	Copyright (C) 1998-2015 Free Software Foundation, Inc.
				3	This file is part of the GNU C Library.
				4	Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
				5
				6	The GNU C Library is free software; you can redistribute it and/or
				7	modify it under the terms of the GNU Lesser General Public
				8	License as published by the Free Software Foundation; either
				9	version 2.1 of the License, or (at your option) any later version.
				10
				11	The GNU C Library is distributed in the hope that it will be useful,
				12	but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				14	Lesser General Public License for more details.
				15
				16	You should have received a copy of the GNU Lesser General Public
				17	License along with the GNU C Library; if not, see
				18	<http://www.gnu.org/licenses/>. */
				19
				20	#include <assert.h>
				21	#include <dlfcn.h>
				22	#include <gconv.h>
				23	#include <stdint.h>
				24	#include <stdlib.h>
				25	#include <string.h>
				26	#include "jis0201.h"
				27	#include "jis0208.h"
				28	#include "jis0212.h"
				29	#include "gb2312.h"
				30	#include "ksc5601.h"
				31
				32	struct gap
				33	{
					/* START and END are compared against a UCS4 value: the encoder below
					   skips entries while the value is greater than END and accepts the
					   entry when the value is at least START. */
				34	uint16_t start;
				35	uint16_t end;
					/* Added to (value - 0xa0) to form the index into iso88597_from_ucs4.
					   NOTE(review): the table of gaps is presumably defined in
					   iso8859-7jp.h, included right after this type - confirm. */
				36	int32_t idx;
				37	};
				38
				39	#include "iso8859-7jp.h"
				40
				41	/* This makes obvious what everybody knows: 0x1b is the Esc character. */
				42	#define ESC 0x1b
				43
				44	/* We provide our own initialization and destructor function. */
				45	#define DEFINE_INIT 0
				46	#define DEFINE_FINI 0
				47
				48	/* Definitions used in the body of the `gconv' function. */
				49	#define FROM_LOOP from_iso2022jp_loop
				50	#define TO_LOOP to_iso2022jp_loop
				51	#define ONE_DIRECTION 0
					/* Decoding reads between one and four bytes per step (the longest
					   escape sequence recognized below is four bytes long) and stores
					   one 4-byte UCS4 value. */
				52	#define FROM_LOOP_MIN_NEEDED_FROM 1
				53	#define FROM_LOOP_MAX_NEEDED_FROM 4
				54	#define FROM_LOOP_MIN_NEEDED_TO 4
				55	#define FROM_LOOP_MAX_NEEDED_TO 4
					/* Encoding reads one 4-byte UCS4 value per step and may write up to
					   six bytes: a four-byte escape sequence plus a two-byte character,
					   or two three-byte escape sequences. */
				56	#define TO_LOOP_MIN_NEEDED_FROM 4
				57	#define TO_LOOP_MAX_NEEDED_FROM 4
				58	#define TO_LOOP_MIN_NEEDED_TO 1
				59	#define TO_LOOP_MAX_NEEDED_TO 6
				60	#define FROM_DIRECTION (dir == from_iso2022jp)
					/* PREPARE_LOOP reads the direction and variant that gconv_init stored
					   in step->__data, declares SAVE_SET for SAVE_RESET_STATE, and makes
					   SETP point at the __count member of the conversion state. */
				61	#define PREPARE_LOOP \
				62	enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
				63	enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
				64	int save_set; \
				65	int *setp = &data->__statep->__count;
					/* These match the parameters named by EXTRA_LOOP_DECLS.
					   NOTE(review): presumably appended by the skeleton to every call of
					   the loop functions - confirm in iconv/skeleton.c. */
				66	#define EXTRA_LOOP_ARGS , var, setp
				67
				68
				/* Direction of the transformation.  The zero value means that no
				   direction has been determined; gconv_init refuses to set up a step
				   in that case.  */
				enum direction
				{
				  illegal_dir = 0,
				  to_iso2022jp = 1,
				  from_iso2022jp = 2
				};
				76
				/* The two encodings served by this module: ISO-2022-JP and its
				   superset ISO-2022-JP-2.  The zero value means "not determined".  */
				enum variant
				{
				  illegal_var = 0,
				  iso2022jp = 1,
				  iso2022jp2 = 2
				};
				84
				85
				86	struct iso2022jp_data
				87	{
					/* Per-step data: gconv_init allocates one of these, fills in both
					   members and stores the pointer in step->__data; PREPARE_LOOP reads
					   them back and gconv_end frees the object. */
				88	enum direction dir;
				89	enum variant var;
				90	};
				91
				92
				/* The COUNT element of the state keeps track of the currently selected
				   character set.  The selection lives in bits 3..5; the possible
				   values are:  */
				enum
				{
				  ASCII_set          = 0x00,	/* 0 << 3 */
				  JISX0208_1978_set  = 0x08,	/* 1 << 3 */
				  JISX0208_1983_set  = 0x10,	/* 2 << 3 */
				  JISX0201_Roman_set = 0x18,	/* 3 << 3 */
				  JISX0201_Kana_set  = 0x20,	/* 4 << 3 */
				  GB2312_set         = 0x28,	/* 5 << 3 */
				  KSC5601_set        = 0x30,	/* 6 << 3 */
				  JISX0212_set       = 0x38,	/* 7 << 3 */
				  CURRENT_SEL_MASK   = 0x38	/* 7 << 3 */
				};
				107
				/* The second value stored is the designation of the G2 set.  It lives
				   in bits 6..7; the following values are possible:  */
				enum
				{
				  UNSPECIFIED_set     = 0x00,	/* 0 << 6 */
				  ISO88591_set        = 0x40,	/* 1 << 6 */
				  ISO88597_set        = 0x80,	/* 2 << 6 */
				  CURRENT_ASSIGN_MASK = 0xc0	/* 3 << 6 */
				};
				117
				/* The third value, only used during conversion from Unicode to
				   ISO-2022-JP-2, describes the language tag parsing status.  It lives
				   in bits 8..10.  Values below TAG_language are stable states, values
				   >= TAG_language are temporary tag parsing states.  */
				enum
				{
				  /* Stable states.  */
				  TAG_none        = 0x000,	/* 0 << 8 */
				  TAG_language_ja = 0x100,	/* 1 << 8 */
				  TAG_language_ko = 0x200,	/* 2 << 8 */
				  TAG_language_zh = 0x300,	/* 3 << 8 */

				  /* Temporary states while a tag is being read.  */
				  TAG_language    = 0x400,	/* 4 << 8 */
				  TAG_language_j  = 0x500,	/* 5 << 8 */
				  TAG_language_k  = 0x600,	/* 6 << 8 */
				  TAG_language_z  = 0x700,	/* 7 << 8 */

				  CURRENT_TAG_MASK = 0x700	/* 7 << 8 */
				};
				133
				134
				135	extern int gconv_init (struct __gconv_step *step);
				136	int
				137	gconv_init (struct __gconv_step *step)
				138	{
				139	/* Determine which direction. */
				140	struct iso2022jp_data *new_data;
				141	enum direction dir = illegal_dir;
				142	enum variant var = illegal_var;
				143	int result;
				144
				145	if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
				146	{
				147	dir = from_iso2022jp;
				148	var = iso2022jp;
				149	}
				150	else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
				151	{
				152	dir = to_iso2022jp;
				153	var = iso2022jp;
				154	}
				155	else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
				156	{
				157	dir = from_iso2022jp;
				158	var = iso2022jp2;
				159	}
				160	else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
				161	{
				162	dir = to_iso2022jp;
				163	var = iso2022jp2;
				164	}
				165
				166	result = __GCONV_NOCONV;
				167	if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
				168	{
				169	new_data
				170	= (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
				171
				172	result = __GCONV_NOMEM;
				173	if (new_data != NULL)
				174	{
				175	new_data->dir = dir;
				176	new_data->var = var;
				177	step->__data = new_data;
				178
				179	if (dir == from_iso2022jp)
				180	{
				181	step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
				182	step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
				183	step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
				184	step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
				185	}
				186	else
				187	{
				188	step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
				189	step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
				190	step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
				191	step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
				192	}
				193
				194	/* Yes, this is a stateful encoding. */
				195	step->__stateful = 1;
				196
				197	result = __GCONV_OK;
				198	}
				199	}
				200
				201	return result;
				202	}
				203
				204
				205	extern void gconv_end (struct __gconv_step *data);
					/* Destructor of the step: releases the `struct iso2022jp_data' that
					   gconv_init allocated and stored in the __data member. */
				206	void
				207	gconv_end (struct __gconv_step *data)
				208	{
				209	free (data->__data);
				210	}
				211
				212
				213	/* Since this is a stateful encoding we have to provide code which resets
				214	the output state to the initial state. This has to be done during the
				215	flushing.
					Nothing is done unless some bit above the low three bits of __count
					is set. When decoding, or when encoding with ASCII already selected,
					those bits are simply cleared. Otherwise the sequence `Esc ( B' is
					written first; if fewer than three bytes of output space remain,
					STATUS becomes __GCONV_FULL_OUTPUT and the state is left untouched. */
				216	#define EMIT_SHIFT_TO_INIT \
				217	/* Avoid warning about unused variable 'var'. */ \
				218	(void) var; \
				219	\
				220	if ((data->__statep->__count & ~7) != ASCII_set) \
				221	{ \
				222	if (dir == from_iso2022jp \
				223	\|\| (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set) \
				224	{ \
				225	/* It's easy, we don't have to emit anything, we just reset the \
				226	state for the input. Note that this also clears the G2 \
				227	designation. */ \
				228	data->__statep->__count &= 7; \
				229	data->__statep->__count \|= ASCII_set; \
				230	} \
				231	else \
				232	{ \
				233	/* We are not in the initial state. To switch back we have \
				234	to emit the sequence `Esc ( B'. */ \
				235	if (__glibc_unlikely (outbuf + 3 > outend)) \
				236	/* We don't have enough room in the output buffer. */ \
				237	status = __GCONV_FULL_OUTPUT; \
				238	else \
				239	{ \
				240	/* Write out the shift sequence. */ \
				241	*outbuf++ = ESC; \
				242	*outbuf++ = '('; \
				243	*outbuf++ = 'B'; \
				244	/* Note that this also clears the G2 designation. */ \
				245	data->__statep->__count &= 7; \
				246	data->__statep->__count \|= ASCII_set; \
				247	} \
				248	} \
				249	}
				250
				251
				252	/* Since we might have to reset the input pointer we must be able to save
				253	and restore the state. A nonzero SAVE copies the state word *SETP into
					SAVE_SET; a zero SAVE copies SAVE_SET back into *SETP. */
				254	#define SAVE_RESET_STATE(Save) \
				255	if (Save) \
				256	save_set = *setp; \
				257	else \
				258	*setp = save_set
				259
				260
				261	/* First define the conversion function from ISO-2022-JP to UCS4.
					Each pass through BODY looks at the byte at INPTR. A recognized
					escape sequence only updates SET (the selected character set) or
					SET2 (the G2 designation) and produces no output. For ISO-2022-JP-2,
					`Esc N' plus one byte is converted through the designated G2 set.
					An ESC that starts none of these sequences falls through and is
					copied like any other control character. Bytes >= 0x80 are handed
					to the error handler. Everything else is converted according to SET
					and the resulting UCS4 value is stored with put32. */
				262	#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
				263	#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
				264	#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
				265	#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
				266	#define LOOPFCT FROM_LOOP
				267	#define BODY \
				268	{ \
				269	uint32_t ch = *inptr; \
				270	\
				271	/* Recognize escape sequences. */ \
				272	if (__builtin_expect (ch, 0) == ESC) \
				273	{ \
				274	/* We now must be prepared to read two to three more \
				275	characters. If we have a match in the first character but \
				276	then the input buffer ends we terminate with an error since \
				277	we must not risk missing an escape sequence just because it \
				278	is not entirely in the current input buffer. */ \
				279	if (__builtin_expect (inptr + 2 >= inend, 0) \
				280	\|\| (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
				281	&& __builtin_expect (inptr + 3 >= inend, 0))) \
				282	{ \
				283	/* Not enough input available. */ \
				284	result = __GCONV_INCOMPLETE_INPUT; \
				285	break; \
				286	} \
				287	\
				288	if (inptr[1] == '(') \
				289	{ \
				290	if (inptr[2] == 'B') \
				291	{ \
				292	/* ASCII selected. */ \
				293	set = ASCII_set; \
				294	inptr += 3; \
				295	continue; \
				296	} \
				297	else if (inptr[2] == 'J') \
				298	{ \
				299	/* JIS X 0201 Roman selected. */ \
				300	set = JISX0201_Roman_set; \
				301	inptr += 3; \
				302	continue; \
				303	} \
				304	else if (var == iso2022jp2 && inptr[2] == 'I') \
				305	{ \
				306	/* JIS X 0201 Kana selected. */ \
				307	set = JISX0201_Kana_set; \
				308	inptr += 3; \
				309	continue; \
				310	} \
				311	} \
				312	else if (inptr[1] == '$') \
				313	{ \
				314	if (inptr[2] == '@') \
				315	{ \
				316	/* JIS X 0208-1978 selected. */ \
				317	set = JISX0208_1978_set; \
				318	inptr += 3; \
				319	continue; \
				320	} \
				321	else if (inptr[2] == 'B') \
				322	{ \
				323	/* JIS X 0208-1983 selected. */ \
				324	set = JISX0208_1983_set; \
				325	inptr += 3; \
				326	continue; \
				327	} \
				328	else if (var == iso2022jp2) \
				329	{ \
				330	if (inptr[2] == 'A') \
				331	{ \
				332	/* GB 2312-1980 selected. */ \
				333	set = GB2312_set; \
				334	inptr += 3; \
				335	continue; \
				336	} \
				337	else if (inptr[2] == '(') \
				338	{ \
				339	if (inptr[3] == 'C') \
				340	{ \
				341	/* KSC 5601-1987 selected. */ \
				342	set = KSC5601_set; \
				343	inptr += 4; \
				344	continue; \
				345	} \
				346	else if (inptr[3] == 'D') \
				347	{ \
				348	/* JIS X 0212-1990 selected. */ \
				349	set = JISX0212_set; \
				350	inptr += 4; \
				351	continue; \
				352	} \
				353	} \
				354	} \
				355	} \
				356	else if (var == iso2022jp2 && inptr[1] == '.') \
				357	{ \
				358	if (inptr[2] == 'A') \
				359	{ \
				360	/* ISO 8859-1-GR selected. */ \
				361	set2 = ISO88591_set; \
				362	inptr += 3; \
				363	continue; \
				364	} \
				365	else if (inptr[2] == 'F') \
				366	{ \
				367	/* ISO 8859-7-GR selected. */ \
				368	set2 = ISO88597_set; \
				369	inptr += 3; \
				370	continue; \
				371	} \
				372	} \
				373	} \
				374	\
				375	if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
				376	{ \
				377	if (set2 == ISO88591_set) \
				378	{ \
				379	ch = inptr[2] \| 0x80; \
				380	inptr += 3; \
				381	} \
				382	else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
				383	{ \
				384	/* We use the table from the ISO 8859-7 module. */ \
				385	if (inptr[2] < 0x20 \|\| inptr[2] >= 0x80) \
				386	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
				387	ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
				388	if (ch == 0) \
				389	STANDARD_FROM_LOOP_ERR_HANDLER (3); \
				390	inptr += 3; \
				391	} \
				392	else \
				393	{ \
				394	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
				395	} \
				396	} \
				397	else if (ch >= 0x80) \
				398	{ \
				399	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
				400	} \
				401	else if (set == ASCII_set \|\| (ch < 0x21 \|\| ch == 0x7f)) \
				402	/* Almost done, just advance the input pointer. */ \
				403	++inptr; \
				404	else if (set == JISX0201_Roman_set) \
				405	{ \
				406	/* Use the JIS X 0201 table. */ \
				407	ch = jisx0201_to_ucs4 (ch); \
				408	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
				409	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
				410	++inptr; \
				411	} \
				412	else if (set == JISX0201_Kana_set) \
				413	{ \
				414	/* Use the JIS X 0201 table. */ \
				415	ch = jisx0201_to_ucs4 (ch + 0x80); \
				416	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
				417	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
				418	++inptr; \
				419	} \
				420	else \
				421	{ \
				422	if (set == JISX0208_1978_set \|\| set == JISX0208_1983_set) \
				423	/* XXX I don't have the tables for these two old variants of \
				424	JIS X 0208. Therefore I'm using the tables for JIS X \
				425	0208-1990. If somebody has problems with this please \
				426	provide the appropriate tables. */ \
				427	ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
				428	else if (set == JISX0212_set) \
				429	/* Use the JIS X 0212 table. */ \
				430	ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
				431	else if (set == GB2312_set) \
				432	/* Use the GB 2312 table. */ \
				433	ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
				434	else \
				435	{ \
				436	assert (set == KSC5601_set); \
				437	\
				438	/* Use the KSC 5601 table. */ \
				439	ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
				440	} \
				441	\
				442	if (__glibc_unlikely (ch == 0)) \
				443	{ \
				444	result = __GCONV_INCOMPLETE_INPUT; \
				445	break; \
				446	} \
				447	else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
				448	{ \
				449	STANDARD_FROM_LOOP_ERR_HANDLER (1); \
				450	} \
				451	} \
				452	\
				453	put32 (outptr, ch); \
				454	outptr += 4; \
				455	}
				456	#define LOOP_NEED_FLAGS
					/* The decoder loop receives the variant and a pointer to the state
					   word in addition to its regular parameters. */
				457	#define EXTRA_LOOP_DECLS , enum variant var, int *setp
					/* Split the state word into the selected character set (SET) and the
					   G2 designation (SET2) on entry ... */
				458	#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
				459	int set2 = *setp & CURRENT_ASSIGN_MASK
					/* ... and store both back; all other bits of the state word are
					   written as zero. */
				460	#define UPDATE_PARAMS *setp = set \| set2
				461	#include <iconv/loop.c>
				462
				463
				464	/* Next, define the other direction. */
				465
				/* The families of character sets the encoder can switch to.  A zero
				   entry ends a conversion list, so `none' has to stay at zero.  */
				enum conversion
				{
				  none = 0,
				  european = 1,
				  japanese = 2,
				  chinese = 3,
				  korean = 4,
				  other = 5
				};

				/* A datatype for conversion lists.  Up to five `enum conversion'
				   values are packed three bits apiece, first entry in the lowest
				   bits.  */
				typedef unsigned int cvlist_t;
				#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
				  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
				/* First entry of a list.  */
				#define CVLIST_FIRST(cvl) ((cvl) & 0x7)
				/* The list without its first entry.  */
				#define CVLIST_REST(cvl) ((cvl) >> 3)

				/* Order in which the families are tried, indexed by the stable
				   language tag value shifted right by eight bits.  */
				static const cvlist_t conversion_lists[4] =
				{
				  [0] /* TAG_none */
				    = CVLIST (japanese, european, chinese, korean, other),
				  [1] /* TAG_language_ja */
				    = CVLIST (japanese, european, chinese, korean, other),
				  [2] /* TAG_language_ko */
				    = CVLIST (korean, european, japanese, chinese, other),
				  [3] /* TAG_language_zh */
				    = CVLIST (chinese, european, japanese, korean, other)
				};
				481
				482	#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
				483	#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
				484	#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
				485	#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
				486	#define LOOPFCT TO_LOOP
					/* Each pass through BODY converts the UCS4 value at INPTR. For
					   ISO-2022-JP-2, values in the tag range U+E0000..U+E007F only update
					   TAG and produce no output. Any other value is first tried in the
					   currently selected character set (if the language tag allows it),
					   then, when no tag is active, through `Esc N' in the currently
					   designated G2 set. If both fail an escape sequence is written to
					   switch sets: to ASCII for values up to 0x7f, otherwise to the first
					   entry of the applicable conversion list that can represent the
					   value. A value that no set can represent goes to the error
					   handler. */
				487	#define BODY \
				488	{ \
				489	uint32_t ch; \
				490	size_t written; \
				491	\
				492	ch = get32 (inptr); \
				493	\
				494	if (var == iso2022jp2) \
				495	{ \
				496	/* Handle Unicode tag characters (range U+E0000..U+E007F). */ \
				497	if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7))) \
				498	{ \
				499	ch &= 0x7f; \
				500	if (ch >= 'A' && ch <= 'Z') \
				501	ch += 'a' - 'A'; \
				502	if (ch == 0x01) \
				503	tag = TAG_language; \
				504	else if (ch == 'j' && tag == TAG_language) \
				505	tag = TAG_language_j; \
				506	else if (ch == 'a' && tag == TAG_language_j) \
				507	tag = TAG_language_ja; \
				508	else if (ch == 'k' && tag == TAG_language) \
				509	tag = TAG_language_k; \
				510	else if (ch == 'o' && tag == TAG_language_k) \
				511	tag = TAG_language_ko; \
				512	else if (ch == 'z' && tag == TAG_language) \
				513	tag = TAG_language_z; \
				514	else if (ch == 'h' && tag == TAG_language_z) \
				515	tag = TAG_language_zh; \
				516	else if (ch == 0x7f) \
				517	tag = TAG_none; \
				518	else \
				519	{ \
				520	/* Other tag characters reset the tag parsing state (if the \
				521	current state is a temporary state) or are ignored (if \
				522	the current state is a stable one). */ \
				523	if (tag >= TAG_language) \
				524	tag = TAG_none; \
				525	} \
				526	\
				527	inptr += 4; \
				528	continue; \
				529	} \
				530	\
				531	/* Non-tag characters reset the tag parsing state, if the current \
				532	state is a temporary state. */ \
				533	if (__glibc_unlikely (tag >= TAG_language)) \
				534	tag = TAG_none; \
				535	} \
				536	\
				537	/* First see whether we can write the character using the currently \
				538	selected character set. But ignore the selected character set if \
				539	the current language tag shows different preferences. */ \
				540	if (set == ASCII_set) \
				541	{ \
				542	/* Please note that the NUL byte is not matched if we are not \
				543	currently using the ASCII charset. This is because we must \
				544	switch to the initial state whenever a NUL byte is written. */ \
				545	if (ch <= 0x7f) \
				546	{ \
				547	*outptr++ = ch; \
				548	written = 1; \
				549	\
				550	/* At the beginning of a line, G2 designation is cleared. */ \
				551	if (var == iso2022jp2 && ch == 0x0a) \
				552	set2 = UNSPECIFIED_set; \
				553	} \
				554	else \
				555	written = __UNKNOWN_10646_CHAR; \
				556	} \
				557	/* ISO-2022-JP recommends to encode the newline character always in \
				558	ASCII since this allows a context-free interpretation of the \
				559	characters at the beginning of the next line. Otherwise it would \
				560	have to be known whether the last line ended using ASCII or \
				561	JIS X 0201. */ \
				562	else if (set == JISX0201_Roman_set \
				563	&& (__builtin_expect (tag == TAG_none, 1) \
				564	\|\| tag == TAG_language_ja)) \
				565	{ \
				566	unsigned char buf[1]; \
				567	written = ucs4_to_jisx0201 (ch, buf); \
				568	if (written != __UNKNOWN_10646_CHAR) \
				569	{ \
				570	if (buf[0] > 0x20 && buf[0] < 0x80) \
				571	{ \
				572	*outptr++ = buf[0]; \
				573	written = 1; \
				574	} \
				575	else \
				576	written = __UNKNOWN_10646_CHAR; \
				577	} \
				578	} \
				579	else if (set == JISX0201_Kana_set \
				580	&& (__builtin_expect (tag == TAG_none, 1) \
				581	\|\| tag == TAG_language_ja)) \
				582	{ \
				583	unsigned char buf[1]; \
				584	written = ucs4_to_jisx0201 (ch, buf); \
				585	if (written != __UNKNOWN_10646_CHAR) \
				586	{ \
				587	if (buf[0] > 0xa0 && buf[0] < 0xe0) \
				588	{ \
				589	*outptr++ = buf[0] - 0x80; \
				590	written = 1; \
				591	} \
				592	else \
				593	written = __UNKNOWN_10646_CHAR; \
				594	} \
				595	} \
				596	else \
				597	{ \
				598	if ((set == JISX0208_1978_set \|\| set == JISX0208_1983_set) \
				599	&& (__builtin_expect (tag == TAG_none, 1) \
				600	\|\| tag == TAG_language_ja)) \
				601	written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
				602	else if (set == JISX0212_set \
				603	&& (__builtin_expect (tag == TAG_none, 1) \
				604	\|\| tag == TAG_language_ja)) \
				605	written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
				606	else if (set == GB2312_set \
				607	&& (__builtin_expect (tag == TAG_none, 1) \
				608	\|\| tag == TAG_language_zh)) \
				609	written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
				610	else if (set == KSC5601_set \
				611	&& (__builtin_expect (tag == TAG_none, 1) \
				612	\|\| tag == TAG_language_ko)) \
				613	written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
				614	else \
				615	written = __UNKNOWN_10646_CHAR; \
				616	\
				617	if (__glibc_unlikely (written == 0)) \
				618	{ \
				619	result = __GCONV_FULL_OUTPUT; \
				620	break; \
				621	} \
				622	else if (written != __UNKNOWN_10646_CHAR) \
				623	outptr += written; \
				624	} \
				625	\
				626	if (written == __UNKNOWN_10646_CHAR \
				627	&& __builtin_expect (tag == TAG_none, 1)) \
				628	{ \
				629	if (set2 == ISO88591_set) \
				630	{ \
				631	if (ch >= 0x80 && ch <= 0xff) \
				632	{ \
				633	if (__glibc_unlikely (outptr + 3 > outend)) \
				634	{ \
				635	result = __GCONV_FULL_OUTPUT; \
				636	break; \
				637	} \
				638	\
				639	*outptr++ = ESC; \
				640	*outptr++ = 'N'; \
				641	*outptr++ = ch & 0x7f; \
				642	written = 3; \
				643	} \
				644	} \
				645	else if (set2 == ISO88597_set) \
				646	{ \
				647	if (__glibc_likely (ch < 0xffff)) \
				648	{ \
				649	const struct gap *rp = from_idx; \
				650	\
				651	while (ch > rp->end) \
				652	++rp; \
				653	if (ch >= rp->start) \
				654	{ \
				655	unsigned char res = \
				656	iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
				657	if (res != '\0') \
				658	{ \
				659	if (__glibc_unlikely (outptr + 3 > outend)) \
				660	{ \
				661	result = __GCONV_FULL_OUTPUT; \
				662	break; \
				663	} \
				664	\
				665	*outptr++ = ESC; \
				666	*outptr++ = 'N'; \
				667	*outptr++ = res & 0x7f; \
				668	written = 3; \
				669	} \
				670	} \
				671	} \
				672	} \
				673	} \
				674	\
				675	if (written == __UNKNOWN_10646_CHAR) \
				676	{ \
				677	/* The attempts to use the currently selected character set \
				678	failed, either because the language tag changed, or because \
				679	the character requires a different character set, or because \
				680	the character is unknown. \
				681	The CJK character sets partially overlap when seen as subsets \
				682	of ISO 10646; therefore there is no single correct result. \
				683	We use a preference order which depends on the language tag. */ \
				684	\
				685	if (ch <= 0x7f) \
				686	{ \
				687	/* We must encode using ASCII. First write out the \
				688	escape sequence. */ \
				689	if (__glibc_unlikely (outptr + 3 > outend)) \
				690	{ \
				691	result = __GCONV_FULL_OUTPUT; \
				692	break; \
				693	} \
				694	\
				695	*outptr++ = ESC; \
				696	*outptr++ = '('; \
				697	*outptr++ = 'B'; \
				698	set = ASCII_set; \
				699	\
				700	if (__glibc_unlikely (outptr + 1 > outend)) \
				701	{ \
				702	result = __GCONV_FULL_OUTPUT; \
				703	break; \
				704	} \
				705	*outptr++ = ch; \
				706	\
				707	/* At the beginning of a line, G2 designation is cleared. */ \
				708	if (var == iso2022jp2 && ch == 0x0a) \
				709	set2 = UNSPECIFIED_set; \
				710	} \
				711	else \
				712	{ \
				713	/* Now it becomes difficult. We must search the other \
				714	character sets one by one. Use an ordered conversion \
				715	list that depends on the current language tag. */ \
				716	cvlist_t conversion_list; \
				717	unsigned char buf[2]; \
				718	int res = __GCONV_ILLEGAL_INPUT; \
				719	\
				720	if (var == iso2022jp2) \
				721	conversion_list = conversion_lists[tag >> 8]; \
				722	else \
				723	conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
				724	\
				725	do \
				726	switch (CVLIST_FIRST (conversion_list)) \
				727	{ \
				728	case european: \
				729	\
				730	/* Try ISO 8859-1 upper half. */ \
				731	if (ch >= 0x80 && ch <= 0xff) \
				732	{ \
				733	if (set2 != ISO88591_set) \
				734	{ \
				735	if (__builtin_expect (outptr + 3 > outend, 0)) \
				736	{ \
				737	res = __GCONV_FULL_OUTPUT; \
				738	break; \
				739	} \
				740	*outptr++ = ESC; \
				741	*outptr++ = '.'; \
				742	*outptr++ = 'A'; \
				743	set2 = ISO88591_set; \
				744	} \
				745	\
				746	if (__glibc_unlikely (outptr + 3 > outend)) \
				747	{ \
				748	res = __GCONV_FULL_OUTPUT; \
				749	break; \
				750	} \
				751	*outptr++ = ESC; \
				752	*outptr++ = 'N'; \
				753	*outptr++ = ch - 0x80; \
				754	res = __GCONV_OK; \
				755	break; \
				756	} \
				757	\
				758	/* Try ISO 8859-7 upper half. */ \
				759	if (__glibc_likely (ch < 0xffff)) \
				760	{ \
				761	const struct gap *rp = from_idx; \
				762	\
				763	while (ch > rp->end) \
				764	++rp; \
				765	if (ch >= rp->start) \
				766	{ \
				767	unsigned char ch2 = \
				768	iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
				769	if (ch2 != '\0') \
				770	{ \
				771	if (set2 != ISO88597_set) \
				772	{ \
				773	if (__builtin_expect (outptr + 3 > outend, \
				774	0)) \
				775	{ \
				776	res = __GCONV_FULL_OUTPUT; \
				777	break; \
				778	} \
				779	*outptr++ = ESC; \
				780	*outptr++ = '.'; \
				781	*outptr++ = 'F'; \
				782	set2 = ISO88597_set; \
				783	} \
				784	\
				785	if (__builtin_expect (outptr + 3 > outend, 0)) \
				786	{ \
				787	res = __GCONV_FULL_OUTPUT; \
				788	break; \
				789	} \
				790	*outptr++ = ESC; \
				791	*outptr++ = 'N'; \
				792	*outptr++ = ch2 - 0x80; \
				793	res = __GCONV_OK; \
				794	break; \
				795	} \
				796	} \
				797	} \
				798	\
				799	break; \
				800	\
				801	case japanese: \
				802	\
				803	/* Try JIS X 0201 Roman. */ \
				804	written = ucs4_to_jisx0201 (ch, buf); \
				805	if (written != __UNKNOWN_10646_CHAR \
				806	&& buf[0] > 0x20 && buf[0] < 0x80) \
				807	{ \
				808	if (set != JISX0201_Roman_set) \
				809	{ \
				810	if (__builtin_expect (outptr + 3 > outend, 0)) \
				811	{ \
				812	res = __GCONV_FULL_OUTPUT; \
				813	break; \
				814	} \
				815	*outptr++ = ESC; \
				816	*outptr++ = '('; \
				817	*outptr++ = 'J'; \
				818	set = JISX0201_Roman_set; \
				819	} \
				820	\
				821	if (__glibc_unlikely (outptr + 1 > outend)) \
				822	{ \
				823	res = __GCONV_FULL_OUTPUT; \
				824	break; \
				825	} \
				826	*outptr++ = buf[0]; \
				827	res = __GCONV_OK; \
				828	break; \
				829	} \
				830	\
				831	/* Try JIS X 0208. */ \
				832	written = ucs4_to_jisx0208 (ch, buf, 2); \
				833	if (written != __UNKNOWN_10646_CHAR) \
				834	{ \
				835	if (set != JISX0208_1983_set) \
				836	{ \
				837	if (__builtin_expect (outptr + 3 > outend, 0)) \
				838	{ \
				839	res = __GCONV_FULL_OUTPUT; \
				840	break; \
				841	} \
				842	*outptr++ = ESC; \
				843	*outptr++ = '$'; \
				844	*outptr++ = 'B'; \
				845	set = JISX0208_1983_set; \
				846	} \
				847	\
				848	if (__glibc_unlikely (outptr + 2 > outend)) \
				849	{ \
				850	res = __GCONV_FULL_OUTPUT; \
				851	break; \
				852	} \
				853	*outptr++ = buf[0]; \
				854	*outptr++ = buf[1]; \
				855	res = __GCONV_OK; \
				856	break; \
				857	} \
				858	\
				859	if (__glibc_unlikely (var == iso2022jp)) \
				860	/* Don't use the other Japanese character sets. */ \
				861	break; \
				862	\
				863	/* Try JIS X 0212. */ \
				864	written = ucs4_to_jisx0212 (ch, buf, 2); \
				865	if (written != __UNKNOWN_10646_CHAR) \
				866	{ \
				867	if (set != JISX0212_set) \
				868	{ \
				869	if (__builtin_expect (outptr + 4 > outend, 0)) \
				870	{ \
				871	res = __GCONV_FULL_OUTPUT; \
				872	break; \
				873	} \
				874	*outptr++ = ESC; \
				875	*outptr++ = '$'; \
				876	*outptr++ = '('; \
				877	*outptr++ = 'D'; \
				878	set = JISX0212_set; \
				879	} \
				880	\
				881	if (__glibc_unlikely (outptr + 2 > outend)) \
				882	{ \
				883	res = __GCONV_FULL_OUTPUT; \
				884	break; \
				885	} \
				886	*outptr++ = buf[0]; \
				887	*outptr++ = buf[1]; \
				888	res = __GCONV_OK; \
				889	break; \
				890	} \
				891	\
				892	break; \
				893	\
				894	case chinese: \
				895	assert (var == iso2022jp2); \
				896	\
				897	/* Try GB 2312. */ \
				898	written = ucs4_to_gb2312 (ch, buf, 2); \
				899	if (written != __UNKNOWN_10646_CHAR) \
				900	{ \
				901	if (set != GB2312_set) \
				902	{ \
				903	if (__builtin_expect (outptr + 3 > outend, 0)) \
				904	{ \
				905	res = __GCONV_FULL_OUTPUT; \
				906	break; \
				907	} \
				908	*outptr++ = ESC; \
				909	*outptr++ = '$'; \
				910	*outptr++ = 'A'; \
				911	set = GB2312_set; \
				912	} \
				913	\
				914	if (__glibc_unlikely (outptr + 2 > outend)) \
				915	{ \
				916	res = __GCONV_FULL_OUTPUT; \
				917	break; \
				918	} \
				919	*outptr++ = buf[0]; \
				920	*outptr++ = buf[1]; \
				921	res = __GCONV_OK; \
				922	break; \
				923	} \
				924	\
				925	break; \
				926	\
				927	case korean: \
				928	assert (var == iso2022jp2); \
				929	\
				930	/* Try KSC 5601. */ \
				931	written = ucs4_to_ksc5601 (ch, buf, 2); \
				932	if (written != __UNKNOWN_10646_CHAR) \
				933	{ \
				934	if (set != KSC5601_set) \
				935	{ \
				936	if (__builtin_expect (outptr + 4 > outend, 0)) \
				937	{ \
				938	res = __GCONV_FULL_OUTPUT; \
				939	break; \
				940	} \
				941	*outptr++ = ESC; \
				942	*outptr++ = '$'; \
				943	*outptr++ = '('; \
				944	*outptr++ = 'C'; \
				945	set = KSC5601_set; \
				946	} \
				947	\
				948	if (__glibc_unlikely (outptr + 2 > outend)) \
				949	{ \
				950	res = __GCONV_FULL_OUTPUT; \
				951	break; \
				952	} \
				953	*outptr++ = buf[0]; \
				954	*outptr++ = buf[1]; \
				955	res = __GCONV_OK; \
				956	break; \
				957	} \
				958	\
				959	break; \
				960	\
				961	case other: \
				962	assert (var == iso2022jp2); \
				963	\
				964	/* Try JIS X 0201 Kana. This is not officially part \
				965	of ISO-2022-JP-2, according to RFC 1554. Therefore \
				966	we try this only after all other attempts. */ \
				967	written = ucs4_to_jisx0201 (ch, buf); \
				968	if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
				969	{ \
				970	if (set != JISX0201_Kana_set) \
				971	{ \
				972	if (__builtin_expect (outptr + 3 > outend, 0)) \
				973	{ \
				974	res = __GCONV_FULL_OUTPUT; \
				975	break; \
				976	} \
				977	*outptr++ = ESC; \
				978	*outptr++ = '('; \
				979	*outptr++ = 'I'; \
				980	set = JISX0201_Kana_set; \
				981	} \
				982	\
				983	if (__glibc_unlikely (outptr + 1 > outend)) \
				984	{ \
				985	res = __GCONV_FULL_OUTPUT; \
				986	break; \
				987	} \
				988	*outptr++ = buf[0] - 0x80; \
				989	res = __GCONV_OK; \
				990	break; \
				991	} \
				992	\
				993	break; \
				994	\
				995	default: \
				996	abort (); \
				997	} \
				998	while (res == __GCONV_ILLEGAL_INPUT \
				999	&& (conversion_list = CVLIST_REST (conversion_list)) != 0);\
				1000	\
				1001	if (res == __GCONV_FULL_OUTPUT) \
				1002	{ \
				1003	result = res; \
				1004	break; \
				1005	} \
				1006	\
				1007	if (res == __GCONV_ILLEGAL_INPUT) \
				1008	{ \
				1009	STANDARD_TO_LOOP_ERR_HANDLER (4); \
				1010	} \
				1011	} \
				1012	} \
				1013	\
				1014	/* Now that we wrote the output increment the input pointer. */ \
				1015	inptr += 4; \
				1016	}
				1017	#define LOOP_NEED_FLAGS
					/* The encoder loop receives the variant and a pointer to the state
					   word in addition to its regular parameters. */
				1018	#define EXTRA_LOOP_DECLS , enum variant var, int *setp
					/* Split the state word into the selected character set (SET), the G2
					   designation (SET2) and the language tag parsing state (TAG). */
				1019	#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
				1020	int set2 = *setp & CURRENT_ASSIGN_MASK; \
				1021	int tag = *setp & CURRENT_TAG_MASK;
					/* Reload the three locals from the state word.
					   NOTE(review): presumably used by the loop skeleton after the state
					   has been restored - confirm in iconv/loop.c. */
				1022	#define REINIT_PARAMS do \
				1023	{ \
				1024	set = *setp & CURRENT_SEL_MASK; \
				1025	set2 = *setp & CURRENT_ASSIGN_MASK; \
				1026	tag = *setp & CURRENT_TAG_MASK; \
				1027	} \
				1028	while (0)
					/* Store the three locals back; all other bits of the state word are
					   written as zero. */
				1029	#define UPDATE_PARAMS *setp = set \| set2 \| tag
				1030	#include <iconv/loop.c>
				1031
				1032
				1033	/* Now define the toplevel functions. */
				1034	#include <iconv/skeleton.c>