Blame - ap/build/uClibc/libc/misc/wchar/wchar.c - T106_DC

blob: 412c557eb2a92d36d30885f85cc5ff7f372d4bbc [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame^]	1
				2	/* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
				3	*
				4	* This library is free software; you can redistribute it and/or
				5	* modify it under the terms of the GNU Library General Public
				6	* License as published by the Free Software Foundation; either
				7	* version 2 of the License, or (at your option) any later version.
				8	*
				9	* This library is distributed in the hope that it will be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				12	* Library General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU Library General Public
				15	* License along with this library; if not, write to the Free
				16	* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
				17	*/
				18
				19	/* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
				20	*
				21	* Besides uClibc, I'm using this code in my libc for elks, which is
				22	* a 16-bit environment with a fairly limited compiler. It would make
				23	* things much easier for me if this file isn't modified unnecessarily.
				24	* In particular, please put any new or replacement functions somewhere
				25	* else, and modify the makefile to use your version instead.
				26	* Thanks. Manuel
				27	*
				28	* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
				29
				30
				31	/* May 23, 2002 Initial Notes:
				32	*
				33	* I'm still tweaking this stuff, but it passes the tests I've thrown
				34	* at it, and Erik needs it for the gcc port. The glibc extension
				35	* __wcsnrtombs() hasn't been tested, as I didn't find a test for it
				36	* in the glibc source. I also need to fix the behavior of
				37	* _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
				38	*
				39	* UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
				40	* file on my platform (x86) show about 5-10% faster conversion speed than
				41	* glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
				42	* individual mbrtowc()/wcrtomb() calls.
				43	*
				44	* If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
				45	* as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
				46	* needs to deal gracefully with whatever is sent to it. In that mode,
				47	* it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
				48	* an arg to force that behavior, so the interface will be changing.
				49	*
				50	* I need to fix the error checking for 16-bit wide chars. This isn't
				51	* an issue for uClibc, but may be for ELKS. I'm currently not sure
				52	* if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
				53	*
				54	* July 1, 2002
				55	*
				56	* Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
				57	* Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
				58	* locales.
				59	* Enabled building of a C/POSIX-locale-only version, so full locale support
				60	* no longer needs to be enabled.
				61	*
				62	* Nov 4, 2002
				63	*
				64	* Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
				65	* Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
				66	* order to support %ls in printf. See comments below for details.
				67	* Change behaviour of wc<->mb functions when in the C locale. Now they do
				68	* a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
				69	* and consistency with the std's requirement that a printf format string be
				70	* a valid multibyte string beginning and ending in its initial shift state.
				71	*
				72	* Nov 5, 2002
				73	*
				74	* Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
				75	*
				76	* Nov 7, 2002
				77	*
				78	* Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
				79	* Added some size/speed optimizations and integrated it into my locale
				80	* framework. Minimally tested at the moment, but the stub C-locale
				81	* version (which most people would probably be using) should be fine.
				82	*
				83	* Nov 21, 2002
				84	*
				85	* Revert the wc<->mb changes from earlier this month involving the C-locale.
				86	* Add a couple of ugly hacks to support *wprintf.
				87	* Add a mini iconv() and iconv implementation (requires locale support).
				88	*
				89	* Aug 1, 2003
				90	* Bug fix for mbrtowc.
				91	*
				92	* Aug 18, 2003
				93	* Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
				94	*
				95	* Feb 11, 2004
				96	* Bug fix: Fix size check for remaining output space in iconv().
				97	*
				98	* Manuel
				99	*/
				100	#ifdef _LIBC
				101	#include <errno.h>
				102	#include <stddef.h>
				103	#include <limits.h>
				104	#include <stdint.h>
				105	#include <inttypes.h>
				106	#include <stdlib.h>
				107	#include <stdio.h>
				108	#include <assert.h>
				109	#include <locale.h>
				110	#include <wchar.h>
				111	#include <bits/uClibc_uwchar.h>
				112
				113	/**********************************************************************/
				114	#ifdef __UCLIBC_HAS_LOCALE__
				115	#ifdef __UCLIBC_MJN3_ONLY__
				116	#ifdef L_iswspace
				117	/* generates one warning */
				118	#warning TODO: Fix Cc2wc* and Cwc2c* defines!
				119	#endif
				120	#endif /* __UCLIBC_MJN3_ONLY__ */
				121
				122	#define ENCODING (__UCLIBC_CURLOCALE->encoding)
				123
				124	#define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT
				125	#define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN
				126	#define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX
				127	#define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT
				128	#define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT
				129	#define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN
				130
				131	#ifndef __CTYPE_HAS_UTF_8_LOCALES
				132	#warning __CTYPE_HAS_UTF_8_LOCALES not set!
				133	#endif
				134
				135	#else /* __UCLIBC_HAS_LOCALE__ */
				136
				137	#ifdef __UCLIBC_MJN3_ONLY__
				138	#ifdef L_btowc
				139	/* emit only once */
				140	#warning fix preprocessor logic testing locale settings
				141	#endif
				142	#endif
				143
				144	#define ENCODING (__ctype_encoding_7_bit)
				145	#ifdef __CTYPE_HAS_8_BIT_LOCALES
				146	#error __CTYPE_HAS_8_BIT_LOCALES is defined!
				147	#endif
				148	#ifdef __CTYPE_HAS_UTF_8_LOCALES
				149	#error __CTYPE_HAS_UTF_8_LOCALES is defined!
				150	#endif
				151	#undef L__wchar_utf8sntowcs
				152	#undef L__wchar_wcsntoutf8s
				153
				154	#endif /* __UCLIBC_HAS_LOCALE__ */
				155	/**********************************************************************/
				156
				157	#if WCHAR_MAX > 0xffffUL
				158	#define UTF_8_MAX_LEN 6
				159	#else
				160	#define UTF_8_MAX_LEN 3
				161	#endif
				162
				163	#define KUHN 1
				164
				165	/* Implementation-specific work functions. */
				166
				167	extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
				168	const char **__restrict src, size_t n,
				169	mbstate_t *ps, int allow_continuation) attribute_hidden;
				170
				171	extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
				172	const wchar_t **__restrict src, size_t wn) attribute_hidden;
				173	#endif
				174	/**********************************************************************/
				175	#ifdef L_btowc
				176
				177
				/* Convert a single byte to its wide-character equivalent.
				 *
				 * c: the byte to convert (passed as an int), or EOF.
				 *
				 * With 8-bit locale support the byte is fed through mbrtowc() using a
				 * freshly reset conversion state; WEOF is returned for EOF or when
				 * mbrtowc() returns anything other than 0 or 1.  Without 8-bit locale
				 * support only the values 0x00-0x7f map to themselves and every other
				 * input (including EOF) yields WEOF.
				 */
				wint_t btowc(int c)
				{
				#ifdef __CTYPE_HAS_8_BIT_LOCALES

					wchar_t wc;
					unsigned char buf[1];
					mbstate_t mbstate;

					if (c != EOF) {
						*buf = (unsigned char) c;
						mbstate.__mask = 0;		/* Initialize the mbstate. */
						if (mbrtowc(&wc, (char*) buf, 1, &mbstate) <= 1) {
							return wc;
						}
					}
					return WEOF;

				#else  /* !__CTYPE_HAS_8_BIT_LOCALES */

				#ifdef __UCLIBC_HAS_LOCALE__
					assert((ENCODING == __ctype_encoding_7_bit)
						   || (ENCODING == __ctype_encoding_utf8));
				#endif

					/* If we don't have 8-bit locale support, then this is trivial since
					 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
					return (((unsigned int)c) < 0x80) ? c : WEOF;

				#endif /* !__CTYPE_HAS_8_BIT_LOCALES */
				}
				208	libc_hidden_def(btowc)
				209
				210	#endif
				211	/**********************************************************************/
				212	#ifdef L_wctob
				213
				214	/* Note: We completely ignore ps in all currently supported conversions. */
				215
				216
				/* Convert a wide character to its single-byte representation.
				 *
				 * c: the wide character to convert.
				 *
				 * With 8-bit locale support the character is run through wcrtomb() into
				 * a local buffer and the first byte is returned when exactly one byte
				 * was produced; otherwise EOF.  Without 8-bit locale support only the
				 * values 0x00-0x7f are returned unchanged and everything else is EOF.
				 */
				int wctob(wint_t c)
				{
				#ifdef __CTYPE_HAS_8_BIT_LOCALES

					unsigned char buf[MB_LEN_MAX];

					return (wcrtomb((char*) buf, c, NULL) == 1) ? *buf : EOF;

				#else  /*  __CTYPE_HAS_8_BIT_LOCALES */

				#ifdef __UCLIBC_HAS_LOCALE__
					assert((ENCODING == __ctype_encoding_7_bit)
						   || (ENCODING == __ctype_encoding_utf8));
				#endif /* __UCLIBC_HAS_LOCALE__ */

					/* If we don't have 8-bit locale support, then this is trivial since
					 * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */

					/* TODO: need unsigned version of wint_t... */
				/*  	return (((unsigned int)c) < 0x80) ? c : WEOF; */
					return ((c >= 0) && (c < 0x80)) ? c : EOF;

				#endif /*  __CTYPE_HAS_8_BIT_LOCALES */
				}
				241
				242	#endif
				243	/**********************************************************************/
				244	#ifdef L_mbsinit
				245
				246	int mbsinit(const mbstate_t *ps)
				247	{
				248	return !ps \|\| !ps->__mask;
				249	}
				250	libc_hidden_def(mbsinit)
				251
				252	#endif
				253	/**********************************************************************/
				254	#ifdef L_mbrlen
				255
				256
				/* Determine the length in bytes of the next multibyte character.
				 *
				 * s:  input bytes.
				 * n:  maximum number of bytes of s to examine.
				 * ps: conversion state; a private static state is used when NULL.
				 *
				 * Simply forwards to mbrtowc() with a NULL destination and returns
				 * whatever it returns.
				 */
				size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
				{
					static mbstate_t mbstate;	/* Rely on bss 0-init. */

					return mbrtowc(NULL, s, n, (ps != NULL) ? ps : &mbstate);
				}
				263	libc_hidden_def(mbrlen)
				264
				265	#endif
				266	/**********************************************************************/
				267	#ifdef L_mbrtowc
				268
				269
				/* Convert the next multibyte character of s into a wide character.
				 *
				 * pwc: where to store the result, or NULL to discard it.
				 * s:   input bytes.  NULL means "convert an empty string"; in that
				 *      case pwc is forced to NULL as well and n is treated as 1.
				 * n:   maximum number of bytes of s to examine.
				 * ps:  conversion state; a private static state is used when NULL.
				 *
				 * Returns 0 when s starts with a nul byte (storing L'\0' through pwc
				 * if it is set) or when n is 0.  In a UTF-8 locale the work is done by
				 * _wchar_utf8sntowcs() and the number of bytes consumed is returned
				 * when one character was produced, otherwise that helper's result.
				 * In all other cases the result of a one-character mbsnrtowcs() call
				 * is returned, and the converted character is stored through pwc only
				 * when that result is not negative when viewed as ssize_t.
				 */
				size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
							   size_t n, mbstate_t *__restrict ps)
				{
					static mbstate_t mbstate;	/* Rely on bss 0-init. */
					wchar_t wcbuf[1];
					const char *p;
					size_t r;
					char empty_string[1];		/* Avoid static to be fPIC friendly. */

					if (!ps) {
						ps = &mbstate;
					}

					if (!s) {
						pwc = (wchar_t *) s;	/* NULL */
						empty_string[0] = 0;	/* Init the empty string when necessary. */
						s = empty_string;
						n = 1;
					} else if (*s == '\0') {
						if (pwc)
							*pwc = '\0';
						/* According to the ISO C 89 standard this is the expected behaviour. */
						return 0;
					} else if (!n) {
						/* TODO: change error code? */
				#if 0
						return (ps->__mask && (ps->__wc == 0xffffU))
							? ((size_t) -1) : ((size_t) -2);
				#else
						return 0;
				#endif
					}

					p = s;

				#ifdef __CTYPE_HAS_UTF_8_LOCALES
					/* Need to do this here since mbsrtowcs doesn't allow incompletes. */
					if (ENCODING == __ctype_encoding_utf8) {
						if (!pwc) {
							pwc = wcbuf;
						}
						r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
						return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
					}
				#endif

				#ifdef __UCLIBC_MJN3_ONLY__
				#warning TODO: This adds a trailing nul!
				#endif /* __UCLIBC_MJN3_ONLY__ */

					r = mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);

					if (((ssize_t) r) >= 0) {
						if (pwc) {
							/* Copy the converted character out of the scratch buffer. */
							*pwc = *wcbuf;
						}
					}
					return (size_t) r;
				}
				329	libc_hidden_def(mbrtowc)
				330
				331	#endif
				332	/**********************************************************************/
				333	#ifdef L_wcrtomb
				334
				335
				336	/* Note: We completely ignore ps in all currently supported conversions. */
				337	/* TODO: Check for valid state anyway? */
				338
				/* Convert one wide character to its multibyte representation.
				 *
				 * s:  destination buffer; when NULL a local scratch buffer is used and
				 *     L'\0' is converted instead of wc.
				 * wc: the wide character to convert.
				 * ps: conversion state, handed through to wcsnrtombs() untouched.
				 *
				 * Returns the result of a one-character wcsnrtombs() call limited to
				 * MB_LEN_MAX bytes, except that a result of 0 (the nul character) is
				 * reported as 1.
				 */
				size_t wcrtomb(register char *__restrict s, wchar_t wc,
							   mbstate_t *__restrict ps)
				{
				#ifdef __UCLIBC_MJN3_ONLY__
				#warning TODO: Should wcsnrtombs nul-terminate unconditionally? Check glibc.
				#endif /* __UCLIBC_MJN3_ONLY__ */
					wchar_t wcbuf[1];
					const wchar_t *pwc;
					size_t r;
					char buf[MB_LEN_MAX];

					if (!s) {
						s = buf;
						wc = 0;
					}

					pwc = wcbuf;
					wcbuf[0] = wc;

					r = wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
					return (r != 0) ? r : 1;
				}
				361	libc_hidden_def(wcrtomb)
				362
				363	#endif
				364	/**********************************************************************/
				365	#ifdef L_mbsrtowcs
				366
				367
				/* Convert a nul-terminated multibyte string to a wide-character string.
				 *
				 * dst: destination buffer (may be NULL, see mbsnrtowcs()).
				 * src: address of the input pointer; forwarded to mbsnrtowcs().
				 * len: maximum number of wide characters to produce.
				 * ps:  conversion state; a private static state is used when NULL.
				 *
				 * Thin wrapper: calls mbsnrtowcs() with an input byte limit of SIZE_MAX
				 * and returns its result.
				 */
				size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
								 size_t len, mbstate_t *__restrict ps)
				{
					static mbstate_t mbstate;	/* Rely on bss 0-init. */

					return mbsnrtowcs(dst, src, SIZE_MAX, len,
									  ((ps != NULL) ? ps : &mbstate));
				}
				376	libc_hidden_def(mbsrtowcs)
				377
				378	#endif
				379	/**********************************************************************/
				380	#ifdef L_wcsrtombs
				381
				382	/* Note: We completely ignore ps in all currently supported conversions.
				383
				384	* TODO: Check for valid state anyway? */
				385
				386
				/* Convert a nul-terminated wide-character string to a multibyte string.
				 *
				 * dst: destination buffer (may be NULL, see wcsnrtombs()).
				 * src: address of the input pointer; forwarded to wcsnrtombs().
				 * len: maximum number of bytes to produce.
				 * ps:  conversion state, handed through untouched.
				 *
				 * Thin wrapper: calls wcsnrtombs() with an input wide-character limit
				 * of SIZE_MAX and returns its result.
				 */
				size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
								 size_t len, mbstate_t *__restrict ps)
				{
					return wcsnrtombs(dst, src, SIZE_MAX, len, ps);
				}
				392	libc_hidden_def(wcsrtombs)
				393
				394	#endif
				395	/**********************************************************************/
				396	#ifdef L__wchar_utf8sntowcs
				397
				398	/* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
				399	* UTF-8-test.txt stress test.
				400	*/
				401	/* #define DECODER */
				402
				403	#ifdef DECODER
				404	#ifndef KUHN
				405	#define KUHN
				406	#endif
				407	#endif
				408
				409	size_t attribute_hidden _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
				410	const char **__restrict src, size_t n,
				411	mbstate_t *ps, int allow_continuation)
				412	{
				413	register const char *s;
				414	__uwchar_t mask;
				415	__uwchar_t wc;
				416	wchar_t wcbuf[1];
				417	size_t count;
				418	int incr;
				419
				420	s = *src;
				421
				422	assert(s != NULL);
				423	assert(ps != NULL);
				424
				425	incr = 1;
				426	/* NOTE: The following is an AWFUL HACK! In order to support %s in
				427	* wprintf, we need to be able to compute the number of wchars needed
				428	* for the mbs conversion, not to exceed the precision specified.
				429	* But if dst is NULL, the return value is the length assuming a
				430	* sufficiently sized buffer. So, we allow passing of (wchar_t *) ps
				431	* as pwc in order to flag that we really want the length, subject
				432	* to the restricted buffer size and no partial conversions.
				433	* See mbsnrtowcs() as well. */
				434	if (!pwc \|\| (pwc == ((wchar_t *)ps))) {
				435	if (!pwc) {
				436	wn = SIZE_MAX;
				437	}
				438	pwc = wcbuf;
				439	incr = 0;
				440	}
				441
				442	/* This is really here only to support the glibc extension function
				443	* __mbsnrtowcs which apparently returns 0 if wn == 0 without any
				444	* check on the validity of the mbstate. */
				445	if (!(count = wn)) {
				446	return 0;
				447	}
				448
				449	if ((mask = (__uwchar_t) ps->__mask) != 0) { /* A continuation... */
				450	#ifdef DECODER
				451	wc = (__uwchar_t) ps->__wc;
				452	if (n) {
				453	goto CONTINUE;
				454	}
				455	goto DONE;
				456	#else
				457	if ((wc = (__uwchar_t) ps->__wc) != 0xffffU) {
				458	/* TODO: change error code here and below? */
				459	if (n) {
				460	goto CONTINUE;
				461	}
				462	goto DONE;
				463	}
				464	__set_errno(EILSEQ);
				465	return (size_t) -1; /* We're in an error state. */
				466	#endif
				467	}
				468
				469	do {
				470	if (!n) {
				471	goto DONE;
				472	}
				473	--n;
				474	if ((wc = ((unsigned char) s++)) >= 0x80) { / Not ASCII... */
				475	mask = 0x40;
				476	#ifdef __UCLIBC_MJN3_ONLY__
				477	#warning TODO: Fix range for 16 bit wchar_t case.
				478	#endif
				479	if (( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) &&
				480	(((unsigned char)s[-1] != 0xc0 ) && ((unsigned char)s[-1] != 0xc1 ))) {
				481	goto START;
				482	}
				483	BAD:
				484	#ifdef DECODER
				485	wc = 0xfffdU;
				486	goto COMPLETE;
				487	#else
				488	ps->__mask = mask;
				489	ps->__wc = 0xffffU;
				490	__set_errno(EILSEQ);
				491	return (size_t) -1; /* Illegal start byte! */
				492	#endif
				493
				494	CONTINUE:
				495	while (n) {
				496	--n;
				497	if ((*s & 0xc0) != 0x80) {
				498	goto BAD;
				499	}
				500	mask <<= 5;
				501	wc <<= 6;
				502	wc += (s & 0x3f); / keep seperate for bcc (smaller code) */
				503	++s;
				504	START:
				505	wc &= ~(mask << 1);
				506
				507	if ((wc & mask) == 0) { /* Character completed. */
				508	if ((mask >>= 5) == 0x40) {
				509	mask += mask;
				510	}
				511	/* Check for invalid sequences (longer than necessary)
				512	* and invalid chars. */
				513	if ( (wc < mask) /* Sequence not minimal length. */
				514	#ifdef KUHN
				515	#if UTF_8_MAX_LEN == 3
				516	#error broken since mask can overflow!!
				517	/* For plane 0, these are the only defined values.*/
				518	\|\| (wc > 0xfffdU)
				519	#else
				520	/* Note that we don't need to worry about exceeding */
				521	/* 31 bits as that is the most that UTF-8 provides. */
				522	\|\| ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
				523	#endif
				524	\|\| ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
				525	#endif /* KUHN */
				526	) {
				527	goto BAD;
				528	}
				529	goto COMPLETE;
				530	}
				531	}
				532	/* Character potentially valid but incomplete. */
				533	if (!allow_continuation) {
				534	if (count != wn) {
				535	return 0;
				536	}
				537	/* NOTE: The following can fail if you allow and then disallow
				538	* continuation!!! */
				539	#if UTF_8_MAX_LEN == 3
				540	#error broken since mask can overflow!!
				541	#endif
				542	/* Need to back up... */
				543	do {
				544	--s;
				545	} while ((mask >>= 5) >= 0x40);
				546	goto DONE;
				547	}
				548	ps->__mask = (wchar_t) mask;
				549	ps->__wc = (wchar_t) wc;
				550	*src = s;
				551	return (size_t) -2;
				552	}
				553	COMPLETE:
				554	*pwc = wc;
				555	pwc += incr;
				556	}
				557	#ifdef DECODER
				558	while (--count);
				559	#else
				560	while (wc && --count);
				561
				562	if (!wc) {
				563	s = NULL;
				564	}
				565	#endif
				566
				567	DONE:
				568	/* ps->__wc is irrelavent here. */
				569	ps->__mask = 0;
				570	if (pwc != wcbuf) {
				571	*src = s;
				572	}
				573
				574	return wn - count;
				575	}
				576
				577	#endif
				578	/**********************************************************************/
				579	#ifdef L__wchar_wcsntoutf8s
				580
				581	size_t attribute_hidden _wchar_wcsntoutf8s(char *__restrict s, size_t n,
				582	const wchar_t **__restrict src, size_t wn)
				583	{
				584	register char *p;
				585	size_t len, t;
				586	__uwchar_t wc;
				587	const __uwchar_t *swc;
				588	int store;
				589	char buf[MB_LEN_MAX];
				590	char m;
				591
				592	store = 1;
				593	/* NOTE: The following is an AWFUL HACK! In order to support %ls in
				594	* printf, we need to be able to compute the number of bytes needed
				595	* for the mbs conversion, not to exceed the precision specified.
				596	* But if dst is NULL, the return value is the length assuming a
				597	* sufficiently sized buffer. So, we allow passing of (char *) src
				598	* as dst in order to flag that we really want the length, subject
				599	* to the restricted buffer size and no partial conversions.
				600	* See wcsnrtombs() as well. */
				601	if (!s \|\| (s == ((char *) src))) {
				602	if (!s) {
				603	n = SIZE_MAX;
				604	}
				605	s = buf;
				606	store = 0;
				607	}
				608
				609	t = n;
				610	swc = (const __uwchar_t ) src;
				611
				612	assert(swc != NULL);
				613
				614	while (wn && t) {
				615	wc = *swc;
				616
				617	*s = wc;
				618	len = 1;
				619
				620	if (wc >= 0x80) {
				621	#ifdef KUHN
				622	if (
				623	#if UTF_8_MAX_LEN == 3
				624	/* For plane 0, these are the only defined values.*/
				625	/* Note that we don't need to worry about exceeding */
				626	/* 31 bits as that is the most that UTF-8 provides. */
				627	(wc > 0xfffdU)
				628	#else
				629	/* UTF_8_MAX_LEN == 6 */
				630	(wc > 0x7fffffffUL)
				631	\|\| ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
				632	#endif
				633	\|\| ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
				634	) {
				635	__set_errno(EILSEQ);
				636	return (size_t) -1;
				637	}
				638	#else /* KUHN */
				639	#if UTF_8_MAX_LEN != 3
				640	if (wc > 0x7fffffffUL) { /* Value too large. */
				641	__set_errno(EILSEQ);
				642	return (size_t) -1;
				643	}
				644	#endif
				645	#endif /* KUHN */
				646
				647	wc >>= 1;
				648	p = s;
				649	do {
				650	++p;
				651	} while (wc >>= 5);
				652	wc = *swc;
				653	if ((len = p - s) > t) { /* Not enough space. */
				654	break;
				655	}
				656
				657	m = 0x80;
				658	while( p>s ) {
				659	m = (m >> 1) \| 0x80;
				660	*--p = (wc & 0x3f) \| 0x80;
				661	wc >>= 6;
				662	}
				663	*s \|= (m << 1);
				664	} else if (wc == 0) { /* End of string. */
				665	swc = NULL;
				666	break;
				667	}
				668
				669	++swc;
				670	--wn;
				671	t -= len;
				672	if (store) {
				673	s += len;
				674	}
				675	}
				676
				677	if (store) {
				678	src = (const wchar_t ) swc;
				679	}
				680
				681	return n - t;
				682	}
				683
				684
				685	#endif
				686	/**********************************************************************/
				687	#ifdef L_mbsnrtowcs
				688
				689	/* WARNING: We treat len as SIZE_MAX when dst is NULL! */
				690
				/* Convert at most NMC bytes of a multibyte string to wide characters.
				 *
				 * dst: destination buffer.  NULL means "count only" with len treated
				 *      as SIZE_MAX; passing (wchar_t *) ps means "count only" while
				 *      still honouring len (see the hack note below).  In both
				 *      count-only modes *src is not updated.
				 * src: address of the input pointer.  When storing, it is advanced
				 *      past the converted bytes, or set to NULL when the terminating
				 *      nul was converted.
				 * NMC: maximum number of input bytes to read.
				 * len: maximum number of wide characters to produce.
				 * ps:  conversion state; a private static state is used when NULL.
				 *
				 * In a UTF-8 locale the work is delegated to _wchar_utf8sntowcs() and
				 * its "incomplete" result (size_t) -2 is reported as 0.  Otherwise the
				 * input is treated as a single-byte encoding.
				 *
				 * Returns the number of wide characters produced (terminating nul not
				 * counted), or (size_t) -1 with errno set to EILSEQ for a byte that
				 * has no mapping.
				 */
				size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
									size_t NMC, size_t len, mbstate_t *__restrict ps)
				{
					static mbstate_t mbstate;	/* Rely on bss 0-init. */
					wchar_t wcbuf[1];
					const char *s;
					size_t count;
					int incr;

					if (!ps) {
						ps = &mbstate;
					}

				#ifdef __CTYPE_HAS_UTF_8_LOCALES
					if (ENCODING == __ctype_encoding_utf8) {
						size_t r;
						return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1))
								!= (size_t) -2) ? r : 0;
					}
				#endif
					incr = 1;
					/* NOTE: The following is an AWFUL HACK!  In order to support %s in
					 * wprintf, we need to be able to compute the number of wchars needed
					 * for the mbs conversion, not to exceed the precision specified.
					 * But if dst is NULL, the return value is the length assuming a
					 * sufficiently sized buffer.  So, we allow passing of ((wchar_t *)ps)
					 * as dst in order to flag that we really want the length, subject
					 * to the restricted buffer size and no partial conversions.
					 * See _wchar_utf8sntowcs() as well. */
					if (!dst || (dst == ((wchar_t *)ps))) {
						if (!dst) {
							len = SIZE_MAX;
						}
						dst = wcbuf;
						incr = 0;
					}

					/* Since all the following encodings are single-byte encodings... */
					if (len > NMC) {
						len = NMC;
					}

					count = len;
					s = *src;

				#ifdef __CTYPE_HAS_8_BIT_LOCALES
					if (ENCODING == __ctype_encoding_8_bit) {
						wchar_t wc;
						while (count) {
							if ((wc = ((unsigned char)(*s))) >= 0x80) {	/* Non-ASCII... */
								wc -= 0x80;
								wc = __UCLIBC_CURLOCALE->tbl8c2wc[
										  (__UCLIBC_CURLOCALE->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
										   << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
								if (!wc) {
									goto BAD;
								}
							}
							if (!(*dst = wc)) {
								s = NULL;
								break;
							}
							dst += incr;
							++s;
							--count;
						}
						if (dst != wcbuf) {
							*src = s;
						}
						return len - count;
					}
				#endif

				#ifdef __UCLIBC_HAS_LOCALE__
					assert(ENCODING == __ctype_encoding_7_bit);
				#endif

					while (count) {
						if ((*dst = (unsigned char) *s) == 0) {
							s = NULL;
							break;
						}
						if (*dst >= 0x80) {
				#ifdef __CTYPE_HAS_8_BIT_LOCALES
						BAD:
				#endif
							__set_errno(EILSEQ);
							return (size_t) -1;
						}
						++s;
						dst += incr;
						--count;
					}
					if (dst != wcbuf) {
						*src = s;
					}
					return len - count;
				}
				789	libc_hidden_def(mbsnrtowcs)
				790
				791	#endif
				792	/**********************************************************************/
				793	#ifdef L_wcsnrtombs
				794
				795	/* WARNING: We treat len as SIZE_MAX when dst is NULL! */
				796
				797	/* Note: We completely ignore ps in all currently supported conversions.
				798	* TODO: Check for valid state anyway? */
				799
				800	size_t wcsnrtombs(char __restrict dst, const wchar_t *__restrict src,
				801	size_t NWC, size_t len, mbstate_t *__restrict ps)
				802	{
				803	const __uwchar_t *s;
				804	size_t count;
				805	int incr;
				806	char buf[MB_LEN_MAX];
				807
				808	#ifdef __CTYPE_HAS_UTF_8_LOCALES
				809	if (ENCODING == __ctype_encoding_utf8) {
				810	return _wchar_wcsntoutf8s(dst, len, src, NWC);
				811	}
				812	#endif /* __CTYPE_HAS_UTF_8_LOCALES */
				813
				814	incr = 1;
				815	/* NOTE: The following is an AWFUL HACK! In order to support %ls in
				816	* printf, we need to be able to compute the number of bytes needed
				817	* for the mbs conversion, not to exceed the precision specified.
				818	* But if dst is NULL, the return value is the length assuming a
				819	* sufficiently sized buffer. So, we allow passing of (char *) src
				820	* as dst in order to flag that we really want the length, subject
				821	* to the restricted buffer size and no partial conversions.
				822	* See _wchar_wcsntoutf8s() as well. */
				823	if (!dst \|\| (dst == ((char *) src))) {
				824	if (!dst) {
				825	len = SIZE_MAX;
				826	}
				827	dst = buf;
				828	incr = 0;
				829	}
				830
				831	/* Since all the following encodings are single-byte encodings... */
				832	if (len > NWC) {
				833	len = NWC;
				834	}
				835
				836	count = len;
				837	s = (const __uwchar_t ) src;
				838
				839	#ifdef __CTYPE_HAS_8_BIT_LOCALES
				840	if (ENCODING == __ctype_encoding_8_bit) {
				841	__uwchar_t wc;
				842	__uwchar_t u;
				843	while (count) {
				844	if ((wc = *s) <= 0x7f) {
				845	if (!(*dst = (unsigned char) wc)) {
				846	s = NULL;
				847	break;
				848	}
				849	} else {
				850	u = 0;
				851	if (wc <= Cwc2c_DOMAIN_MAX) {
				852	u = __UCLIBC_CURLOCALE->idx8wc2c[wc >> (Cwc2c_TI_SHIFT
				853	+ Cwc2c_TT_SHIFT)];
				854	u = __UCLIBC_CURLOCALE->tbl8wc2c[(u << Cwc2c_TI_SHIFT)
				855	+ ((wc >> Cwc2c_TT_SHIFT)
				856	& ((1 << Cwc2c_TI_SHIFT)-1))];
				857	u = __UCLIBC_CURLOCALE->tbl8wc2c[Cwc2c_TI_LEN
				858	+ (u << Cwc2c_TT_SHIFT)
				859	+ (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
				860	}
				861
				862	#ifdef __WCHAR_REPLACEMENT_CHAR
				863	*dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
				864	#else /* __WCHAR_REPLACEMENT_CHAR */
				865	if (!u) {
				866	goto BAD;
				867	}
				868	*dst = (unsigned char) u;
				869	#endif /* __WCHAR_REPLACEMENT_CHAR */
				870	}
				871	++s;
				872	dst += incr;
				873	--count;
				874	}
				875	if (dst != buf) {
				876	src = (const wchar_t ) s;
				877	}
				878	return len - count;
				879	}
				880	#endif /* __CTYPE_HAS_8_BIT_LOCALES */
				881
				882	#ifdef __UCLIBC_HAS_LOCALE__
				883	assert(ENCODING == __ctype_encoding_7_bit);
				884	#endif
				885
				886	while (count) {
				887	if (*s >= 0x80) {
				888	#if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
				889	BAD:
				890	#endif
				891	__set_errno(EILSEQ);
				892	return (size_t) -1;
				893	}
				894	if ((dst = (unsigned char) s) == 0) {
				895	s = NULL;
				896	break;
				897	}
				898	++s;
				899	dst += incr;
				900	--count;
				901	}
				902	if (dst != buf) {
				903	src = (const wchar_t ) s;
				904	}
				905	return len - count;
				906	}
				907	libc_hidden_def(wcsnrtombs)
				908
				909	#endif
				910	/**********************************************************************/
				911	#ifdef L_wcswidth
				912
				913
				914	#ifdef __UCLIBC_MJN3_ONLY__
				915	#warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
				916	#warning TODO: Update wcwidth to match latest by Kuhn.
				917	#endif
				918
				919	#if defined(__UCLIBC_HAS_LOCALE__) && \
				920	( defined(__CTYPE_HAS_8_BIT_LOCALES) \|\| defined(__CTYPE_HAS_UTF_8_LOCALES) )
				921
				/* Index table with 257 non-decreasing entries ending at 255, which is
				 * the number of entries in new_tbl and new_wtbl.
				 * NOTE(review): presumably entries i and i+1 delimit the slice of
				 * new_tbl/new_wtbl belonging to high byte i of a character; confirm
				 * against the wcswidth() lookup code. */
				static const unsigned char new_idx[] = {
					0,    5,    5,    6,   10,   15,   28,   39,
					48,   48,   71,   94,  113,  128,  139,  154,
					175,  186,  188,  188,  188,  188,  188,  188,
					203,  208,  208,  208,  208,  208,  208,  208,
					208,  219,  219,  219,  222,  222,  222,  222,
					222,  222,  222,  222,  222,  222,  222,  224,
					224,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  231,  231,  231,
					231,  231,  231,  231,  231,  233,  233,  233,
					233,  233,  233,  233,  234,  234,  234,  234,
					234,  234,  234,  234,  234,  234,  234,  234,
					234,  234,  234,  234,  234,  234,  234,  234,
					234,  234,  234,  234,  234,  234,  234,  234,
					234,  234,  234,  234,  234,  234,  234,  234,
					234,  234,  234,  234,  234,  234,  234,  234,
					236,  236,  236,  236,  236,  236,  236,  236,
					236,  236,  236,  236,  236,  236,  236,  236,
					236,  236,  236,  236,  236,  236,  236,  236,
					236,  236,  236,  236,  236,  236,  236,  236,
					236,  237,  237,  238,  241,  241,  242,  249,
					255,
				};
				957
				/* Byte table with 255 entries, the same count as new_wtbl and the
				 * final value of new_idx.
				 * NOTE(review): presumably each entry is the low byte at which a new
				 * width range starts within the slice selected through new_idx;
				 * confirm against the wcswidth() lookup code. */
				static const unsigned char new_tbl[] = {
					0x00, 0x01, 0x20, 0x7f, 0xa0, 0x00, 0x00, 0x50,
					0x60, 0x70, 0x00, 0x83, 0x87, 0x88, 0x8a, 0x00,
					0x91, 0xa2, 0xa3, 0xba, 0xbb, 0xbe, 0xbf, 0xc0,
					0xc1, 0xc3, 0xc4, 0xc5, 0x00, 0x4b, 0x56, 0x70,
					0x71, 0xd6, 0xe5, 0xe7, 0xe9, 0xea, 0xee, 0x00,
					0x0f, 0x10, 0x11, 0x12, 0x30, 0x4b, 0xa6, 0xb1,
					0x00, 0x01, 0x03, 0x3c, 0x3d, 0x41, 0x49, 0x4d,
					0x4e, 0x51, 0x55, 0x62, 0x64, 0x81, 0x82, 0xbc,
					0xbd, 0xc1, 0xc5, 0xcd, 0xce, 0xe2, 0xe4, 0x00,
					0x02, 0x03, 0x3c, 0x3d, 0x41, 0x43, 0x47, 0x49,
					0x4b, 0x4e, 0x70, 0x72, 0x81, 0x83, 0xbc, 0xbd,
					0xc1, 0xc6, 0xc7, 0xc9, 0xcd, 0xce, 0x00, 0x01,
					0x02, 0x3c, 0x3d, 0x3f, 0x40, 0x41, 0x44, 0x4d,
					0x4e, 0x56, 0x57, 0x82, 0x83, 0xc0, 0xc1, 0xcd,
					0xce, 0x00, 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4e,
					0x55, 0x57, 0xbf, 0xc0, 0xc6, 0xc7, 0xcc, 0xce,
					0x00, 0x41, 0x44, 0x4d, 0x4e, 0xca, 0xcb, 0xd2,
					0xd5, 0xd6, 0xd7, 0x00, 0x31, 0x32, 0x34, 0x3b,
					0x47, 0x4f, 0xb1, 0xb2, 0xb4, 0xba, 0xbb, 0xbd,
					0xc8, 0xce, 0x00, 0x18, 0x1a, 0x35, 0x36, 0x37,
					0x38, 0x39, 0x3a, 0x71, 0x7f, 0x80, 0x85, 0x86,
					0x88, 0x90, 0x98, 0x99, 0xbd, 0xc6, 0xc7, 0x00,
					0x2d, 0x31, 0x32, 0x33, 0x36, 0x38, 0x39, 0x3a,
					0x58, 0x5a, 0x00, 0x60, 0x00, 0x12, 0x15, 0x32,
					0x35, 0x52, 0x54, 0x72, 0x74, 0xb7, 0xbe, 0xc6,
					0xc7, 0xc9, 0xd4, 0x00, 0x0b, 0x0f, 0xa9, 0xaa,
					0x00, 0x0b, 0x10, 0x2a, 0x2f, 0x60, 0x64, 0x6a,
					0x70, 0xd0, 0xeb, 0x00, 0x29, 0x2b, 0x00, 0x80,
					0x00, 0x2a, 0x30, 0x3f, 0x40, 0x99, 0x9b, 0x00,
					0xd0, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x1e,
					0x1f, 0x00, 0x00, 0x10, 0x20, 0x24, 0x30, 0x70,
					0xff, 0x00, 0x61, 0xe0, 0xe7, 0xf9, 0xfc,
				};
				992
				993	static const signed char new_wtbl[] = {
				994	0, -1, 1, -1, 1, 1, 0, 1,
				995	0, 1, 1, 0, 1, 0, 1, 1,
				996	0, 1, 0, 1, 0, 1, 0, 1,
				997	0, 1, 0, 1, 1, 0, 1, 0,
				998	1, 0, 1, 0, 1, 0, 1, 1,
				999	0, 1, 0, 1, 0, 1, 0, 1,
				1000	1, 0, 1, 0, 1, 0, 1, 0,
				1001	1, 0, 1, 0, 1, 0, 1, 0,
				1002	1, 0, 1, 0, 1, 0, 1, 1,
				1003	0, 1, 0, 1, 0, 1, 0, 1,
				1004	0, 1, 0, 1, 0, 1, 0, 1,
				1005	0, 1, 0, 1, 0, 1, 1, 0,
				1006	1, 0, 1, 0, 1, 0, 1, 0,
				1007	1, 0, 1, 0, 1, 0, 1, 0,
				1008	1, 1, 0, 1, 0, 1, 0, 1,
				1009	0, 1, 0, 1, 0, 1, 0, 1,
				1010	1, 0, 1, 0, 1, 0, 1, 0,
				1011	1, 0, 1, 1, 0, 1, 0, 1,
				1012	0, 1, 0, 1, 0, 1, 0, 1,
				1013	0, 1, 1, 0, 1, 0, 1, 0,
				1014	1, 0, 1, 0, 1, 0, 1, 0,
				1015	1, 0, 1, 0, 1, 0, 1, 1,
				1016	0, 1, 0, 1, 0, 1, 0, 1,
				1017	0, 1, 2, 0, 1, 0, 1, 0,
				1018	1, 0, 1, 0, 1, 0, 1, 0,
				1019	1, 0, 1, 1, 0, 1, 0, 1,
				1020	1, 0, 1, 0, 1, 0, 1, 0,
				1021	1, 0, 1, 1, 2, 1, 1, 2,
				1022	2, 0, 2, 1, 2, 0, 2, 2,
				1023	1, 1, 2, 1, 1, 2, 1, 0,
				1024	1, 1, 0, 1, 0, 1, 2, 1,
				1025	0, 2, 1, 2, 1, 0, 1,
				1026	};
				1027
				1028
				1029	int wcswidth(const wchar_t *pwcs, size_t n)
				1030	{
				1031	int h, l, m, count;
				1032	wchar_t wc;
				1033	unsigned char b;
				1034
				1035	if (ENCODING == __ctype_encoding_7_bit) {
				1036	size_t i;
				1037
				1038	for (i = 0 ; (i < n) && pwcs[i] ; i++) {
				1039	if (pwcs[i] != (pwcs[i] & 0x7f)) {
				1040	return -1;
				1041	}
				1042	}
				1043	}
				1044	#ifdef __CTYPE_HAS_8_BIT_LOCALES
				1045	else if (ENCODING == __ctype_encoding_8_bit) {
				1046	mbstate_t mbstate;
				1047
				1048	mbstate.__mask = 0; /* Initialize the mbstate. */
				1049	if (wcsnrtombs(NULL, &pwcs, n, SIZE_MAX, &mbstate) == ((size_t) - 1)) {
				1050	return -1;
				1051	}
				1052	}
				1053	#endif /* __CTYPE_HAS_8_BIT_LOCALES */
				1054	#if defined(__CTYPE_HAS_UTF_8_LOCALES) && defined(KUHN)
				1055	/* For stricter handling of allowed unicode values... see comments above. */
				1056	else if (ENCODING == __ctype_encoding_utf8) {
				1057	size_t i;
				1058
				1059	for (i = 0 ; (i < n) && pwcs[i] ; i++) {
				1060	if ( (((__uwchar_t)((pwcs[i]) - 0xfffeU)) < 2)
				1061	\|\| (((__uwchar_t)((pwcs[i]) - 0xd800U)) < (0xe000U - 0xd800U))
				1062	) {
				1063	return -1;
				1064	}
				1065	}
				1066	}
				1067	#endif /* __CTYPE_HAS_UTF_8_LOCALES */
				1068
				1069	for (count = 0 ; n && (wc = *pwcs++) ; n--) {
				1070	if (wc <= 0xff) {
				1071	/* If we're here, wc != 0. */
				1072	if ((wc < 32) \|\| ((wc >= 0x7f) && (wc < 0xa0))) {
				1073	return -1;
				1074	}
				1075	++count;
				1076	continue;
				1077	}
				1078	if (((unsigned int) wc) <= 0xffff) {
				1079	b = wc & 0xff;
				1080	h = (wc >> 8);
				1081	l = new_idx[h];
				1082	h = new_idx[h+1];
				1083	while ((m = (l+h) >> 1) != l) {
				1084	if (b >= new_tbl[m]) {
				1085	l = m;
				1086	} else { /* wc < tbl[m] */
				1087	h = m;
				1088	}
				1089	}
				1090	count += new_wtbl[l]; /* none should be -1. */
				1091	continue;
				1092	}
				1093
				1094	/* Redo this to minimize average number of compares?*/
				1095	if (wc >= 0x1d167) {
				1096	if (wc <= 0x1d1ad) {
				1097	if ((wc <= 0x1d169
				1098	\|\| (wc >= 0x1d173
				1099	&& (wc <= 0x1d182
				1100	\|\| (wc >= 0x1d185
				1101	&& (wc <= 0x1d18b
				1102	\|\| (wc >= 0x1d1aa))))))
				1103	) {
				1104	continue;
				1105	}
				1106	} else if (((wc >= 0xe0020) && (wc <= 0xe007f)) \|\| (wc == 0xe0001)) {
				1107	continue;
				1108	} else if ((wc >= 0x20000) && (wc <= 0x2ffff)) {
				1109	++count; /* need 2.. add one here */
				1110	}
				1111	#if (WCHAR_MAX > 0x7fffffffL)
				1112	else if (wc > 0x7fffffffL) {
				1113	return -1;
				1114	}
				1115	#endif /* (WCHAR_MAX > 0x7fffffffL) */
				1116	}
				1117
				1118	++count;
				1119	}
				1120
				1121	return count;
				1122	}
				1123
				1124	#else /* __UCLIBC_HAS_LOCALE__ */
				1125
				1126	int wcswidth(const wchar_t *pwcs, size_t n)
				1127	{
				1128	int count;
				1129	wchar_t wc;
				1130	size_t i;
				1131
				1132	for (i = 0 ; (i < n) && pwcs[i] ; i++) {
				1133	if (pwcs[i] != (pwcs[i] & 0x7f)) {
				1134	return -1;
				1135	}
				1136	}
				1137
				1138	for (count = 0 ; n && (wc = *pwcs++) ; n--) {
				1139	if (wc <= 0xff) {
				1140	/* If we're here, wc != 0. */
				1141	if ((wc < 32) \|\| ((wc >= 0x7f) && (wc < 0xa0))) {
				1142	return -1;
				1143	}
				1144	++count;
				1145	continue;
				1146	} else {
				1147	return -1;
				1148	}
				1149	}
				1150
				1151	return count;
				1152	}
				1153
				1154	#endif /* __UCLIBC_HAS_LOCALE__ */
				1155
				1156	libc_hidden_def(wcswidth)
				1157
				1158	#endif
				1159	/**********************************************************************/
				1160	#ifdef L_wcwidth
				1161
				1162
				1163	int wcwidth(wchar_t wc)
				1164	{
				1165	return wcswidth(&wc, 1);
				1166	}
				1167
				1168	#endif
				1169	/**********************************************************************/
				1170
				1171
				1172	typedef struct {
				1173	mbstate_t tostate;
				1174	mbstate_t fromstate;
				1175	int tocodeset;
				1176	int fromcodeset;
				1177	int frombom;
				1178	int tobom;
				1179	int fromcodeset0;
				1180	int frombom0;
				1181	int tobom0;
				1182	int skip_invalid_input; /* To support iconv -c option. */
				1183	} _UC_iconv_t;
				1184
				1185	/* For the multibyte
				1186	* bit 0 means swap endian
				1187	* bit 1 means 2 byte
				1188	* bit 2 means 4 byte
				1189	*
				1190	*/
				1191
				1192	#if defined L_iconv && defined _LIBC
				1193	/* Used externally only by iconv utility */
				1194	extern const unsigned char __iconv_codesets[];
				1195	libc_hidden_proto(__iconv_codesets)
				1196	#endif
				1197
				1198	#if defined L_iconv \|\| defined L_iconv_main
				1199	const unsigned char __iconv_codesets[] =
				1200	"\x0a\xe0""WCHAR_T\x00" /* superset of UCS-4 but platform-endian */
				1201	#if __BYTE_ORDER == __BIG_ENDIAN
				1202	"\x08\xec""UCS-4\x00" /* always BE */
				1203	"\x0a\xec""UCS-4BE\x00"
				1204	"\x0a\xed""UCS-4LE\x00"
				1205	"\x09\xe4""UTF-32\x00" /* platform endian with BOM */
				1206	"\x0b\xe4""UTF-32BE\x00"
				1207	"\x0b\xe5""UTF-32LE\x00"
				1208	"\x08\xe2""UCS-2\x00" /* always BE */
				1209	"\x0a\xe2""UCS-2BE\x00"
				1210	"\x0a\xe3""UCS-2LE\x00"
				1211	"\x09\xea""UTF-16\x00" /* platform endian with BOM */
				1212	"\x0b\xea""UTF-16BE\x00"
				1213	"\x0b\xeb""UTF-16LE\x00"
				1214	#elif __BYTE_ORDER == __LITTLE_ENDIAN
				1215	"\x08\xed""UCS-4\x00" /* always BE */
				1216	"\x0a\xed""UCS-4BE\x00"
				1217	"\x0a\xec""UCS-4LE\x00"
				1218	"\x09\xf4""UTF-32\x00" /* platform endian with BOM */
				1219	"\x0b\xe5""UTF-32BE\x00"
				1220	"\x0b\xe4""UTF-32LE\x00"
				1221	"\x08\xe3""UCS-2\x00" /* always BE */
				1222	"\x0a\xe3""UCS-2BE\x00"
				1223	"\x0a\xe2""UCS-2LE\x00"
				1224	"\x09\xfa""UTF-16\x00" /* platform endian with BOM */
				1225	"\x0b\xeb""UTF-16BE\x00"
				1226	"\x0b\xea""UTF-16LE\x00"
				1227	#endif
				1228	"\x08\x02""UTF-8\x00"
				1229	"\x0b\x01""US-ASCII\x00"
				1230	"\x07\x01""ASCII"; /* Must be last! (special case to save a nul) */
				1231	#endif
				1232	#if defined L_iconv && defined _LIBC
				1233	libc_hidden_data_def(__iconv_codesets)
				1234	#endif
				1235
				1236
				1237	#ifdef L_iconv
				1238
				1239	#include <iconv.h>
				1240	#include <string.h>
				1241	#include <endian.h>
				1242	#include <byteswap.h>
				1243
				1244	#if (__BYTE_ORDER != __BIG_ENDIAN) && (__BYTE_ORDER != __LITTLE_ENDIAN)
				1245	#error unsupported endianness for iconv
				1246	#endif
				1247
				1248	#ifndef __CTYPE_HAS_8_BIT_LOCALES
				1249	#error currently iconv requires 8 bit locales
				1250	#endif
				1251	#ifndef __CTYPE_HAS_UTF_8_LOCALES
				1252	#error currently iconv requires UTF-8 locales
				1253	#endif
				1254
				1255
				1256	enum {
				1257	IC_WCHAR_T = 0xe0,
				1258	IC_MULTIBYTE = 0xe0,
				1259	#if __BYTE_ORDER == __BIG_ENDIAN
				1260	IC_UCS_4 = 0xec,
				1261	IC_UTF_32 = 0xe4,
				1262	IC_UCS_2 = 0xe2,
				1263	IC_UTF_16 = 0xea,
				1264	#else
				1265	IC_UCS_4 = 0xed,
				1266	IC_UTF_32 = 0xe5,
				1267	IC_UCS_2 = 0xe3,
				1268	IC_UTF_16 = 0xeb,
				1269	#endif
				1270	IC_UTF_8 = 2,
				1271	IC_ASCII = 1
				1272	};
				1273
				1274
				1275	static int find_codeset(const char *name)
				1276	{
				1277	const unsigned char *s;
				1278	int codeset;
				1279
				1280	for (s = __iconv_codesets; s; s += s) {
				1281	if (!strcasecmp((char*) (s + 2), name)) {
				1282	return s[1];
				1283	}
				1284	}
				1285
				1286	/* The following is ripped from find_locale in locale.c. */
				1287
				1288	/* TODO: maybe CODESET_LIST + s ??? /
				1289	/* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
				1290	codeset = 2;
				1291	s = (const unsigned char *) __LOCALE_DATA_CODESET_LIST;
				1292	do {
				1293	++codeset; /* Increment codeset first. */
				1294	if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) {
				1295	return codeset;
				1296	}
				1297	} while (*++s);
				1298
				1299	return 0; /* No matching codeset! */
				1300	}
				1301
				1302	iconv_t weak_function iconv_open(const char tocode, const char fromcode)
				1303	{
				1304	register _UC_iconv_t *px;
				1305	int tocodeset, fromcodeset;
				1306
				1307	if (((tocodeset = find_codeset(tocode)) != 0)
				1308	&& ((fromcodeset = find_codeset(fromcode)) != 0)) {
				1309	if ((px = malloc(sizeof(_UC_iconv_t))) != NULL) {
				1310	px->tocodeset = tocodeset;
				1311	px->tobom0 = px->tobom = (tocodeset >= 0xe0) ? (tocodeset & 0x10) >> 4 : 0;
				1312	px->fromcodeset0 = px->fromcodeset = fromcodeset;
				1313	px->frombom0 = px->frombom = (fromcodeset >= 0xe0) ? (fromcodeset & 0x10) >> 4 : 0;
				1314	px->skip_invalid_input = px->tostate.__mask
				1315	= px->fromstate.__mask = 0;
				1316	return (iconv_t) px;
				1317	}
				1318	} else {
				1319	__set_errno(EINVAL);
				1320	}
				1321	return (iconv_t)(-1);
				1322	}
				1323
				1324	int weak_function iconv_close(iconv_t cd)
				1325	{
				1326	free(cd);
				1327
				1328	return 0;
				1329	}
				1330
				1331	size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
				1332	size_t *__restrict inbytesleft,
				1333	char **__restrict outbuf,
				1334	size_t *__restrict outbytesleft)
				1335	{
				1336	_UC_iconv_t px = (_UC_iconv_t ) cd;
				1337	size_t nrcount, r;
				1338	wchar_t wc, wc2;
				1339	int inci, inco;
				1340
				1341	assert(px != (_UC_iconv_t *)(-1));
				1342	assert(sizeof(wchar_t) == 4);
				1343
				1344	if (!inbuf \|\| !inbuf) { / Need to reinitialze conversion state. */
				1345	/* Note: For shift-state encodings we possibly need to output the
				1346	* shift sequence to return to initial state! */
				1347	if ((px->fromcodeset & 0xf0) == 0xe0) {
				1348	}
				1349	px->tostate.__mask = px->fromstate.__mask = 0;
				1350	px->fromcodeset = px->fromcodeset0;
				1351	px->tobom = px->tobom0;
				1352	px->frombom = px->frombom0;
				1353	return 0;
				1354	}
				1355
				1356	nrcount = 0;
				1357	while (*inbytesleft) {
				1358	if (!*outbytesleft) {
				1359	TOO_BIG:
				1360	__set_errno(E2BIG);
				1361	return (size_t) -1;
				1362	}
				1363
				1364	inci = inco = 1;
				1365	if (px->fromcodeset >= IC_MULTIBYTE) {
				1366	inci = (px->fromcodeset == IC_WCHAR_T) ? 4: (px->fromcodeset & 6);
				1367	if (*inbytesleft < inci) goto INVALID;
				1368	wc = (((unsigned int)((unsigned char)((*inbuf)[0]))) << 8)
				1369	+ ((unsigned char)((*inbuf)[1]));
				1370	if (inci == 4) {
				1371	wc = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
				1372	+ ((unsigned char)((*inbuf)[3])) + (wc << 16);
				1373	if (!(px->fromcodeset & 1)) wc = bswap_32(wc);
				1374	} else {
				1375	if (!(px->fromcodeset & 1)) wc = bswap_16(wc);
				1376	if (((px->fromcodeset & IC_UTF_16) == IC_UTF_16)
				1377	&& (((__uwchar_t)(wc - 0xd800U)) < (0xdc00U - 0xd800U))
				1378	) { /* surrogate */
				1379	wc =- 0xd800U;
				1380	if (*inbytesleft < 4) goto INVALID;
				1381	wc2 = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
				1382	+ ((unsigned char)((*inbuf)[3]));
				1383	if (!(px->fromcodeset & 1)) wc = bswap_16(wc2);
				1384	if (((__uwchar_t)(wc2 -= 0xdc00U)) < (0xe0000U - 0xdc00U)) {
				1385	goto ILLEGAL;
				1386	}
				1387	inci = 4; /* Change inci here in case skipping illegals. */
				1388	wc = 0x10000UL + (wc << 10) + wc2;
				1389	}
				1390	}
				1391
				1392	if (px->frombom) {
				1393	px->frombom = 0;
				1394	if ((wc == 0xfeffU)
				1395	\|\| (wc == ((inci == 4)
				1396	? (((wchar_t) 0xfffe0000UL))
				1397	: ((wchar_t)(0xfffeUL))))
				1398	) {
				1399	if (wc != 0xfeffU) {
				1400	px->fromcodeset ^= 1; /* toggle endianness */
				1401	wc = 0xfeffU;
				1402	}
				1403	if (!px->frombom) {
				1404	goto BOM_SKIP_OUTPUT;
				1405	}
				1406	goto GOT_BOM;
				1407	}
				1408	}
				1409
				1410	if (px->fromcodeset != IC_WCHAR_T) {
				1411	if (((__uwchar_t) wc) > (((px->fromcodeset & IC_UCS_4) == IC_UCS_4)
				1412	? 0x7fffffffUL : 0x10ffffUL)
				1413	#ifdef KUHN
				1414	\|\| (((__uwchar_t)(wc - 0xfffeU)) < 2)
				1415	\|\| (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
				1416	#endif
				1417	) {
				1418	goto ILLEGAL;
				1419	}
				1420	}
				1421	} else if (px->fromcodeset == IC_UTF_8) {
				1422	const char p = inbuf;
				1423	r = _wchar_utf8sntowcs(&wc, 1, &p, *inbytesleft, &px->fromstate, 0);
				1424	if (((ssize_t) r) <= 0) { /* either EILSEQ or incomplete or nul */
				1425	if (((ssize_t) r) < 0) { /* either EILSEQ or incomplete or nul */
				1426	assert((r == (size_t)(-1)) \|\| (r == (size_t)(-2)));
				1427	if (r == (size_t)(-2)) {
				1428	INVALID:
				1429	__set_errno(EINVAL);
				1430	} else {
				1431	px->fromstate.__mask = 0;
				1432	inci = 1;
				1433	ILLEGAL:
				1434	if (px->skip_invalid_input) {
				1435	px->skip_invalid_input = 2; /* flag for iconv utility */
				1436	goto BOM_SKIP_OUTPUT;
				1437	}
				1438	__set_errno(EILSEQ);
				1439	}
				1440	return (size_t)(-1);
				1441	}
				1442	#ifdef __UCLIBC_MJN3_ONLY__
				1443	#warning TODO: optimize this.
				1444	#endif
				1445	if (p != NULL) { /* incomplete char case */
				1446	goto INVALID;
				1447	}
				1448	p = inbuf + 1; / nul */
				1449	}
				1450	inci = p - *inbuf;
				1451	} else if ((wc = ((unsigned char)(*inbuf))) >= 0x80) { / Non-ASCII... */
				1452	if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
				1453	goto ILLEGAL;
				1454	} else { /* some other 8-bit ascii-extension codeset */
				1455	const __codeset_8_bit_t *c8b
				1456	= __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
				1457	wc -= 0x80;
				1458	wc = __UCLIBC_CURLOCALE->tbl8c2wc[
				1459	(c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
				1460	<< Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
				1461	if (!wc) {
				1462	goto ILLEGAL;
				1463	}
				1464	}
				1465	}
				1466
				1467
				1468	if (px->tobom) {
				1469	inci = 0;
				1470	wc = 0xfeffU;
				1471	GOT_BOM:
				1472	px->tobom = 0;
				1473	}
				1474
				1475	if (px->tocodeset >= IC_MULTIBYTE) {
				1476	inco = (px->tocodeset == IC_WCHAR_T) ? 4: (px->tocodeset & 6);
				1477	if (*outbytesleft < inco) goto TOO_BIG;
				1478	if (px->tocodeset != IC_WCHAR_T) {
				1479	if (((__uwchar_t) wc) > (((px->tocodeset & IC_UCS_4) == IC_UCS_4)
				1480	? 0x7fffffffUL : 0x10ffffUL)
				1481	#ifdef KUHN
				1482	\|\| (((__uwchar_t)(wc - 0xfffeU)) < 2)
				1483	\|\| (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
				1484	#endif
				1485	) {
				1486	REPLACE_32:
				1487	wc = 0xfffd;
				1488	++nrcount;
				1489	}
				1490	}
				1491	if (inco == 4) {
				1492	if (px->tocodeset & 1) wc = bswap_32(wc);
				1493	} else {
				1494	if (((__uwchar_t)wc ) > 0xffffU) {
				1495	if ((px->tocodeset & IC_UTF_16) != IC_UTF_16) {
				1496	goto REPLACE_32;
				1497	}
				1498	if (*outbytesleft < (inco = 4)) goto TOO_BIG;
				1499	wc2 = 0xdc00U + (wc & 0x3ff);
				1500	wc = 0xd800U + ((wc >> 10) & 0x3ff);
				1501	if (px->tocodeset & 1) {
				1502	wc = bswap_16(wc);
				1503	wc2 = bswap_16(wc2);
				1504	}
				1505	wc += (wc2 << 16);
				1506	} else if (px->tocodeset & 1) wc = bswap_16(wc);
				1507	}
				1508	(*outbuf)[0] = (char)((unsigned char)(wc));
				1509	(*outbuf)[1] = (char)((unsigned char)(wc >> 8));
				1510	if (inco == 4) {
				1511	(*outbuf)[2] = (char)((unsigned char)(wc >> 16));
				1512	(*outbuf)[3] = (char)((unsigned char)(wc >> 24));
				1513	}
				1514	} else if (px->tocodeset == IC_UTF_8) {
				1515	const wchar_t *pw = &wc;
				1516	do {
				1517	r = _wchar_wcsntoutf8s(outbuf, outbytesleft, &pw, 1);
				1518	if (r != (size_t)(-1)) {
				1519	#ifdef __UCLIBC_MJN3_ONLY__
				1520	#warning TODO: What happens for a nul?
				1521	#endif
				1522	if (r == 0) {
				1523	if (wc != 0) {
				1524	goto TOO_BIG;
				1525	}
				1526	++r;
				1527	}
				1528	break;
				1529	}
				1530	wc = 0xfffdU;
				1531	++nrcount;
				1532	} while (1);
				1533	inco = r;
				1534	} else if (((__uwchar_t)(wc)) < 0x80) {
				1535	CHAR_GOOD:
				1536	**outbuf = wc;
				1537	} else {
				1538	if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
				1539	const __codeset_8_bit_t *c8b
				1540	= __locale_mmap->codeset_8_bit + px->tocodeset - 3;
				1541	__uwchar_t u;
				1542	u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
				1543	u = __UCLIBC_CURLOCALE->tbl8wc2c[(u << Cwc2c_TI_SHIFT)
				1544	+ ((wc >> Cwc2c_TT_SHIFT)
				1545	& ((1 << Cwc2c_TI_SHIFT)-1))];
				1546	wc = __UCLIBC_CURLOCALE->tbl8wc2c[Cwc2c_TI_LEN
				1547	+ (u << Cwc2c_TT_SHIFT)
				1548	+ (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
				1549	if (wc) {
				1550	goto CHAR_GOOD;
				1551	}
				1552	}
				1553	**outbuf = '?';
				1554	++nrcount;
				1555	}
				1556
				1557	*outbuf += inco;
				1558	*outbytesleft -= inco;
				1559	BOM_SKIP_OUTPUT:
				1560	*inbuf += inci;
				1561	*inbytesleft -= inci;
				1562	}
				1563	return nrcount;
				1564	}
				1565	#endif