blob: 0c175d19d197111e02ebf3fdddb74a6d27d78401 [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/* Copyright (C) 1999-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>.
17
18 As a special exception, if you link the code in this file with
19 files compiled with a GNU compiler to produce an executable,
20 that does not cause the resulting executable to be covered by
21 the GNU Lesser General Public License. This exception does not
22 however invalidate any other reasons why the executable file
23 might be covered by the GNU Lesser General Public License.
24 This exception applies to code released by its copyright holders
25 in files containing the exception. */
26
27#include <libioP.h>
28#ifdef _LIBC
29# include <dlfcn.h>
30# include <wchar.h>
31#endif
32#include <assert.h>
33#include <stdlib.h>
34#include <string.h>
35
36#ifdef _LIBC
37# include <langinfo.h>
38# include <locale/localeinfo.h>
39# include <wcsmbs/wcsmbsload.h>
40# include <iconv/gconv_int.h>
41# include <shlib-compat.h>
42# include <sysdep.h>
43#endif
44
45
46/* Prototypes of libio's codecvt functions. */
47static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
48 __mbstate_t *statep,
49 const wchar_t *from_start,
50 const wchar_t *from_end,
51 const wchar_t **from_stop, char *to_start,
52 char *to_end, char **to_stop);
53static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
54 __mbstate_t *statep, char *to_start,
55 char *to_end, char **to_stop);
56static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
57 __mbstate_t *statep,
58 const char *from_start,
59 const char *from_end,
60 const char **from_stop, wchar_t *to_start,
61 wchar_t *to_end, wchar_t **to_stop);
62static int do_encoding (struct _IO_codecvt *codecvt);
63static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
64 const char *from_start,
65 const char *from_end, _IO_size_t max);
66static int do_max_length (struct _IO_codecvt *codecvt);
67static int do_always_noconv (struct _IO_codecvt *codecvt);
68
69
70/* The functions used in `codecvt' for libio are always the same. */
71const struct _IO_codecvt __libio_codecvt =
72{
73 .__codecvt_destr = NULL, /* Destructor, never used. */
74 .__codecvt_do_out = do_out,
75 .__codecvt_do_unshift = do_unshift,
76 .__codecvt_do_in = do_in,
77 .__codecvt_do_encoding = do_encoding,
78 .__codecvt_do_always_noconv = do_always_noconv,
79 .__codecvt_do_length = do_length,
80 .__codecvt_do_max_length = do_max_length
81};
82
83
84/* Return orientation of stream. If mode is nonzero try to change
85 the orientation first. */
86#undef _IO_fwide
87int
88_IO_fwide (fp, mode)
89 _IO_FILE *fp;
90 int mode;
91{
92 /* Normalize the value. */
93 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
94
95#if defined SHARED && defined _LIBC \
96 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
97 if (__builtin_expect (&_IO_stdin_used == NULL, 0)
98 && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr))
99 /* This is for a stream in the glibc 2.0 format. */
100 return -1;
101#endif
102
103 /* The orientation already has been determined. */
104 if (fp->_mode != 0
105 /* Or the caller simply wants to know about the current orientation. */
106 || mode == 0)
107 return fp->_mode;
108
109 /* Set the orientation appropriately. */
110 if (mode > 0)
111 {
112 struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
113
114 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
115 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
116
117 /* Get the character conversion functions based on the currently
118 selected locale for LC_CTYPE. */
119#ifdef _LIBC
120 {
121 /* Clear the state. We start all over again. */
122 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
123 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
124
125 struct gconv_fcts fcts;
126 __wcsmbs_clone_conv (&fcts);
127 assert (fcts.towc_nsteps == 1);
128 assert (fcts.tomb_nsteps == 1);
129
130 /* The functions are always the same. */
131 *cc = __libio_codecvt;
132
133 cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
134 cc->__cd_in.__cd.__steps = fcts.towc;
135
136 cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
137 cc->__cd_in.__cd.__data[0].__internal_use = 1;
138 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
139 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
140
141 cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
142 cc->__cd_out.__cd.__steps = fcts.tomb;
143
144 cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
145 cc->__cd_out.__cd.__data[0].__internal_use = 1;
146 cc->__cd_out.__cd.__data[0].__flags
147 = __GCONV_IS_LAST | __GCONV_TRANSLIT;
148 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
149 }
150#else
151# ifdef _GLIBCPP_USE_WCHAR_T
152 {
153 /* Determine internal and external character sets.
154
155 XXX For now we make our life easy: we assume a fixed internal
156 encoding (as most sane systems have; hi HP/UX!). If somebody
157 cares about systems which changing internal charsets they
158 should come up with a solution for the determination of the
159 currently used internal character set. */
160 const char *internal_ccs = _G_INTERNAL_CCS;
161 const char *external_ccs = NULL;
162
163# ifdef HAVE_NL_LANGINFO
164 external_ccs = nl_langinfo (CODESET);
165# endif
166 if (external_ccs == NULL)
167 external_ccs = "ISO-8859-1";
168
169 cc->__cd_in = iconv_open (internal_ccs, external_ccs);
170 if (cc->__cd_in != (iconv_t) -1)
171 cc->__cd_out = iconv_open (external_ccs, internal_ccs);
172
173 if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
174 {
175 if (cc->__cd_in != (iconv_t) -1)
176 iconv_close (cc->__cd_in);
177 /* XXX */
178 abort ();
179 }
180 }
181# else
182# error "somehow determine this from LC_CTYPE"
183# endif
184#endif
185
186 /* From now on use the wide character callback functions. */
187 _IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;
188 }
189
190 /* Set the mode now. */
191 fp->_mode = mode;
192
193 return mode;
194}
195
196
197static enum __codecvt_result
198do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
199 const wchar_t *from_start, const wchar_t *from_end,
200 const wchar_t **from_stop, char *to_start, char *to_end,
201 char **to_stop)
202{
203 enum __codecvt_result result;
204
205#ifdef _LIBC
206 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
207 int status;
208 size_t dummy;
209 const unsigned char *from_start_copy = (unsigned char *) from_start;
210
211 codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
212 codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
213 codecvt->__cd_out.__cd.__data[0].__statep = statep;
214
215 __gconv_fct fct = gs->__fct;
216#ifdef PTR_DEMANGLE
217 if (gs->__shlib_handle != NULL)
218 PTR_DEMANGLE (fct);
219#endif
220
221 status = DL_CALL_FCT (fct,
222 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
223 (const unsigned char *) from_end, NULL,
224 &dummy, 0, 0));
225
226 *from_stop = (wchar_t *) from_start_copy;
227 *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
228
229 switch (status)
230 {
231 case __GCONV_OK:
232 case __GCONV_EMPTY_INPUT:
233 result = __codecvt_ok;
234 break;
235
236 case __GCONV_FULL_OUTPUT:
237 case __GCONV_INCOMPLETE_INPUT:
238 result = __codecvt_partial;
239 break;
240
241 default:
242 result = __codecvt_error;
243 break;
244 }
245#else
246# ifdef _GLIBCPP_USE_WCHAR_T
247 size_t res;
248 const char *from_start_copy = (const char *) from_start;
249 size_t from_len = from_end - from_start;
250 char *to_start_copy = to_start;
251 size_t to_len = to_end - to_start;
252 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
253 &to_start_copy, &to_len);
254
255 if (res == 0 || from_len == 0)
256 result = __codecvt_ok;
257 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
258 result = __codecvt_partial;
259 else
260 result = __codecvt_error;
261
262# else
263 /* Decide what to do. */
264 result = __codecvt_error;
265# endif
266#endif
267
268 return result;
269}
270
271
272static enum __codecvt_result
273do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
274 char *to_start, char *to_end, char **to_stop)
275{
276 enum __codecvt_result result;
277
278#ifdef _LIBC
279 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
280 int status;
281 size_t dummy;
282
283 codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
284 codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
285 codecvt->__cd_out.__cd.__data[0].__statep = statep;
286
287 __gconv_fct fct = gs->__fct;
288#ifdef PTR_DEMANGLE
289 if (gs->__shlib_handle != NULL)
290 PTR_DEMANGLE (fct);
291#endif
292
293 status = DL_CALL_FCT (fct,
294 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
295 NULL, &dummy, 1, 0));
296
297 *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
298
299 switch (status)
300 {
301 case __GCONV_OK:
302 case __GCONV_EMPTY_INPUT:
303 result = __codecvt_ok;
304 break;
305
306 case __GCONV_FULL_OUTPUT:
307 case __GCONV_INCOMPLETE_INPUT:
308 result = __codecvt_partial;
309 break;
310
311 default:
312 result = __codecvt_error;
313 break;
314 }
315#else
316# ifdef _GLIBCPP_USE_WCHAR_T
317 size_t res;
318 char *to_start_copy = (char *) to_start;
319 size_t to_len = to_end - to_start;
320
321 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
322
323 if (res == 0)
324 result = __codecvt_ok;
325 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
326 result = __codecvt_partial;
327 else
328 result = __codecvt_error;
329# else
330 /* Decide what to do. */
331 result = __codecvt_error;
332# endif
333#endif
334
335 return result;
336}
337
338
339static enum __codecvt_result
340do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
341 const char *from_start, const char *from_end, const char **from_stop,
342 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
343{
344 enum __codecvt_result result;
345
346#ifdef _LIBC
347 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
348 int status;
349 size_t dummy;
350 const unsigned char *from_start_copy = (unsigned char *) from_start;
351
352 codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_start;
353 codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) to_end;
354 codecvt->__cd_in.__cd.__data[0].__statep = statep;
355
356 __gconv_fct fct = gs->__fct;
357#ifdef PTR_DEMANGLE
358 if (gs->__shlib_handle != NULL)
359 PTR_DEMANGLE (fct);
360#endif
361
362 status = DL_CALL_FCT (fct,
363 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
364 (const unsigned char *) from_end, NULL,
365 &dummy, 0, 0));
366
367 *from_stop = (const char *) from_start_copy;
368 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
369
370 switch (status)
371 {
372 case __GCONV_OK:
373 case __GCONV_EMPTY_INPUT:
374 result = __codecvt_ok;
375 break;
376
377 case __GCONV_FULL_OUTPUT:
378 case __GCONV_INCOMPLETE_INPUT:
379 result = __codecvt_partial;
380 break;
381
382 default:
383 result = __codecvt_error;
384 break;
385 }
386#else
387# ifdef _GLIBCPP_USE_WCHAR_T
388 size_t res;
389 const char *from_start_copy = (const char *) from_start;
390 size_t from_len = from_end - from_start;
391 char *to_start_copy = (char *) from_start;
392 size_t to_len = to_end - to_start;
393
394 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
395 &to_start_copy, &to_len);
396
397 if (res == 0)
398 result = __codecvt_ok;
399 else if (to_len == 0)
400 result = __codecvt_partial;
401 else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
402 result = __codecvt_partial;
403 else
404 result = __codecvt_error;
405# else
406 /* Decide what to do. */
407 result = __codecvt_error;
408# endif
409#endif
410
411 return result;
412}
413
414
/* Describe the external encoding handled by CODECVT.

   In the _LIBC build the answer comes from the first input conversion
   step: -1 if that step is marked stateful, 0 if its minimum and
   maximum number of needed input bytes differ, and otherwise that
   constant number of bytes.  Without _LIBC the result is always -1.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding cannot be described by a byte count.  */
  if (step->__stateful)
    return -1;

  /* A fixed width is reported as such, a variable width as 0.  */
  return (step->__min_needed_from == step->__max_needed_from
          ? step->__min_needed_from
          : 0);
#else
  /* Nothing is known here, so assume the worst case.  */
  return -1;
#endif
}
435
436
/* Always returns 0, whatever CODECVT is.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  /* The argument is not looked at.  */
  (void) codecvt;

  return 0;
}
442
443
444static int
445do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
446 const char *from_start, const char *from_end, _IO_size_t max)
447{
448 int result;
449#ifdef _LIBC
450 const unsigned char *cp = (const unsigned char *) from_start;
451 wchar_t to_buf[max];
452 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
453 size_t dummy;
454
455 codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_buf;
456 codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) &to_buf[max];
457 codecvt->__cd_in.__cd.__data[0].__statep = statep;
458
459 __gconv_fct fct = gs->__fct;
460#ifdef PTR_DEMANGLE
461 if (gs->__shlib_handle != NULL)
462 PTR_DEMANGLE (fct);
463#endif
464
465 DL_CALL_FCT (fct,
466 (gs, codecvt->__cd_in.__cd.__data, &cp,
467 (const unsigned char *) from_end, NULL,
468 &dummy, 0, 0));
469
470 result = cp - (const unsigned char *) from_start;
471#else
472# ifdef _GLIBCPP_USE_WCHAR_T
473 const char *from_start_copy = (const char *) from_start;
474 size_t from_len = from_end - from_start;
475 wchar_t to_buf[max];
476 size_t res;
477 char *to_start = (char *) to_buf;
478
479 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
480 &to_start, &max);
481
482 result = from_start_copy - (char *) from_start;
483# else
484 /* Decide what to do. */
485 result = 0;
486# endif
487#endif
488
489 return result;
490}
491
492
/* Return the largest number of bytes one character can need: in the
   _LIBC build the __max_needed_from value of the first input
   conversion step of CODECVT, otherwise MB_CUR_MAX.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
  int longest;

#ifdef _LIBC
  longest = codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  longest = MB_CUR_MAX;
#endif

  return longest;
}
501}