lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | |
| 2 | /* Copyright (C) 2002, 2003, 2004 Manuel Novoa III |
| 3 | * |
| 4 | * This library is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU Library General Public |
| 6 | * License as published by the Free Software Foundation; either |
| 7 | * version 2 of the License, or (at your option) any later version. |
| 8 | * |
| 9 | * This library is distributed in the hope that it will be useful, |
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 12 | * Library General Public License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU Library General Public |
| 15 | * License along with this library; if not, write to the Free |
| 16 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| 17 | */ |
| 18 | |
| 19 | /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! |
| 20 | * |
| 21 | * Besides uClibc, I'm using this code in my libc for elks, which is |
| 22 | * a 16-bit environment with a fairly limited compiler. It would make |
| 23 | * things much easier for me if this file isn't modified unnecessarily. |
| 24 | * In particular, please put any new or replacement functions somewhere |
| 25 | * else, and modify the makefile to use your version instead. |
| 26 | * Thanks. Manuel |
| 27 | * |
| 28 | * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */ |
| 29 | |
| 30 | |
| 31 | /* May 23, 2002 Initial Notes: |
| 32 | * |
| 33 | * I'm still tweaking this stuff, but it passes the tests I've thrown |
| 34 | * at it, and Erik needs it for the gcc port. The glibc extension |
| 35 | * __wcsnrtombs() hasn't been tested, as I didn't find a test for it |
| 36 | * in the glibc source. I also need to fix the behavior of |
| 37 | * _wchar_utf8sntowcs() if the max number of wchars to convert is 0. |
| 38 | * |
| 39 | * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt |
| 40 | * file on my platform (x86) show about 5-10% faster conversion speed than |
| 41 | * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with |
| 42 | * individual mbrtowc()/wcrtomb() calls. |
| 43 | * |
| 44 | * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled |
| 45 | * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which |
| 46 | * needs to deal gracefully with whatever is sent to it. In that mode, |
| 47 | * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add |
| 48 | * an arg to force that behavior, so the interface will be changing. |
| 49 | * |
| 50 | * I need to fix the error checking for 16-bit wide chars. This isn't |
| 51 | * an issue for uClibc, but may be for ELKS. I'm currently not sure |
| 52 | * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS. |
| 53 | * |
| 54 | * July 1, 2002 |
| 55 | * |
| 56 | * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case. |
| 57 | * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit |
| 58 | * locales. |
| 59 | * Enabled building of a C/POSIX-locale-only version, so full locale support |
| 60 | * no longer needs to be enabled. |
| 61 | * |
| 62 | * Nov 4, 2002 |
| 63 | * |
| 64 | * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL. |
| 65 | * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in |
| 66 | * order to support %ls in printf. See comments below for details. |
| 67 | * Change behaviour of wc<->mb functions when in the C locale. Now they do |
| 68 | * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility |
| 69 | * and consistency with the standard's requirement that a printf format string be |
| 70 | * a valid multibyte string beginning and ending in its initial shift state. |
| 71 | * |
| 72 | * Nov 5, 2002 |
| 73 | * |
| 74 | * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday. |
| 75 | * |
| 76 | * Nov 7, 2002 |
| 77 | * |
| 78 | * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08. |
| 79 | * Added some size/speed optimizations and integrated it into my locale |
| 80 | * framework. Minimally tested at the moment, but the stub C-locale |
| 81 | * version (which most people would probably be using) should be fine. |
| 82 | * |
| 83 | * Nov 21, 2002 |
| 84 | * |
| 85 | * Revert the wc<->mb changes from earlier this month involving the C-locale. |
| 86 | * Add a couple of ugly hacks to support *wprintf. |
| 87 | * Add a mini iconv() and iconv implementation (requires locale support). |
| 88 | * |
| 89 | * Aug 1, 2003 |
| 90 | * Bug fix for mbrtowc. |
| 91 | * |
| 92 | * Aug 18, 2003 |
| 93 | * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ. |
| 94 | * |
| 95 | * Feb 11, 2004 |
| 96 | * Bug fix: Fix size check for remaining output space in iconv(). |
| 97 | * |
| 98 | * Manuel |
| 99 | */ |
| 100 | |
| 101 | #include "porting.h" |
| 102 | #include <string.h> |
| 103 | #include <iconv.h> |
| 104 | #include <stdarg.h> |
| 105 | #include <libgen.h> |
| 106 | #include <wchar.h> |
| 107 | #include "wchar.c" /* for _UC_iconv_t and __iconv_codesets */ |
| 108 | |
| 109 | extern const unsigned char __iconv_codesets[]; |
| 110 | |
| 111 | #define IBUF BUFSIZ |
| 112 | #define OBUF BUFSIZ |
| 113 | |
| 114 | static char *progname; |
| 115 | static int hide_errors; |
| 116 | |
| 117 | static void error_msg(const char *fmt, ...) |
| 118 | __attribute__ ((noreturn, format (printf, 1, 2))); |
| 119 | |
| 120 | static void error_msg(const char *fmt, ...) |
| 121 | { |
| 122 | va_list arg; |
| 123 | |
| 124 | if (!hide_errors) { |
| 125 | fprintf(stderr, "%s: ", progname); |
| 126 | va_start(arg, fmt); |
| 127 | vfprintf(stderr, fmt, arg); |
| 128 | va_end(arg); |
| 129 | } |
| 130 | |
| 131 | exit(EXIT_FAILURE); |
| 132 | } |
| 133 | |
| 134 | int main(int argc, char **argv) |
| 135 | { |
| 136 | FILE *ifile; |
| 137 | FILE *ofile = stdout; |
| 138 | const char *p; |
| 139 | const char *s; |
| 140 | static const char opt_chars[] = "tfocsl"; |
| 141 | /* 012345 */ |
| 142 | const char *opts[sizeof(opt_chars)]; /* last is infile name */ |
| 143 | iconv_t ic; |
| 144 | char ibuf[IBUF]; |
| 145 | char obuf[OBUF]; |
| 146 | char *pi; |
| 147 | char *po; |
| 148 | size_t ni, no, r, pos; |
| 149 | |
| 150 | hide_errors = 0; |
| 151 | |
| 152 | for (s = opt_chars ; *s ; s++) { |
| 153 | opts[ s - opt_chars ] = NULL; |
| 154 | } |
| 155 | |
| 156 | progname = *argv; |
| 157 | while (--argc) { |
| 158 | p = *++argv; |
| 159 | if ((*p != '-') || (*++p == 0)) { |
| 160 | break; |
| 161 | } |
| 162 | do { |
| 163 | if ((s = strchr(opt_chars,*p)) == NULL) { |
| 164 | USAGE: |
| 165 | s = basename(progname); |
| 166 | fprintf(stderr, |
| 167 | "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n" |
| 168 | " or\n%s -l\n", s, s); |
| 169 | return EXIT_FAILURE; |
| 170 | } |
| 171 | if ((s - opt_chars) < 3) { |
| 172 | if ((--argc == 0) || opts[s - opt_chars]) { |
| 173 | goto USAGE; |
| 174 | } |
| 175 | opts[s - opt_chars] = *++argv; |
| 176 | } else { |
| 177 | opts[s - opt_chars] = p; |
| 178 | } |
| 179 | } while (*++p); |
| 180 | } |
| 181 | |
| 182 | if (opts[5]) { /* -l */ |
| 183 | fprintf(stderr, "Recognized codesets:\n"); |
| 184 | for (s = (char *)__iconv_codesets ; *s ; s += *s) { |
| 185 | fprintf(stderr," %s\n", s+2); |
| 186 | } |
| 187 | s = __LOCALE_DATA_CODESET_LIST; |
| 188 | do { |
| 189 | fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s)); |
| 190 | } while (*++s); |
| 191 | |
| 192 | return EXIT_SUCCESS; |
| 193 | } |
| 194 | |
| 195 | if (opts[4]) { |
| 196 | hide_errors = 1; |
| 197 | } |
| 198 | |
| 199 | if (!opts[0] || !opts[1]) { |
| 200 | goto USAGE; |
| 201 | } |
| 202 | if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) { |
| 203 | error_msg( "unsupported codeset in %s -> %s conversion\n", opts[1], opts[0]); |
| 204 | } |
| 205 | if (opts[3]) { /* -c */ |
| 206 | ((_UC_iconv_t *) ic)->skip_invalid_input = 1; |
| 207 | } |
| 208 | |
| 209 | if ((s = opts[2]) != NULL) { |
| 210 | if (!(ofile = fopen(s, "w"))) { |
| 211 | error_msg( "couldn't open %s for writing\n", s); |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | pos = ni = 0; |
| 216 | do { |
| 217 | if (!argc || ((**argv == '-') && !((*argv)[1]))) { |
| 218 | ifile = stdin; /* we don't check for duplicates */ |
| 219 | } else if (!(ifile = fopen(*argv, "r"))) { |
| 220 | error_msg( "couldn't open %s for reading\n", *argv); |
| 221 | } |
| 222 | |
| 223 | while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) { |
| 224 | pos += r; |
| 225 | ni += r; |
| 226 | no = OBUF; |
| 227 | pi = ibuf; |
| 228 | po = obuf; |
| 229 | if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) { |
| 230 | if ((errno != EINVAL) && (errno != E2BIG)) { |
| 231 | error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni)); |
| 232 | } |
| 233 | } |
| 234 | if ((r = OBUF - no) > 0) { |
| 235 | if (fwrite(obuf, 1, OBUF - no, ofile) < r) { |
| 236 | error_msg( "write error\n"); |
| 237 | } |
| 238 | } |
| 239 | if (ni) { /* still bytes in buffer! */ |
| 240 | memmove(ibuf, pi, ni); |
| 241 | } |
| 242 | } |
| 243 | |
| 244 | if (ferror(ifile)) { |
| 245 | error_msg( "read error\n"); |
| 246 | } |
| 247 | |
| 248 | ++argv; |
| 249 | |
| 250 | if (ifile != stdin) { |
| 251 | fclose(ifile); |
| 252 | } |
| 253 | |
| 254 | } while (--argc > 0); |
| 255 | |
| 256 | iconv_close(ic); |
| 257 | |
| 258 | if (ni) { |
| 259 | error_msg( "incomplete sequence\n"); |
| 260 | } |
| 261 | |
| 262 | return (((_UC_iconv_t *) ic)->skip_invalid_input < 2) |
| 263 | ? EXIT_SUCCESS : EXIT_FAILURE; |
| 264 | } |