yuezonghe | 824eb0c | 2024-06-27 02:32:26 -0700 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org> |
| 3 | * |
| 4 | * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. |
| 5 | */ |
| 6 | #define _GNU_SOURCE |
| 7 | #include <stdio.h> |
| 8 | #include <stdlib.h> |
| 9 | #include <string.h> |
| 10 | #include <locale.h> |
| 11 | #include <stddef.h> |
| 12 | #include <wctype.h> |
| 13 | #include <limits.h> |
| 14 | |
| 15 | #ifndef _CTYPE_H |
| 16 | #define _CTYPE_H |
| 17 | #endif |
| 18 | #ifndef _WCTYPE_H |
| 19 | #define _WCTYPE_H |
| 20 | #endif |
| 21 | #include "include/bits/uClibc_ctype.h" |
| 22 | |
/* TODO: maybe support -v like gen_wctype.c */
/*
 * Print a printf-style diagnostic to stderr when `verbose` is non-zero.
 * A variable named `verbose` must be in scope at the point of use.
 *
 * The do/while(0) wrapper makes the expansion a single statement, so the
 * macro can be used as the body of an if/else without capturing the else.
 */
#define verbose_msg(...) \
	do { if (verbose) fprintf(stderr, __VA_ARGS__); } while (0)
| 25 | |
/*
 * Geometry of the generated lookup tables.
 *
 * Each *_IDX_SHIFT selects how many consecutive characters share one table
 * row: a row holds (1 << shift) entries and the upper 128 character values
 * need (128 >> shift) row indices per codeset.
 */

/* #define CTYPE_PACKED */
#define UPLOW_IDX_SHIFT		3
/* best if 2 unpacked or 3 packed */
#define CTYPE_IDX_SHIFT		3
/* 3 or 4 are very similar */
#define C2WC_IDX_SHIFT		3

/* Row indices needed per codeset for the character values 0x80..0xff. */
#define CTYPE_IDX_LEN		(0x80 >> CTYPE_IDX_SHIFT)
#define UPLOW_IDX_LEN		(0x80 >> UPLOW_IDX_SHIFT)
#define C2WC_IDX_LEN		(0x80 >> C2WC_IDX_SHIFT)

/*
 * Entries per row.  If CTYPE_PACKED were enabled the ctype row would be
 * (1 << (CTYPE_IDX_SHIFT - 1)) instead; that variant is currently disabled.
 */
#define CTYPE_ROW_LEN		(1 << CTYPE_IDX_SHIFT)
#define UPLOW_ROW_LEN		(1 << UPLOW_IDX_SHIFT)
#define C2WC_ROW_LEN		(1 << C2WC_IDX_SHIFT)

/* Largest wide character value that a codeset entry may map to. */
#define MAX_WCHAR		(0x2600 - 1)
#define RANGE			MAX_WCHAR

/* Shifts of the two-level wide-char -> char lookup. */
#define TT_SHIFT		4
#define TI_SHIFT		4

/* Top-level index entries per codeset for the wide-char -> char lookup. */
#define II_LEN			((1 + MAX_WCHAR) >> (TI_SHIFT + TT_SHIFT))

/* Shared row pools; at most 256 distinct rows each. */
static unsigned char ctype_tbl[CTYPE_ROW_LEN * 256];
static unsigned char uplow_tbl[UPLOW_ROW_LEN * 256];
#if defined(DO_WIDE_CHAR)
static unsigned short c2wc_tbl[C2WC_ROW_LEN * 256];
#endif

/* Wide-char -> char working tables: tt and ti are shared, xi is scratch. */
static unsigned char tt[1 + MAX_WCHAR];
static unsigned char ti[1 + MAX_WCHAR];
static unsigned char xi[1 + MAX_WCHAR];

/* Number of rows currently stored in each pool. */
static int n_ctype_rows, n_uplow_rows;
#if defined(DO_WIDE_CHAR)
static int n_c2wc_rows;
#endif
static int tt_num, ti_num;
| 72 | |
| 73 | typedef struct { |
| 74 | unsigned long c2w[256]; |
| 75 | unsigned char w2c[MAX_WCHAR]; |
| 76 | unsigned char ii[II_LEN]; |
| 77 | unsigned char ctype_idx[CTYPE_IDX_LEN]; |
| 78 | unsigned char uplow_idx[UPLOW_IDX_LEN]; |
| 79 | unsigned char c2wc_idx[C2WC_IDX_LEN]; |
| 80 | } charset_data; |
| 81 | |
| 82 | int main(int argc, char **argv) |
| 83 | { |
| 84 | FILE *fp; |
| 85 | charset_data csd[30]; |
| 86 | unsigned long max_wchar; |
| 87 | unsigned char *p; |
| 88 | int numsets; |
| 89 | int i; |
| 90 | int j; |
| 91 | char buf[80]; |
| 92 | unsigned char row[256]; |
| 93 | #ifdef DO_WIDE_CHAR |
| 94 | unsigned short wrow[256]; |
| 95 | #endif |
| 96 | char codeset_list[500]; |
| 97 | char codeset_index[30]; |
| 98 | int codeset_list_end = 0; |
| 99 | int total_size = 0; |
| 100 | |
| 101 | if (!setlocale(LC_CTYPE, "en_US.UTF-8")) { |
| 102 | /* Silly foreigners disabling en_US locales */ |
| 103 | FILE *fp = popen("locale -a", "r"); |
| 104 | if (!fp) |
| 105 | goto locale_failure; |
| 106 | |
| 107 | while (!feof(fp)) { |
| 108 | char buf[256]; |
| 109 | size_t len; |
| 110 | |
| 111 | if (fgets(buf, sizeof(buf) - 10, fp) == NULL) |
| 112 | goto locale_failure; |
| 113 | |
| 114 | len = strlen(buf); |
| 115 | if (len > 0 && buf[len - 1] == '\n') |
| 116 | buf[--len] = '\0'; |
| 117 | if (len < 5 || strcasecmp(&buf[len-5], ".UTF8") != 0) |
| 118 | strcat(buf, ".UTF8"); |
| 119 | if (setlocale(LC_CTYPE, buf)) |
| 120 | goto locale_success; |
| 121 | } |
| 122 | |
| 123 | locale_failure: |
| 124 | printf("could not find a UTF8 locale ... please enable en_US.UTF-8\n"); |
| 125 | return EXIT_FAILURE; |
| 126 | locale_success: |
| 127 | pclose(fp); |
| 128 | } |
| 129 | |
| 130 | #if 0 |
| 131 | if (argc == 1) { |
| 132 | /* User requested 8-bit codesets, but didn't list any... */ |
| 133 | /* Allow to build, just so this feature can be left on in config. */ |
| 134 | printf("#ifdef __CTYPE_HAS_8_BIT_LOCALES\n"); |
| 135 | printf("#warning ignoring 8 bit codesets request" |
| 136 | " as no codesets specified.\n"); |
| 137 | printf("#endif\n"); |
| 138 | printf("#undef __CTYPE_HAS_8_BIT_LOCALES\n\n"); |
| 139 | |
| 140 | printf("#define __LOCALE_DATA_NUM_CODESETS\t\t0\n"); |
| 141 | printf("#define __LOCALE_DATA_CODESET_LIST\t\t\"\"\n"); |
| 142 | return EXIT_SUCCESS; |
| 143 | } |
| 144 | |
| 145 | /* printf("#define __CTYPE_HAS_8_BIT_LOCALES\t1\n\n"); */ |
| 146 | printf("#ifdef __CTYPE_HAS_8_BIT_LOCALES\n\n"); |
| 147 | #endif |
| 148 | |
| 149 | if (argc == 1) { |
| 150 | printf("#undef __CTYPE_HAS_8_BIT_LOCALES\n\n"); |
| 151 | |
| 152 | printf("#define __LOCALE_DATA_NUM_CODESETS\t\t0\n"); |
| 153 | printf("#define __LOCALE_DATA_CODESET_LIST\t\t\"\"\n"); |
| 154 | } else { |
| 155 | printf("#define __CTYPE_HAS_8_BIT_LOCALES\t\t1\n\n"); |
| 156 | } |
| 157 | |
| 158 | printf("#define __LOCALE_DATA_Cctype_IDX_SHIFT\t%d\n", CTYPE_IDX_SHIFT); |
| 159 | printf("#define __LOCALE_DATA_Cctype_IDX_LEN\t\t%d\n", CTYPE_IDX_LEN); |
| 160 | #ifdef CTYPE_PACKED |
| 161 | printf("#define __LOCALE_DATA_Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN >> 1); |
| 162 | printf("#define __LOCALE_DATA_Cctype_PACKED\t\t1\n"); |
| 163 | #else |
| 164 | printf("#define __LOCALE_DATA_Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN); |
| 165 | printf("#undef __LOCALE_DATA_Cctype_PACKED\n"); |
| 166 | #endif |
| 167 | |
| 168 | printf("\n#define __LOCALE_DATA_Cuplow_IDX_SHIFT\t%d\n", UPLOW_IDX_SHIFT); |
| 169 | printf("#define __LOCALE_DATA_Cuplow_IDX_LEN\t\t%d\n", UPLOW_IDX_LEN); |
| 170 | printf("#define __LOCALE_DATA_Cuplow_ROW_LEN\t\t%d\n", UPLOW_ROW_LEN); |
| 171 | |
| 172 | #ifdef DO_WIDE_CHAR |
| 173 | printf("\n#define __LOCALE_DATA_Cc2wc_IDX_LEN\t\t%d\n", C2WC_IDX_LEN); |
| 174 | printf("#define __LOCALE_DATA_Cc2wc_IDX_SHIFT\t\t%d\n", C2WC_IDX_SHIFT); |
| 175 | printf("#define __LOCALE_DATA_Cc2wc_ROW_LEN\t\t%d\n", C2WC_ROW_LEN); |
| 176 | #endif |
| 177 | |
| 178 | printf("\ntypedef struct {\n"); |
| 179 | printf("\tunsigned char idx8ctype[%d];\n", CTYPE_IDX_LEN); |
| 180 | printf("\tunsigned char idx8uplow[%d];\n", UPLOW_IDX_LEN); |
| 181 | #ifdef DO_WIDE_CHAR |
| 182 | printf("\tunsigned char idx8c2wc[%d];\n", C2WC_IDX_LEN); |
| 183 | printf("\tunsigned char idx8wc2c[%d];\n", II_LEN); |
| 184 | #endif |
| 185 | printf("} __codeset_8_bit_t;\n\n"); |
| 186 | |
| 187 | printf("#ifdef WANT_DATA\n\n"); |
| 188 | printf("static const __codeset_8_bit_t codeset_8_bit[%d] = {\n", argc-1); |
| 189 | |
| 190 | max_wchar = 0x7f; |
| 191 | numsets = 0; |
| 192 | codeset_index[0] = 0; |
| 193 | while (--argc) { |
| 194 | if (!(fp = fopen(*++argv,"r"))) { |
| 195 | fprintf(stderr, "cannot open file \"%s\"\n", *argv); |
| 196 | return EXIT_FAILURE; |
| 197 | } |
| 198 | fprintf(stderr, "processing %s... ", *argv); |
| 199 | |
| 200 | { |
| 201 | char *s0; |
| 202 | char *s1; |
| 203 | int n; |
| 204 | |
| 205 | s0 = strrchr(*argv, '/'); |
| 206 | if (!s0) { |
| 207 | s0 = *argv; |
| 208 | } else { |
| 209 | ++s0; |
| 210 | } |
| 211 | s1 = strrchr(s0, '.'); |
| 212 | if (!s1) { |
| 213 | n = strlen(s0); |
| 214 | } else { |
| 215 | n = s1 - s0; |
| 216 | } |
| 217 | |
| 218 | /* if ((numsets == 0) && strncmp("ASCII", s0, n)) { */ |
| 219 | /* printf("error - first codeset isn't ASCII!\n"); */ |
| 220 | /* return EXIT_FAILURE; */ |
| 221 | /* } */ |
| 222 | |
| 223 | if (numsets >= sizeof(codeset_index)) { |
| 224 | fprintf(stderr, "error - too many codesets!\n"); |
| 225 | return EXIT_FAILURE; |
| 226 | } |
| 227 | |
| 228 | if (codeset_list_end + n + 1 + numsets + 1 + 1 >= 256) { |
| 229 | fprintf(stderr, "error - codeset list to big!\n"); |
| 230 | return EXIT_FAILURE; |
| 231 | } |
| 232 | |
| 233 | codeset_index[numsets+1] = codeset_index[numsets] + n+1; |
| 234 | strncpy(codeset_list + codeset_list_end, s0, n); |
| 235 | codeset_list_end += (n+1); |
| 236 | codeset_list[codeset_list_end - 1] = 0; |
| 237 | |
| 238 | printf("\t{ /* %.*s */", n, s0); |
| 239 | } |
| 240 | |
| 241 | memset(&csd[numsets], 0, sizeof(charset_data)); |
| 242 | memset(xi, 0, sizeof(xi)); |
| 243 | { |
| 244 | unsigned long c, wc; |
| 245 | int lines; |
| 246 | lines = 0; |
| 247 | while (fgets(buf,sizeof(buf),fp)) { |
| 248 | if ((2 != sscanf(buf, "{ %lx , %lx", &c, &wc)) |
| 249 | || (c >= 256) || (wc > MAX_WCHAR)) { |
| 250 | fprintf(stderr, "error: scanf failure! \"%s\"\n", buf); |
| 251 | return EXIT_FAILURE; |
| 252 | } |
| 253 | |
| 254 | /* don't put in w2c... dynamicly build tt instead. */ |
| 255 | |
| 256 | if (c <= 0x7f) { /* check the 7bit entries but don't store */ |
| 257 | if (c != wc) { |
| 258 | fprintf(stderr, "error: c != wc in %s\n", buf); |
| 259 | return EXIT_FAILURE; |
| 260 | } |
| 261 | csd[numsets].c2w[c] = wc; |
| 262 | csd[numsets].w2c[wc] = 0; /* ignore */ |
| 263 | if (wc > max_wchar) { |
| 264 | max_wchar = wc; |
| 265 | } |
| 266 | } else { |
| 267 | csd[numsets].c2w[c] = wc; |
| 268 | csd[numsets].w2c[wc] = c; |
| 269 | if (wc > max_wchar) { |
| 270 | max_wchar = wc; |
| 271 | } |
| 272 | } |
| 273 | ++lines; |
| 274 | } |
| 275 | fprintf(stderr, "%d lines ", lines); |
| 276 | |
| 277 | for (i = 0 ; i <= MAX_WCHAR ; i += (1 << TT_SHIFT)) { |
| 278 | p = &csd[numsets].w2c[i]; |
| 279 | for (j = 0 ; j < tt_num ; j++) { |
| 280 | if (!memcmp(p, &tt[j << TT_SHIFT], (1 << TT_SHIFT))) { |
| 281 | break; |
| 282 | } |
| 283 | } |
| 284 | if (j == tt_num) { /* new entry */ |
| 285 | memcpy(&tt[j << TT_SHIFT], p, (1 << TT_SHIFT)); |
| 286 | ++tt_num; |
| 287 | } |
| 288 | xi[i >> TT_SHIFT] = j; |
| 289 | } |
| 290 | |
| 291 | for (i = 0 ; i <= (MAX_WCHAR >> TT_SHIFT) ; i += (1 << TI_SHIFT)) { |
| 292 | p = &xi[i]; |
| 293 | for (j = 0 ; j < ti_num ; j++) { |
| 294 | if (!memcmp(p, &ti[j << TI_SHIFT], (1 << TI_SHIFT))) { |
| 295 | break; |
| 296 | } |
| 297 | } |
| 298 | if (j == ti_num) { /* new entry */ |
| 299 | memcpy(&ti[j << TI_SHIFT], p, (1 << TI_SHIFT)); |
| 300 | ++ti_num; |
| 301 | } |
| 302 | csd[numsets].ii[i >> TI_SHIFT] = j; |
| 303 | /* fprintf(stderr, "%d ", i >> TI_SHIFT); */ |
| 304 | } |
| 305 | |
| 306 | #if 1 |
| 307 | printf("\n\t\t/* idx8ctype data */\n\t\t{"); |
| 308 | for (i = 128 ; i < 256 ; i++) { |
| 309 | wchar_t c; |
| 310 | unsigned int d; |
| 311 | |
| 312 | /* if (!(i & 0x7)) { */ |
| 313 | /* printf("\n"); */ |
| 314 | /* } */ |
| 315 | |
| 316 | c = csd[numsets].c2w[i]; |
| 317 | |
| 318 | if (c == 0) { /* non-existant char in codeset */ |
| 319 | d = __CTYPE_unclassified; |
| 320 | } else if (iswdigit(c)) { |
| 321 | d = __CTYPE_digit; |
| 322 | } else if (iswalpha(c)) { |
| 323 | d = __CTYPE_alpha_nonupper_nonlower; |
| 324 | if (iswlower(c)) { |
| 325 | d = __CTYPE_alpha_lower; |
| 326 | if (iswupper(c)) { |
| 327 | d = __CTYPE_alpha_upper_lower; |
| 328 | } |
| 329 | } else if (iswupper(c)) { |
| 330 | d = __CTYPE_alpha_upper; |
| 331 | } |
| 332 | } else if (iswpunct(c)) { |
| 333 | d = __CTYPE_punct; |
| 334 | } else if (iswgraph(c)) { |
| 335 | d = __CTYPE_graph; |
| 336 | } else if (iswprint(c)) { |
| 337 | d = __CTYPE_print_space_nonblank; |
| 338 | if (iswblank(c)) { |
| 339 | d = __CTYPE_print_space_blank; |
| 340 | } |
| 341 | } else if (iswspace(c) && !iswcntrl(c)) { |
| 342 | d = __CTYPE_space_nonblank_noncntrl; |
| 343 | if (iswblank(c)) { |
| 344 | d = __CTYPE_space_blank_noncntrl; |
| 345 | } |
| 346 | } else if (iswcntrl(c)) { |
| 347 | d = __CTYPE_cntrl_nonspace; |
| 348 | if (iswspace(c)) { |
| 349 | d = __CTYPE_cntrl_space_nonblank; |
| 350 | if (iswblank(c)) { |
| 351 | d = __CTYPE_cntrl_space_blank; |
| 352 | } |
| 353 | } |
| 354 | } else { |
| 355 | d = __CTYPE_unclassified; |
| 356 | } |
| 357 | |
| 358 | #if 1 |
| 359 | row[i & (CTYPE_ROW_LEN-1)] = d; |
| 360 | if ((i & (CTYPE_ROW_LEN-1)) == (CTYPE_ROW_LEN-1)) { |
| 361 | p = ctype_tbl; |
| 362 | for (j=0 ; j < n_ctype_rows ; j++) { |
| 363 | if (!memcmp(p, row, CTYPE_ROW_LEN)) { |
| 364 | break; |
| 365 | } |
| 366 | p += CTYPE_ROW_LEN; |
| 367 | } |
| 368 | if (j == n_ctype_rows) { /* new entry */ |
| 369 | if (++n_ctype_rows > 256) { |
| 370 | fprintf(stderr, "error -- to many ctype rows!\n"); |
| 371 | return EXIT_FAILURE; |
| 372 | } |
| 373 | memcpy(p, row, CTYPE_ROW_LEN); |
| 374 | } |
| 375 | csd[numsets].ctype_idx[i >> CTYPE_IDX_SHIFT] = j; |
| 376 | if (!((i >> CTYPE_IDX_SHIFT) & 0x7) |
| 377 | && (i != (127 + CTYPE_ROW_LEN)) |
| 378 | ) { |
| 379 | printf("\n\t\t "); |
| 380 | } |
| 381 | printf(" %#4x,", j); |
| 382 | } |
| 383 | #else |
| 384 | printf(" %#4x,", d); |
| 385 | #endif |
| 386 | } |
| 387 | #endif |
| 388 | printf(" }"); |
| 389 | |
| 390 | #if 1 |
| 391 | printf(",\n\t\t/* idx8uplow data */\n\t\t{"); |
| 392 | for (i = 128 ; i < 256 ; i++) { |
| 393 | wchar_t c, u, l; |
| 394 | /* if (!(i & 0x7)) { */ |
| 395 | /* printf("\n"); */ |
| 396 | /* } */ |
| 397 | c = csd[numsets].c2w[i]; |
| 398 | if ((c != 0) || 1) { |
| 399 | u = towupper(c); |
| 400 | l = towlower(c); |
| 401 | |
| 402 | if (u >= 0x80) u = csd[numsets].w2c[u]; |
| 403 | if (l >= 0x80) l = csd[numsets].w2c[l]; |
| 404 | |
| 405 | if (u == 0) u = i; /* upper is missing, so ignore */ |
| 406 | if (l == 0) l = i; /* lower is missing, so ignore */ |
| 407 | |
| 408 | #if 1 |
| 409 | /* store as unsigned char and let overflow handle it. */ |
| 410 | /* if ((((u-i) < CHAR_MIN) || ((u-i) > CHAR_MAX)) */ |
| 411 | /* || (((i-l) < CHAR_MIN) || ((i-l) > CHAR_MAX)) */ |
| 412 | /* ) { */ |
| 413 | /* fprintf(stderr, "error - uplow diff out of range! %d %ld %ld\n", */ |
| 414 | /* i, u, l); */ |
| 415 | /* return EXIT_FAILURE; */ |
| 416 | /* } */ |
| 417 | |
| 418 | row[i & (UPLOW_ROW_LEN-1)] = ((l==i) ? (u-i) : (i-l)); |
| 419 | if ((i & (UPLOW_ROW_LEN-1)) == (UPLOW_ROW_LEN-1)) { |
| 420 | p = uplow_tbl; |
| 421 | for (j=0 ; j < n_uplow_rows ; j++) { |
| 422 | if (!memcmp(p, row, UPLOW_ROW_LEN)) { |
| 423 | break; |
| 424 | } |
| 425 | p += UPLOW_ROW_LEN; |
| 426 | } |
| 427 | if (j == n_uplow_rows) { /* new entry */ |
| 428 | if (++n_uplow_rows > 256) { |
| 429 | fprintf(stderr, "error -- to many uplow rows!\n"); |
| 430 | return EXIT_FAILURE; |
| 431 | } |
| 432 | memcpy(p, row, UPLOW_ROW_LEN); |
| 433 | } |
| 434 | csd[numsets].uplow_idx[i >> UPLOW_IDX_SHIFT] = j; |
| 435 | if (!((i >> UPLOW_IDX_SHIFT) & 0x7) |
| 436 | && (i != (127 + UPLOW_ROW_LEN)) |
| 437 | ) { |
| 438 | printf("\n\t\t "); |
| 439 | } |
| 440 | printf(" %#4x,", j); |
| 441 | } |
| 442 | |
| 443 | #elif 0 |
| 444 | if (!(i & 0x7) && i) { |
| 445 | printf("\n"); |
| 446 | } |
| 447 | printf(" %4ld,", (l==i) ? (u-i) : (i-l)); |
| 448 | /* printf(" %4ld,", (l==i) ? u : l); */ |
| 449 | #else |
| 450 | if ((u != i) || (l != i)) { |
| 451 | #if 0 |
| 452 | printf(" %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, \n", |
| 453 | (unsigned long) i, |
| 454 | (unsigned long) c, |
| 455 | (unsigned long) l, |
| 456 | (unsigned long) towlower(c), |
| 457 | (unsigned long) u, |
| 458 | (unsigned long) towupper(c)); |
| 459 | |
| 460 | #else |
| 461 | printf(" %#08lx, %8ld, %d, %8ld, %d, %#08lx\n", |
| 462 | (unsigned long) i, |
| 463 | (long) (l - i), |
| 464 | iswupper(c), |
| 465 | (long) (i - u), |
| 466 | iswlower(c), |
| 467 | (unsigned long) c); |
| 468 | #endif |
| 469 | } |
| 470 | #endif |
| 471 | } |
| 472 | } |
| 473 | printf(" }"); |
| 474 | #endif |
| 475 | |
| 476 | #ifndef DO_WIDE_CHAR |
| 477 | printf("\n"); |
| 478 | #else /* DO_WIDE_CHAR */ |
| 479 | |
| 480 | #if 1 |
| 481 | printf(",\n\t\t/* idx8c2wc data */\n\t\t{"); |
| 482 | for (i = 128 ; i < 256 ; i++) { |
| 483 | #if 1 |
| 484 | wrow[i & (C2WC_ROW_LEN-1)] = csd[numsets].c2w[i]; |
| 485 | if ((i & (C2WC_ROW_LEN-1)) == (C2WC_ROW_LEN-1)) { |
| 486 | p = (unsigned char *) c2wc_tbl; |
| 487 | for (j=0 ; j < n_c2wc_rows ; j++) { |
| 488 | if (!memcmp(p, (char *) wrow, 2*C2WC_ROW_LEN)) { |
| 489 | break; |
| 490 | } |
| 491 | p += 2*C2WC_ROW_LEN; |
| 492 | } |
| 493 | if (j == n_c2wc_rows) { /* new entry */ |
| 494 | if (++n_c2wc_rows > 256) { |
| 495 | fprintf(stderr, "error -- to many c2wc rows!\n"); |
| 496 | return EXIT_FAILURE; |
| 497 | } |
| 498 | memcpy(p, (char *) wrow, 2*C2WC_ROW_LEN); |
| 499 | } |
| 500 | csd[numsets].c2wc_idx[i >> C2WC_IDX_SHIFT] = j; |
| 501 | if (!((i >> C2WC_IDX_SHIFT) & 0x7) |
| 502 | && (i != (127 + C2WC_ROW_LEN)) |
| 503 | ) { |
| 504 | printf("\n\t\t "); |
| 505 | } |
| 506 | printf(" %#4x,", j); |
| 507 | } |
| 508 | #else |
| 509 | if (!(i & 0x7) && i) { |
| 510 | printf("\n"); |
| 511 | } |
| 512 | printf(" %#6lx,", csd[numsets].c2w[i]); |
| 513 | #endif |
| 514 | } |
| 515 | printf(" },\n"); |
| 516 | #endif |
| 517 | |
| 518 | #if 1 |
| 519 | /* fprintf(stderr, "\nII_LEN = %d\n", II_LEN); */ |
| 520 | printf("\t\t/* idx8wc2c data */\n\t\t{"); |
| 521 | for (i = 0 ; i < II_LEN ; i++) { |
| 522 | if (!(i & 0x7) && i) { |
| 523 | printf("\n\t\t "); |
| 524 | } |
| 525 | printf(" %#4x,", csd[numsets].ii[i]); |
| 526 | } |
| 527 | printf(" }\n"); |
| 528 | #endif |
| 529 | |
| 530 | #endif /* DO_WIDE_CHAR */ |
| 531 | printf("\t},\n"); |
| 532 | |
| 533 | } |
| 534 | ++numsets; |
| 535 | fprintf(stderr, "done\n"); |
| 536 | } |
| 537 | printf("};\n"); |
| 538 | printf("\n#endif /* WANT_DATA */\n"); |
| 539 | |
| 540 | #ifdef DO_WIDE_CHAR |
| 541 | printf("\n"); |
| 542 | printf("#define __LOCALE_DATA_Cwc2c_DOMAIN_MAX\t%#x\n", RANGE); |
| 543 | printf("#define __LOCALE_DATA_Cwc2c_TI_SHIFT\t\t%d\n", TI_SHIFT); |
| 544 | printf("#define __LOCALE_DATA_Cwc2c_TT_SHIFT\t\t%d\n", TT_SHIFT); |
| 545 | printf("#define __LOCALE_DATA_Cwc2c_II_LEN\t\t%d\n", II_LEN); |
| 546 | printf("#define __LOCALE_DATA_Cwc2c_TI_LEN\t\t%d\n", ti_num << TI_SHIFT); |
| 547 | printf("#define __LOCALE_DATA_Cwc2c_TT_LEN\t\t%d\n", tt_num << TT_SHIFT); |
| 548 | printf("\n"); |
| 549 | |
| 550 | printf("\n#define __LOCALE_DATA_Cwc2c_TBL_LEN\t\t%d\n", |
| 551 | (ti_num << TI_SHIFT) + (tt_num << TT_SHIFT)); |
| 552 | |
| 553 | printf("#ifdef WANT_DATA\n\n"); |
| 554 | printf("static const unsigned char __LOCALE_DATA_Cwc2c_data[%d] = {\n", |
| 555 | (ti_num << TI_SHIFT) + (tt_num << TT_SHIFT)); |
| 556 | printf("\t/* ti_table */\n\t"); |
| 557 | for (i=0 ; i < ti_num << TI_SHIFT ; i++) { |
| 558 | if (!(i & 7) && i) { |
| 559 | printf("\n\t"); |
| 560 | } |
| 561 | printf(" %#4x,", ti[i]); |
| 562 | } |
| 563 | printf("\n"); |
| 564 | printf("\t/* tt_table */\n\t"); |
| 565 | for (i=0 ; i < tt_num << TT_SHIFT ; i++) { |
| 566 | if (!(i & 7) && i) { |
| 567 | printf("\n\t"); |
| 568 | } |
| 569 | printf(" %#4x,", tt[i]); |
| 570 | } |
| 571 | printf("\n};\n"); |
| 572 | |
| 573 | printf("\n#endif /* WANT_DATA */\n"); |
| 574 | #endif /* DO_WIDE_CHAR */ |
| 575 | |
| 576 | printf("\n#define __LOCALE_DATA_Cuplow_TBL_LEN\t\t%d\n", |
| 577 | n_uplow_rows * UPLOW_ROW_LEN); |
| 578 | printf("\n#ifdef WANT_DATA\n\n"); |
| 579 | |
| 580 | printf("\nstatic const unsigned char __LOCALE_DATA_Cuplow_data[%d] = {\n", |
| 581 | n_uplow_rows * UPLOW_ROW_LEN); |
| 582 | p = uplow_tbl; |
| 583 | for (j=0 ; j < n_uplow_rows ; j++) { |
| 584 | printf("\t"); |
| 585 | for (i=0 ; i < UPLOW_ROW_LEN ; i++) { |
| 586 | printf(" %#4x,", (unsigned int)((unsigned char) p[i])); |
| 587 | } |
| 588 | printf("\n"); |
| 589 | p += UPLOW_ROW_LEN; |
| 590 | } |
| 591 | printf("};\n"); |
| 592 | |
| 593 | printf("\n#endif /* WANT_DATA */\n"); |
| 594 | printf("\n#define __LOCALE_DATA_Cctype_TBL_LEN\t\t%d\n", |
| 595 | #ifdef CTYPE_PACKED |
| 596 | n_ctype_rows * CTYPE_ROW_LEN / 2 |
| 597 | #else |
| 598 | n_ctype_rows * CTYPE_ROW_LEN |
| 599 | #endif |
| 600 | ); |
| 601 | printf("\n#ifdef WANT_DATA\n\n"); |
| 602 | |
| 603 | |
| 604 | printf("\nstatic const unsigned char __LOCALE_DATA_Cctype_data[%d] = {\n", |
| 605 | #ifdef CTYPE_PACKED |
| 606 | n_ctype_rows * CTYPE_ROW_LEN / 2 |
| 607 | #else |
| 608 | n_ctype_rows * CTYPE_ROW_LEN |
| 609 | #endif |
| 610 | ); |
| 611 | p = ctype_tbl; |
| 612 | for (j=0 ; j < n_ctype_rows ; j++) { |
| 613 | printf("\t"); |
| 614 | for (i=0 ; i < CTYPE_ROW_LEN ; i++) { |
| 615 | #ifdef CTYPE_PACKED |
| 616 | printf(" %#4x,", (unsigned int)(p[i] + (p[i+1] << 4))); |
| 617 | ++i; |
| 618 | #else |
| 619 | printf(" %#4x,", (unsigned int)p[i]); |
| 620 | #endif |
| 621 | } |
| 622 | printf("\n"); |
| 623 | p += CTYPE_ROW_LEN; |
| 624 | } |
| 625 | printf("};\n"); |
| 626 | |
| 627 | printf("\n#endif /* WANT_DATA */\n"); |
| 628 | |
| 629 | #ifdef DO_WIDE_CHAR |
| 630 | |
| 631 | printf("\n#define __LOCALE_DATA_Cc2wc_TBL_LEN\t\t%d\n", |
| 632 | n_c2wc_rows * C2WC_ROW_LEN); |
| 633 | printf("\n#ifdef WANT_DATA\n\n"); |
| 634 | |
| 635 | printf("\nstatic const unsigned short __LOCALE_DATA_Cc2wc_data[%d] = {\n", |
| 636 | n_c2wc_rows * C2WC_ROW_LEN); |
| 637 | p = (unsigned char *) c2wc_tbl; |
| 638 | for (j=0 ; j < n_c2wc_rows ; j++) { |
| 639 | printf("\t"); |
| 640 | for (i=0 ; i < C2WC_ROW_LEN ; i++) { |
| 641 | printf(" %#6x,", (unsigned int)(((unsigned short *)p)[i])); |
| 642 | } |
| 643 | printf("\n"); |
| 644 | p += 2*C2WC_ROW_LEN; |
| 645 | } |
| 646 | printf("};\n"); |
| 647 | printf("\n#endif /* WANT_DATA */\n"); |
| 648 | #endif /* DO_WIDE_CHAR */ |
| 649 | printf("\n\n"); |
| 650 | |
| 651 | printf("#define __LOCALE_DATA_NUM_CODESETS\t\t%d\n", numsets); |
| 652 | printf("#define __LOCALE_DATA_CODESET_LIST \\\n\t\""); |
| 653 | for (i=0 ; i < numsets ; i++) { |
| 654 | printf("\\x%02x", numsets + 1 + (unsigned char) codeset_index[i]); |
| 655 | if (((i & 7) == 7) && (i + 1 < numsets)) { |
| 656 | printf("\" \\\n\t\""); |
| 657 | } |
| 658 | } |
| 659 | printf("\" \\\n\t\"\\0\""); |
| 660 | for (i=0 ; i < numsets ; i++) { |
| 661 | printf(" \\\n\t\"%s\\0\"", |
| 662 | codeset_list + ((unsigned char)codeset_index[i])); |
| 663 | } |
| 664 | |
| 665 | printf("\n\n"); |
| 666 | for (i=0 ; i < numsets ; i++) { |
| 667 | char buf[30]; |
| 668 | char *z; |
| 669 | strcpy(buf, codeset_list + ((unsigned char)codeset_index[i])); |
| 670 | for (z=buf ; *z ; z++) { |
| 671 | if (*z == '-') { |
| 672 | *z = '_'; |
| 673 | } |
| 674 | } |
| 675 | printf("#define __CTYPE_HAS_CODESET_%s\n", buf); |
| 676 | } |
| 677 | #ifdef DO_WIDE_CHAR |
| 678 | printf("#define __CTYPE_HAS_CODESET_UTF_8\n"); |
| 679 | #endif /* DO_WIDE_CHAR */ |
| 680 | |
| 681 | #if 0 |
| 682 | printf("\n#endif /* __CTYPE_HAS_8_BIT_LOCALES */\n\n"); |
| 683 | #endif |
| 684 | |
| 685 | total_size = 0; |
| 686 | #ifdef DO_WIDE_CHAR |
| 687 | fprintf(stderr, "tt_num = %d ti_num = %d\n", tt_num, ti_num); |
| 688 | fprintf(stderr, "max_wchar = %#lx\n", max_wchar); |
| 689 | |
| 690 | fprintf(stderr, "size is %d * %d + %d * %d + %d * %d = %d\n", |
| 691 | tt_num, 1 << TT_SHIFT, ti_num, 1 << TI_SHIFT, |
| 692 | ((MAX_WCHAR >> (TT_SHIFT + TI_SHIFT)) + 1), numsets, |
| 693 | j = tt_num * (1 << TT_SHIFT) + ti_num * (1 << TI_SHIFT) |
| 694 | + ((MAX_WCHAR >> (TT_SHIFT + TI_SHIFT)) + 1) * numsets); |
| 695 | total_size += j; |
| 696 | #endif /* DO_WIDE_CHAR */ |
| 697 | |
| 698 | #ifdef CTYPE_PACKED |
| 699 | i = 2; |
| 700 | #else |
| 701 | i = 1; |
| 702 | #endif |
| 703 | |
| 704 | fprintf(stderr, "ctype - CTYPE_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n", |
| 705 | CTYPE_IDX_SHIFT, numsets, CTYPE_IDX_LEN, n_ctype_rows, CTYPE_ROW_LEN / i, |
| 706 | j = numsets * CTYPE_IDX_LEN + n_ctype_rows * CTYPE_ROW_LEN / i); |
| 707 | total_size += j; |
| 708 | |
| 709 | fprintf(stderr, "uplow - UPLOW_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n", |
| 710 | UPLOW_IDX_SHIFT, numsets, UPLOW_IDX_LEN, n_uplow_rows, UPLOW_ROW_LEN, |
| 711 | j = numsets * UPLOW_IDX_LEN + n_uplow_rows * UPLOW_ROW_LEN); |
| 712 | total_size += j; |
| 713 | |
| 714 | #ifdef DO_WIDE_CHAR |
| 715 | |
| 716 | fprintf(stderr, "c2wc - C2WC_IDX_SHIFT = %d -- %d * %d + 2 * %d * %d = %d\n", |
| 717 | C2WC_IDX_SHIFT, numsets, C2WC_IDX_LEN, n_c2wc_rows, C2WC_ROW_LEN, |
| 718 | j = numsets * C2WC_IDX_LEN + 2 * n_c2wc_rows * C2WC_ROW_LEN); |
| 719 | total_size += j; |
| 720 | |
| 721 | #endif /* DO_WIDE_CHAR */ |
| 722 | |
| 723 | fprintf(stderr, "total size = %d\n", total_size); |
| 724 | |
| 725 | /* for (i=0 ; i < numsets ; i++) { */ |
| 726 | /* printf("codeset_index[i] = %d codeset_list[ci[i]] = \"%s\"\n", */ |
| 727 | /* (unsigned char) codeset_index[i], */ |
| 728 | /* codeset_list + ((unsigned char)codeset_index[i])); */ |
| 729 | /* } */ |
| 730 | |
| 731 | return EXIT_SUCCESS; |
| 732 | } |