blob: 5e8936082a32b64a09457ca9df8df5e40e8ede63 [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <ctype.h>
5#include <limits.h>
6#include <assert.h>
7#include <stdarg.h>
8#include <locale.h>
9#include <langinfo.h>
10#include <nl_types.h>
11#include <stdint.h>
12
13#include "c8tables.h"
14
15
/* Number of locale categories this generator handles (only 6 or 12 compile). */
#define __LOCALE_DATA_CATEGORIES 6

/*
 * Category names, indexed by category number.  do_locale_names() emits
 * "#define __<name> <index>" for each entry and packs the names into the
 * generated lc_names table.
 */
/* must agree with ordering of gen_mmap! */
static const char *lc_names[] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_MONETARY",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MESSAGES",
#if __LOCALE_DATA_CATEGORIES == 12
	"LC_PAPER",
	"LC_NAME",
	"LC_ADDRESS",
	"LC_TELEPHONE",
	"LC_MEASUREMENT",
	"LC_IDENTIFICATION",
#elif __LOCALE_DATA_CATEGORIES != 6
#error unsupported __LOCALE_DATA_CATEGORIES value!
#endif
};
37
38
39
/* One locale read from the input list, plus the table rows assigned to it. */
typedef struct {
	char *glibc_name;              /* full locale name as passed to setlocale() */
	char name[5];                  /* ll_CC; byte 2 is replaced by the @modifier mapping char */
	char dot_cs; /* 0 if no codeset specified */
	char cs;                       /* codeset number from find_codeset_num(); 1 for the C locale */
	unsigned char idx_name;        /* unique-name index, assigned in do_locale_names() */
	unsigned char lc_time_row;     /* row numbers assigned by DL_LC_LOOPTAIL */
	unsigned char lc_numeric_row;
	unsigned char lc_monetary_row;
	unsigned char lc_messages_row;
	unsigned char lc_ctype_row;
#if __LOCALE_DATA_CATEGORIES != 6
#error unsupported __LOCALE_DATA_CATEGORIES value
#endif
} locale_entry;
55
/* Input parsing: the three sections of the locale list file, in file order. */
static void read_at_mappings(void);
static void read_enable_disable(void);
static void read_locale_list(void);

/* Lookup, sorting and table-dumping helpers. */
static int find_codeset_num(const char *cs);
static int find_at_string_num(const char *as);
static int le_cmp(const void *, const void *);
static void dump_table8(const char *name, const char *tbl, int len);
static void dump_table8c(const char *name, const char *tbl, int len);
static void dump_table16(const char *name, const int *tbl, int len);

/* Per-category generators; each queries nl_langinfo() for every locale. */
static void do_lc_time(void);
static void do_lc_numeric(void);
static void do_lc_monetary(void);

static void do_lc_messages(void);
static void do_lc_ctype(void);
73
74
static FILE *fp;                        /* input locale list, opened in main() */
static FILE *ofp;                       /* generated header, opened in main() */
static char line_buf[80];               /* current input line; tokenized in place by strtok() */
static char at_mappings[256];           /* nonzero at [c] once mapping char c has been used */
static char at_mapto[256];              /* mapping char of the n-th @modifier */
static char at_strings[1024];           /* packed @modifier records: length byte, name, nul */
static char *at_strings_end;            /* next free byte in at_strings */
static locale_entry locales[700];       /* entry 0 is the C locale */
static char glibc_locale_names[60000];  /* packed records: length byte, name, nul */

static int num_locales;                 /* number of valid entries in locales[] */

static int default_utf8;                /* set from the "UTF-8" setting in the input file */
static int default_8bit;                /* set from the "8-BIT" setting in the input file */

static int total_size;                  /* running sum of generated data sizes */
static int null_count;                  /* NULL strings seen by addblock()/addstring() */
92
/* Bitmask of enabled diagnostic levels (see the enum below). */
static unsigned verbose = 0;
enum {
	VINFO = (1<<0),
	VDETAIL = (1<<1),
};

/*
 * printf-style diagnostic to stderr, emitted only when one of the bits in
 * lvl is set in the verbose mask.  Returns vfprintf()'s result, or 0 when
 * the message is suppressed.
 */
static int verbose_msg(const unsigned lvl, const char *fmt, ...)
{
	va_list ap;
	int written;

	if (!(verbose & lvl)) {
		return 0;
	}

	va_start(ap, fmt);
	written = vfprintf(stderr, fmt, ap);
	va_end(ap);

	return written;
}
110
static void error_msg(const char *fmt, ...) __attribute__ ((noreturn, format (printf, 1, 2)));

/*
 * Print "Error: " followed by the printf-style message and a newline to
 * stderr, then terminate the program with EXIT_FAILURE.  Never returns.
 */
static void error_msg(const char *fmt, ...)
{
	va_list ap;

	fputs("Error: ", stderr);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	fputc('\n', stderr);

	exit(EXIT_FAILURE);
}
127
128static void do_locale_names(void)
129{
130 /* "C" locale name is handled specially by the setlocale code. */
131 int uniq = 0;
132 int i;
133
134 if (num_locales <= 1) {
135/* error_msg("only C locale?"); */
136 fprintf(ofp, "static const unsigned char __locales[%d];\n", (3 + __LOCALE_DATA_CATEGORIES));
137 fprintf(ofp, "static const unsigned char __locale_names5[5];\n");
138 } else {
139 if (default_utf8) {
140 fprintf(ofp, "#define __CTYPE_HAS_UTF_8_LOCALES\t\t\t1\n");
141 }
142 fprintf(ofp, "#define __LOCALE_DATA_CATEGORIES\t\t\t%d\n", __LOCALE_DATA_CATEGORIES);
143 fprintf(ofp, "#define __LOCALE_DATA_WIDTH_LOCALES\t\t\t%d\n", 3+__LOCALE_DATA_CATEGORIES);
144 fprintf(ofp, "#define __LOCALE_DATA_NUM_LOCALES\t\t\t%d\n", num_locales);
145 fprintf(ofp, "static const unsigned char __locales[%d] = {\n",
146 (num_locales) * (3 + __LOCALE_DATA_CATEGORIES));
147 for (i=0 ; i < num_locales ; i++) {
148 if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) {
149 locales[i].idx_name = uniq;
150 ++uniq;
151 } else {
152 locales[i].idx_name = uniq - 1;
153 }
154 fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].idx_name));
155 fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].dot_cs));
156 fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].cs));
157 /* lc_ctype would store translit flags and turkish up/low flag. */
158 fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_ctype_row));
159 fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_numeric_row));
160 fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_monetary_row));
161 fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_time_row));
162#if 1
163 /* lc_collate */
164 if (strlen(locales[i].glibc_name) >= 5) {
165 fprintf(ofp, "COL_IDX_%.2s_%.2s, ", locales[i].glibc_name, locales[i].glibc_name+3);
166 } else if (!strcmp(locales[i].glibc_name, "C")) {
167 fprintf(ofp, "COL_IDX_C , ");
168 } else {
169 error_msg("don't know how to handle COL_IDX_ for %s", locales[i].glibc_name);
170 }
171#else
172 fprintf(ofp, "%#4x, ", 0); /* place holder for lc_collate */
173#endif
174 fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_messages_row));
175 fprintf(ofp, "\t/* %s */\n", locales[i].glibc_name);
176 }
177 fprintf(ofp, "};\n\n");
178
179 fprintf(ofp, "#define __LOCALE_DATA_NUM_LOCALE_NAMES\t\t%d\n", uniq );
180 fprintf(ofp, "static const unsigned char __locale_names5[%d] = \n\t", uniq * 5);
181 uniq = 0;
182 for (i=1 ; i < num_locales ; i++) {
183 if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) {
184 fprintf(ofp, "\"%5.5s\" ", locales[i].name);
185 ++uniq;
186 if ((uniq % 8) == 0) {
187 fprintf(ofp, "\n\t");
188 }
189 }
190 }
191 fprintf(ofp,";\n\n");
192
193 if (at_strings_end > at_strings) {
194 int i, j;
195 char *p;
196 i = 0;
197 p = at_strings;
198 while (*p) {
199 ++i;
200 p += 1 + (unsigned char) *p;
201 }
202 /* len, char, string\0 */
203 fprintf(ofp, "#define __LOCALE_DATA_AT_MODIFIERS_LENGTH\t\t%d\n",
204 i + (at_strings_end - at_strings));
205 fprintf(ofp, "static const unsigned char __locale_at_modifiers[%d] = {",
206 i + (at_strings_end - at_strings));
207 i = 0;
208 p = at_strings;
209 while (*p) {
210 fprintf(ofp, "\n\t%4d, '%c',",
211 (unsigned char) *p, /* len of string\0 */
212 at_mapto[i]);
213 for (j=1 ; j < ((unsigned char) *p) ; j++) {
214 fprintf(ofp, " '%c',", p[j]);
215 }
216 fprintf(ofp, " 0,");
217 ++i;
218 p += 1 + (unsigned char) *p;
219 }
220 fprintf(ofp, "\n};\n\n");
221 }
222
223 {
224 int pos[__LOCALE_DATA_CATEGORIES];
225 pos[0] = __LOCALE_DATA_CATEGORIES;
226 for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) {
227 fprintf(ofp, "#define __%s\t\t%d\n", lc_names[i], i);
228 if (i + 1 < __LOCALE_DATA_CATEGORIES) {
229 pos[i+1] = 1 + strlen(lc_names[i]) + pos[i];
230 }
231 }
232 if (pos[__LOCALE_DATA_CATEGORIES-1] > 255) {
233 error_msg("lc_names is too big (%d)", pos[__LOCALE_DATA_CATEGORIES-1]);
234 }
235 fprintf(ofp, "#define __LC_ALL\t\t%d\n\n", i);
236
237 fprintf(ofp, "#define __lc_names_LEN\t\t%d\n",
238 pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1);
239 total_size += pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1;
240
241 fprintf(ofp, "static unsigned const char lc_names[%d] =\n",
242 pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1);
243 fprintf(ofp, "\t\"");
244 for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) {
245 fprintf(ofp, "\\x%02x", (unsigned char) pos[i]);
246 }
247 fprintf(ofp, "\"");
248 for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) {
249 fprintf(ofp, "\n\t\"%s\\0\"", lc_names[i]);
250 }
251 fprintf(ofp, ";\n\n");
252 }
253
254 verbose_msg(VDETAIL,"locale data = %d name data = %d for %d uniq\n",
255 num_locales * (3 + __LOCALE_DATA_CATEGORIES), uniq * 5, uniq);
256
257 total_size += num_locales * (3 + __LOCALE_DATA_CATEGORIES) + uniq * 5;
258 }
259
260}
261
262static void read_at_mappings(void)
263{
264 char *p;
265 char *m;
266 int mc = 0;
267
268 do {
269 if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) {
270 if (!fgets(line_buf, sizeof(line_buf), fp)) {
271 if (ferror(fp)) {
272 error_msg("reading file");
273 }
274 return; /* EOF */
275 }
276 if ((*line_buf == '#') && (line_buf[1] == '-')) {
277 break;
278 }
279 continue;
280 }
281 if (*p == '@') {
282 if (p[1] == 0) {
283 error_msg("missing @modifier name");
284 }
285 m = p; /* save the modifier name */
286 if (!(p = strtok(NULL, " \t\n")) || p[1] || (((unsigned char) *p) > 0x7f)) {
287 error_msg("missing or illegal @modifier mapping char");
288 }
289 if (at_mappings[(int)((unsigned char) *p)]) {
290 error_msg("reused @modifier mapping char");
291 }
292 at_mappings[(int)((unsigned char) *p)] = 1;
293 at_mapto[mc] = *p;
294 ++mc;
295 *at_strings_end = (char)( (unsigned char) (strlen(m)) );
296 strcpy(++at_strings_end, m+1);
297 at_strings_end += (unsigned char) at_strings_end[-1];
298
299 verbose_msg(VDETAIL,"@mapping: \"%s\" to '%c'\n", m, *p);
300
301 if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
302 fprintf(stderr,"ignoring trailing text: %s...\n", p);
303 }
304 *line_buf = 0;
305 continue;
306 }
307 break;
308 } while (1);
309
310#if 0
311 {
312 p = at_strings;
313
314 if (!*p) {
315 verbose_msg(VDETAIL,"no @ strings\n");
316 return;
317 }
318
319 do {
320 verbose_msg(VDETAIL,"%s\n", p+1);
321 p += 1 + (unsigned char) *p;
322 } while (*p);
323 }
324#endif
325}
326
327static void read_enable_disable(void)
328{
329 char *p;
330
331 do {
332 if (!(p = strtok(line_buf, " =\t\n")) || (*p == '#')) {
333 if (!fgets(line_buf, sizeof(line_buf), fp)) {
334 if (ferror(fp)) {
335 error_msg("reading file");
336 }
337 return; /* EOF */
338 }
339 if ((*line_buf == '#') && (line_buf[1] == '-')) {
340 break;
341 }
342 continue;
343 }
344 if (!strcmp(p, "UTF-8")) {
345 if (!(p = strtok(NULL, " =\t\n"))
346 || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) {
347 error_msg("missing or illegal UTF-8 setting");
348 }
349 default_utf8 = (toupper(*p) == 'Y');
350 verbose_msg(VINFO,"UTF-8 locales are %sabled\n", "dis\0en"+ (default_utf8 << 2));
351 } else if (!strcmp(p, "8-BIT")) {
352 if (!(p = strtok(NULL, " =\t\n"))
353 || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) {
354 error_msg("missing or illegal 8-BIT setting");
355 }
356 default_8bit = (toupper(*p) == 'Y');
357 verbose_msg(VINFO,"8-BIT locales are %sabled\n", "dis\0en" + (default_8bit << 2));
358 } else {
359 break;
360 }
361
362 if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
363 fprintf(stderr,"ignoring trailing text: %s...\n", p);
364 }
365 *line_buf = 0;
366 continue;
367
368 } while (1);
369}
370
#ifdef __LOCALE_DATA_CODESET_LIST

/*
 * Map a codeset name to its number: 7-bit is 1, UTF-8 is 2, 8-bit
 * codesets are > 2.  The leading bytes of __LOCALE_DATA_CODESET_LIST are
 * offsets (from the start of the list) of the known 8-bit codeset names;
 * a zero byte ends them.  Unknown names are fatal.
 */
static int find_codeset_num(const char *cs)
{
	const char *list = __LOCALE_DATA_CODESET_LIST;
	const char *off;
	int num;

	if (strcmp(cs, "UTF-8") == 0) {
		return 2;
	}

	num = 3;
	for (off = list ; *off ; ++off, ++num) {
		if (strcmp(list + ((unsigned char) *off), cs) == 0) {
			return num;
		}
	}

	error_msg("unsupported codeset %s", cs);
}

#else

/*
 * Without a codeset list only UTF-8 (number 2) is known; any other name
 * is fatal.
 */
static int find_codeset_num(const char *cs)
{
	if (strcmp(cs, "UTF-8") == 0) {
		return 2;
	}

	error_msg("unsupported codeset %s", cs);
}

#endif
409
410static int find_at_string_num(const char *as)
411{
412 int i = 0;
413 char *p = at_strings;
414
415 while (*p) {
416 if (!strcmp(p+1, as)) {
417 return i;
418 }
419 ++i;
420 p += 1 + (unsigned char) *p;
421 }
422
423 error_msg("error: unmapped @string %s", as);
424}
425
426static void read_locale_list(void)
427{
428 char *p;
429 char *s;
430 char *ln; /* locale name */
431 char *ls; /* locale name ll_CC */
432 char *as; /* at string */
433 char *ds; /* dot string */
434 char *cs; /* codeset */
435 int i;
436
437 typedef struct {
438 char *glibc_name;
439 char name[5];
440 char dot_cs; /* 0 if no codeset specified */
441 char cs;
442 } locale_entry;
443
444 /* First the C locale. */
445 locales[0].glibc_name = locales[0].name;
446 strncpy(locales[0].name,"C",5);
447 locales[0].dot_cs = 0;
448 locales[0].cs = 1; /* 7-bit encoding */
449 ++num_locales;
450
451 do {
452 if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) {
453 if (!fgets(line_buf, sizeof(line_buf), fp)) {
454 if (ferror(fp)) {
455 error_msg("reading file");
456 }
457 return; /* EOF */
458 }
459 if ((*line_buf == '#') && (line_buf[1] == '-')) {
460 break;
461 }
462 continue;
463 }
464
465 s = glibc_locale_names;
466 for (i=0 ; i < num_locales ; i++) {
467 if (!strcmp(s+1, p)) {
468 break;
469 }
470 s += 1 + ((unsigned char) *s);
471 }
472 if (i < num_locales) {
473 fprintf(stderr,"ignoring duplicate locale name: %s", p);
474 *line_buf = 0;
475 continue;
476 }
477
478 /* New locale, but don't increment num until codeset verified! */
479 *s = (char)((unsigned char) (strlen(p) + 1));
480 strcpy(s+1, p);
481 locales[num_locales].glibc_name = s+1;
482 ln = p; /* save locale name */
483
484 if (!(p = strtok(NULL, " \t\n"))) {
485 error_msg("missing codeset for locale %s", ln);
486 }
487 cs = p;
488 i = find_codeset_num(p);
489 if ((i == 2) && !default_utf8) {
490 fprintf(stderr,"ignoring UTF-8 locale %s\n", ln);
491 *line_buf = 0;
492 continue;
493 } else if ((i > 2) && !default_8bit) {
494 fprintf(stderr,"ignoring 8-bit codeset locale %s\n", ln);
495 *line_buf = 0;
496 continue;
497 }
498 locales[num_locales].cs = (char)((unsigned char) i);
499
500 if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
501 verbose_msg(VINFO,"ignoring trailing text: %s...\n", p);
502 }
503
504 /* Now go back to locale string for .codeset and @modifier */
505 as = strtok(ln, "@");
506 if (as) {
507 as = strtok(NULL, "@");
508 }
509 ds = strtok(ln, ".");
510 if (ds) {
511 ds = strtok(NULL, ".");
512 }
513 ls = ln;
514
515 if ((strlen(ls) != 5) || (ls[2] != '_')) {
516 error_msg("illegal locale name %s", ls);
517 }
518
519 i = 0; /* value for unspecified codeset */
520 if (ds) {
521 i = find_codeset_num(ds);
522 if ((i == 2) && !default_utf8) {
523 fprintf(stderr,"ignoring UTF-8 locale %s\n", ln);
524 *line_buf = 0;
525 continue;
526 } else if ((i > 2) && !default_8bit) {
527 fprintf(stderr,"ignoring 8-bit codeset locale %s\n", ln);
528 *line_buf = 0;
529 continue;
530 }
531 }
532 locales[num_locales].dot_cs = (char)((unsigned char) i);
533
534 if (as) {
535 i = find_at_string_num(as);
536 ls[2] = at_mapto[i];
537 }
538 memcpy(locales[num_locales].name, ls, 5);
539/* verbose_msg(VDETAIL,"locale: %5.5s %2d %2d %s\n", */
540/* locales[num_locales].name, */
541/* locales[num_locales].cs, */
542/* locales[num_locales].dot_cs, */
543/* locales[num_locales].glibc_name */
544/* ); */
545 ++num_locales;
546 *line_buf = 0;
547 } while (1);
548}
549
550static int le_cmp(const void *a, const void *b)
551{
552 const locale_entry *p;
553 const locale_entry *q;
554 int r;
555
556 p = (const locale_entry *) a;
557 q = (const locale_entry *) b;
558
559 if (!(r = p->name[0] - q->name[0])
560 && !(r = p->name[1] - q->name[1])
561 && !(r = p->name[3] - q->name[3])
562 && !(r = p->name[4] - q->name[4])
563 && !(r = p->name[2] - q->name[2])
564 && !(r = -(p->cs - q->cs))
565 ) {
566 r = -(p->dot_cs - q->dot_cs);
567 /* Reverse the ordering of the codesets so UTF-8 comes last.
568 * Work-around (hopefully) for glibc bug affecting at least
569 * the euro currency symbol. */
570 }
571
572 return r;
573}
574
575int main(int argc, char **argv)
576{
577 char *output_file = NULL;
578
579 while (--argc) {
580 ++argv;
581 if (!strcmp(*argv, "-o")) {
582 --argc;
583 output_file = strdup(*++argv);
584 } else if (!strcmp(*argv, "-v")) {
585 verbose++;
586 } else if (!(fp = fopen(*argv, "r"))) {
587no_inputfile:
588 error_msg("missing filename or file!");
589 }
590 }
591 if (fp == NULL)
592 goto no_inputfile;
593 if (output_file == NULL)
594 output_file = strdup("locale_tables.h");
595
596 at_strings_end = at_strings;
597
598 read_at_mappings();
599 read_enable_disable();
600 read_locale_list();
601
602 fclose(fp);
603
604 /* handle C locale specially */
605 qsort(locales+1, num_locales-1, sizeof(locale_entry), le_cmp);
606
607#if 0
608 for (i=0 ; i < num_locales ; i++) {
609 verbose_msg(VDETAIL,"locale: %5.5s %2d %2d %s\n",
610 locales[i].name,
611 locales[i].cs,
612 locales[i].dot_cs,
613 locales[i].glibc_name
614 );
615 }
616#endif
617 if (argc == 3)
618 output_file = *++argv;
619 if (output_file == NULL || !(ofp = fopen(output_file, "w"))) {
620 error_msg("cannot open output file '%s'!", output_file);
621 }
622
623 do_lc_time();
624 do_lc_numeric();
625 do_lc_monetary();
626 do_lc_messages();
627 do_lc_ctype();
628
629 do_locale_names();
630
631 fclose(ofp);
632
633 verbose_msg(VINFO, "total data size = %d\n", total_size);
634 verbose_msg(VDETAIL, "null count = %d\n", null_count);
635
636 return EXIT_SUCCESS;
637}
638
639static char *idx[10000];
640static char buf[100000];
641static char *last;
642static int uniq;
643
644static int addblock(const char *s, size_t n) /* l includes nul terminator */
645{
646 int j;
647
648 if (!s) {
649 ++null_count;
650 return 0;
651 }
652
653 for (j=0 ; (j < uniq) && (idx[j] + n < last) ; j++) {
654 if (!memcmp(s, idx[j], n)) {
655 return idx[j] - buf;
656 }
657 }
658 if (uniq >= sizeof(idx)) {
659 error_msg("too many uniq strings!");
660 }
661 if (last + n >= buf + sizeof(buf)) {
662 error_msg("need to increase size of buf!");
663 }
664
665 idx[uniq] = last;
666 ++uniq;
667 memcpy(last, s, n);
668 last += n;
669 return idx[uniq - 1] - buf;
670}
671
672static int addstring(const char *s)
673{
674 int j;
675 size_t l;
676
677 if (!s) {
678 ++null_count;
679 return 0;
680 }
681
682 for (j=0 ; j < uniq ; j++) {
683 if (!strcmp(s, idx[j])) {
684 return idx[j] - buf;
685 }
686 }
687 if (uniq >= sizeof(idx)) {
688 error_msg("too many uniq strings!");
689 }
690 l = strlen(s) + 1;
691 if (last + l >= buf + sizeof(buf)) {
692 error_msg("need to increase size of buf!");
693 }
694
695 idx[uniq] = last;
696 ++uniq;
697 strcpy(last, s);
698 last += l;
699 return idx[uniq - 1] - buf;
700}
701
/*
 * Tail shared by every do_lc_<CATEGORY>() function; expects int i, k, m
 * in scope.  Emits, in order:
 *   __lc_<CATEGORY>_data          the string pool,
 *   __lc_<CATEGORY>_rows          the unique rows (reuses buf as scratch),
 *   __lc_<CATEGORY>_item_offsets  start of each item's list in item_idx,
 *   __lc_<CATEGORY>_item_idx      pool offsets, grouped per nl_item.
 * Also adds the category's size to total_size.
 */
#define DO_LC_COMMON(CATEGORY) \
	i = num_locales + lc_##CATEGORY##_uniq*NUM_NL_##CATEGORY + (int)(last - buf); \
	verbose_msg(VDETAIL, "buf-size=%d  uniq=%d  rows=%d\n", \
		   (int)(last - buf), uniq, lc_##CATEGORY##_uniq); \
	verbose_msg(VDETAIL, "total = %d + %d * %d + %d = %d\n", \
		   num_locales, lc_##CATEGORY##_uniq, NUM_NL_##CATEGORY, (int)(last - buf), \
		   i); \
	total_size += i; \
	dump_table8c("__lc_" #CATEGORY "_data", buf, (int)(last - buf)); \
	/* rows are contiguous, so copy them into buf in one go */ \
	memcpy(buf, lc_##CATEGORY##_uniq_X, \
		   (size_t) lc_##CATEGORY##_uniq * NUM_NL_##CATEGORY); \
	dump_table8("__lc_" #CATEGORY "_rows", buf, lc_##CATEGORY##_uniq * NUM_NL_##CATEGORY); \
	buf16[0] = 0; \
	for (i = 1 ; i < NUM_NL_##CATEGORY ; i++) { \
		buf16[i] = buf16[i-1] + lc_##CATEGORY##_count[i-1]; \
	} \
	dump_table16("__lc_" #CATEGORY "_item_offsets", buf16, NUM_NL_##CATEGORY); \
	m = 0; \
	for (k = 0 ; k < NUM_NL_##CATEGORY ; k++) { \
		for (i = 0 ; i < lc_##CATEGORY##_count[k] ; i++) { \
			buf16[m++] = lc_##CATEGORY##_item[k][i]; \
		} \
	} \
	dump_table16("__lc_" #CATEGORY "_item_idx", buf16, m);
730
731
/*
 * End of the per-locale loop body in do_lc_<CATEGORY>(); expects i (the
 * locale index) and k (the number of nl_items just recorded) in scope.
 * The row just filled in at index lc_<CATEGORY>_uniq is kept only when
 * no earlier row is identical; the locale records the row it uses.
 */
#define DL_LC_LOOPTAIL(CATEGORY) \
	if (k > NUM_NL_##CATEGORY) { \
		error_msg("lc_" #CATEGORY " nl_item count > %d!", NUM_NL_##CATEGORY); \
	} \
	{ \
		int r = 0; \
		while ((r < lc_##CATEGORY##_uniq) \
			   && (memcmp(lc_##CATEGORY##_uniq_X[lc_##CATEGORY##_uniq], \
						  lc_##CATEGORY##_uniq_X[r], NUM_NL_##CATEGORY) != 0)) { \
			++r; \
		} \
		if (r == lc_##CATEGORY##_uniq) { /* new locale row */ \
			if (++lc_##CATEGORY##_uniq > 255) { \
				error_msg("too many unique lc_" #CATEGORY " rows!"); \
			} \
		} \
		locales[i].lc_##CATEGORY##_row = r; \
	}
752
753
754
755static int buf16[100*256];
756
757static void dump_table8(const char *name, const char *tbl, int len)
758{
759 int i;
760
761 fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
762 fprintf(ofp, "static const unsigned char %s[%d] = {", name, len);
763 for (i=0 ; i < len ; i++) {
764 if ((i % 12) == 0) {
765 fprintf(ofp, "\n\t");
766 }
767 fprintf(ofp, "%#4x, ", (int)((unsigned char) tbl[i]));
768 }
769 fprintf(ofp, "\n};\n\n");
770}
771
/*
 * Locale-independent ASCII classification.  The sizeof test selects a
 * narrow (unsigned char) or wide (unsigned int) range check according to
 * the argument's type; out-of-range values wrap to large unsigned numbers
 * and fail the comparison.
 */
#define __C_isdigit(c) \
	((sizeof(c) != sizeof(char)) \
	 ? (((unsigned int)((c) - '0')) <= 9u) \
	 : (((unsigned char)((c) - '0')) <= 9))
/* OR-ing in 0x20 folds upper-case ASCII letters onto lower case. */
#define __C_isalpha(c) \
	((sizeof(c) != sizeof(char)) \
	 ? (((unsigned int)(((c) | 0x20) - 'a')) <= 25u) \
	 : (((unsigned char)(((c) | 0x20) - 'a')) <= 25))
#define __C_isalnum(c) (__C_isalpha(c) || __C_isdigit(c))
781
782static void dump_table8c(const char *name, const char *tbl, int len)
783{
784 int i;
785
786 fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
787 fprintf(ofp, "static const unsigned char %s[%d] = {", name, len);
788 for (i=0 ; i < len ; i++) {
789 if ((i % 12) == 0) {
790 fprintf(ofp, "\n\t");
791 }
792 if (__C_isalnum(tbl[i]) || (tbl[i] == ' ')) {
793 fprintf(ofp, " '%c', ", (int)((unsigned char) tbl[i]));
794 } else {
795 fprintf(ofp, "%#4x, ", (int)((unsigned char) tbl[i]));
796 }
797 }
798 fprintf(ofp, "\n};\n\n");
799}
800
801static void dump_table16(const char *name, const int *tbl, int len)
802{
803 int i;
804
805 fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
806 fprintf(ofp, "static const uint16_t %s[%d] = {", name, len);
807 for (i=0 ; i < len ; i++) {
808 if ((i % 8) == 0) {
809 fprintf(ofp, "\n\t");
810 }
811 if (tbl[i] != (uint16_t) tbl[i]) {
812 error_msg("falls outside uint16 range!");
813 }
814 fprintf(ofp, "%#6x, ", tbl[i]);
815 }
816 fprintf(ofp, "\n};\n\n");
817}
818
819
#define NUM_NL_time 50

/*
 * LC_TIME bookkeeping:
 *   lc_time_item[k]   - distinct pool offsets seen for nl_item slot k,
 *   lc_time_count[k]  - how many of them are in use,
 *   lc_time_uniq_X    - rows of per-slot indices into lc_time_item,
 *   lc_time_uniq      - number of distinct rows kept so far (also the
 *                       index of the row currently being filled in).
 */
static int lc_time_item[NUM_NL_time][256];
static int lc_time_count[NUM_NL_time];
static unsigned char lc_time_uniq_X[700][NUM_NL_time];
static int lc_time_uniq;

#define DO_NL_S(X)	lc_time_S(X, k++)

/*
 * Record nl_langinfo(X) of the current locale as slot k of the row being
 * built.  ALT_DIGITS (100 consecutive strings when non-empty) and ERA
 * (a run of strings ended by an empty string) are pooled as raw blocks;
 * everything else is pooled as a plain string.
 */
static void lc_time_S(int X, int k)
{
	static const char empty[] = "";
	const char *s = nl_langinfo(X);
	const char *end;
	int off, slot, n;

	switch (X) {
	case ALT_DIGITS:
		if (!s) {
			s = empty;
		}
		end = s + 1;			/* empty value: just the nul */
		if (*s) {
			end = s;
			for (n = 0 ; n < 100 ; n++) {
				end += strlen(end) + 1;
			}
		}
		off = addblock(s, end - s);
		break;
	case ERA:
		if (!s) {
			s = empty;
		}
		for (end = s ; *end ; ) {
			end += strlen(end) + 1;
		}
		++end;					/* include the final empty string */
		off = addblock(s, end - s);
		break;
	default:
		off = addstring(s);
		break;
	}

	/* Find the offset among those already known for this slot. */
	slot = 0;
	while ((slot < lc_time_count[k]) && (lc_time_item[k][slot] != off)) {
		++slot;
	}
	if (slot == lc_time_count[k]) { /* new for this nl_item */
		if (slot > 255) {
			error_msg("too many nl_item %d entries in lc_time", k);
		}
		lc_time_item[k][slot] = off;
		++lc_time_count[k];
	}
	lc_time_uniq_X[lc_time_uniq][k] = slot;
}
885
886static void do_lc_time(void)
887{
888 int i, k, m;
889
890 last = buf+1;
891 uniq = 1;
892 *buf = 0;
893 *idx = buf;
894
895 for (i=0 ; i < num_locales ; i++) {
896 k = 0;
897
898 if (!setlocale(LC_ALL, locales[i].glibc_name)) {
899 verbose_msg(VDETAIL, "setlocale(LC_ALL,%s) failed!\n",
900 locales[i].glibc_name);
901 }
902
903 DO_NL_S(ABDAY_1);
904 DO_NL_S(ABDAY_2);
905 DO_NL_S(ABDAY_3);
906 DO_NL_S(ABDAY_4);
907 DO_NL_S(ABDAY_5);
908 DO_NL_S(ABDAY_6);
909 DO_NL_S(ABDAY_7);
910
911 DO_NL_S(DAY_1);
912 DO_NL_S(DAY_2);
913 DO_NL_S(DAY_3);
914 DO_NL_S(DAY_4);
915 DO_NL_S(DAY_5);
916 DO_NL_S(DAY_6);
917 DO_NL_S(DAY_7);
918
919 DO_NL_S(ABMON_1);
920 DO_NL_S(ABMON_2);
921 DO_NL_S(ABMON_3);
922 DO_NL_S(ABMON_4);
923 DO_NL_S(ABMON_5);
924 DO_NL_S(ABMON_6);
925 DO_NL_S(ABMON_7);
926 DO_NL_S(ABMON_8);
927 DO_NL_S(ABMON_9);
928 DO_NL_S(ABMON_10);
929 DO_NL_S(ABMON_11);
930 DO_NL_S(ABMON_12);
931
932 DO_NL_S(MON_1);
933 DO_NL_S(MON_2);
934 DO_NL_S(MON_3);
935 DO_NL_S(MON_4);
936 DO_NL_S(MON_5);
937 DO_NL_S(MON_6);
938 DO_NL_S(MON_7);
939 DO_NL_S(MON_8);
940 DO_NL_S(MON_9);
941 DO_NL_S(MON_10);
942 DO_NL_S(MON_11);
943 DO_NL_S(MON_12);
944
945 DO_NL_S(AM_STR);
946 DO_NL_S(PM_STR);
947
948 DO_NL_S(D_T_FMT);
949 DO_NL_S(D_FMT);
950 DO_NL_S(T_FMT);
951 DO_NL_S(T_FMT_AMPM);
952 DO_NL_S(ERA);
953
954 DO_NL_S(ERA_YEAR); /* non SuSv3 */
955 DO_NL_S(ERA_D_FMT);
956 DO_NL_S(ALT_DIGITS);
957 DO_NL_S(ERA_D_T_FMT);
958 DO_NL_S(ERA_T_FMT);
959
960 DL_LC_LOOPTAIL(time)
961 }
962
963 DO_LC_COMMON(time)
964}
965
966#undef DO_NL_S
967
968#define NUM_NL_numeric 3
969
970static int lc_numeric_item[NUM_NL_numeric][256];
971static int lc_numeric_count[NUM_NL_numeric];
972static unsigned char lc_numeric_uniq_X[700][NUM_NL_numeric];
973static int lc_numeric_uniq;
974
975#define DO_NL_S(X) lc_numeric_S(X, k++)
976
977static void lc_numeric_S(int X, int k)
978{
979 int j, m;
980 char buf[256];
981 char *e;
982 char *s;
983 char c;
984
985 s = nl_langinfo(X);
986 if (X == GROUPING) {
987 if (s) {
988 if ((*s == CHAR_MAX) || (*s == -1)) { /* stupid glibc... :-( */
989 s = "";
990 }
991 e = s;
992 c = 0;
993 while (*e) { /* find end of string */
994 if (*e == CHAR_MAX) {
995 c = CHAR_MAX;
996 ++e;
997 break;
998 }
999 ++e;
1000 }
1001 if ((e - s) > sizeof(buf)) {
1002 error_msg("grouping specifier too long");
1003 }
1004 strncpy(buf, s, (e-s));
1005 e = buf + (e-s);
1006 *e = 0; /* Make sure we're null-terminated. */
1007
1008 if (c != CHAR_MAX) { /* remove duplicate repeats */
1009 while (e > buf) {
1010 --e;
1011 if (*e != e[-1]) {
1012 break;
1013 }
1014 }
1015 *++e = 0;
1016 }
1017 s = buf;
1018 }
1019 }
1020 j = addstring(s);
1021 for (m=0 ; m < lc_numeric_count[k] ; m++) {
1022 if (lc_numeric_item[k][m] == j) {
1023 break;
1024 }
1025 }
1026 if (m == lc_numeric_count[k]) { /* new for this nl_item */
1027 if (m > 255) {
1028 error_msg("too many nl_item %d entries in lc_numeric", k);
1029 }
1030 lc_numeric_item[k][m] = j;
1031 ++lc_numeric_count[k];
1032 }
1033/* verbose_msg(VDETAIL, "\\x%02x", m); */
1034 lc_numeric_uniq_X[lc_numeric_uniq][k] = m;
1035}
1036
1037static void do_lc_numeric(void)
1038{
1039 int i, k, m;
1040
1041 last = buf+1;
1042 uniq = 1;
1043 *buf = 0;
1044 *idx = buf;
1045
1046 for (i=0 ; i < num_locales ; i++) {
1047 k = 0;
1048
1049 if (!setlocale(LC_ALL, locales[i].glibc_name)) {
1050 verbose_msg(VDETAIL,"setlocale(LC_ALL,%s) failed!\n",
1051 locales[i].glibc_name);
1052 }
1053
1054 DO_NL_S(RADIXCHAR); /* DECIMAL_POINT */
1055 DO_NL_S(THOUSEP); /* THOUSANDS_SEP */
1056 DO_NL_S(GROUPING);
1057
1058 DL_LC_LOOPTAIL(numeric)
1059 }
1060
1061 DO_LC_COMMON(numeric)
1062}
1063
1064#undef DO_NL_S
1065
#define NUM_NL_monetary (7+14+1)

/* LC_MONETARY bookkeeping; same layout as the other lc_<category>_* tables. */
static int lc_monetary_item[NUM_NL_monetary][256];
static int lc_monetary_count[NUM_NL_monetary];
static unsigned char lc_monetary_uniq_X[700][NUM_NL_monetary];
static int lc_monetary_uniq;

#define DO_NL_S(X)	lc_monetary_S(X, k++)

/*  #define DO_NL_C(X)		verbose_msg(VDETAIL,"%#02x", (int)(unsigned char)(*nl_langinfo(X))); */
#define DO_NL_C(X) lc_monetary_C(X, k++)

/*
 * Record a single-char monetary item: the first byte of nl_langinfo(X)
 * is pooled as a one-character string and stored as slot k of the row
 * being built.
 */
static void lc_monetary_C(int X, int k)
{
	char one[2];
	int off, slot;

#warning fix the char entries for monetary... target signedness of char may be different!

	one[0] = *nl_langinfo(X);
	one[1] = 0;
	off = addstring(one);

	/* Find the offset among those already known for this slot. */
	slot = 0;
	while ((slot < lc_monetary_count[k]) && (lc_monetary_item[k][slot] != off)) {
		++slot;
	}
	if (slot == lc_monetary_count[k]) { /* new for this nl_item */
		if (slot > 255) {
			error_msg("too many nl_item %d entries in lc_monetary", k);
		}
		lc_monetary_item[k][slot] = off;
		++lc_monetary_count[k];
	}
	lc_monetary_uniq_X[lc_monetary_uniq][k] = slot;
}
1103
1104
1105static void lc_monetary_S(int X, int k)
1106{
1107 int j, m;
1108 char buf[256];
1109 char *e;
1110 char *s;
1111 char c;
1112
1113 s = nl_langinfo(X);
1114 if (X == MON_GROUPING) {
1115 if (s) {
1116 if ((*s == CHAR_MAX) || (*s == -1)) { /* stupid glibc... :-( */
1117 s = "";
1118 }
1119 e = s;
1120 c = 0;
1121 while (*e) { /* find end of string */
1122 if (*e == CHAR_MAX) {
1123 c = CHAR_MAX;
1124 ++e;
1125 break;
1126 }
1127 ++e;
1128 }
1129 if ((e - s) > sizeof(buf)) {
1130 error_msg("mon_grouping specifier too long");
1131 }
1132 strncpy(buf, s, (e-s));
1133 e = buf + (e-s);
1134 *e = 0; /* Make sure we're null-terminated. */
1135
1136 if (c != CHAR_MAX) { /* remove duplicate repeats */
1137 while (e > buf) {
1138 --e;
1139 if (*e != e[-1]) {
1140 break;
1141 }
1142 }
1143 *++e = 0;
1144 }
1145 s = buf;
1146 }
1147 }
1148 j = addstring(s);
1149 for (m=0 ; m < lc_monetary_count[k] ; m++) {
1150 if (lc_monetary_item[k][m] == j) {
1151 break;
1152 }
1153 }
1154 if (m == lc_monetary_count[k]) { /* new for this nl_item */
1155 if (m > 255) {
1156 error_msg("too many nl_item %d entries in lc_monetary", k);
1157 }
1158 lc_monetary_item[k][m] = j;
1159 ++lc_monetary_count[k];
1160 }
1161/* verbose_msg(VDETAIL,"\\x%02x", m); */
1162 lc_monetary_uniq_X[lc_monetary_uniq][k] = m;
1163}
1164
1165static void do_lc_monetary(void)
1166{
1167 int i, k, m;
1168
1169 last = buf+1;
1170 uniq = 1;
1171 *buf = 0;
1172 *idx = buf;
1173
1174 for (i=0 ; i < num_locales ; i++) {
1175 k = 0;
1176
1177 if (!setlocale(LC_ALL, locales[i].glibc_name)) {
1178 verbose_msg(VDETAIL,"setlocale(LC_ALL,%s) failed!\n",
1179 locales[i].glibc_name);
1180 }
1181
1182
1183 /* non SUSv3 */
1184 DO_NL_S(INT_CURR_SYMBOL);
1185 DO_NL_S(CURRENCY_SYMBOL);
1186 DO_NL_S(MON_DECIMAL_POINT);
1187 DO_NL_S(MON_THOUSANDS_SEP);
1188 DO_NL_S(MON_GROUPING);
1189 DO_NL_S(POSITIVE_SIGN);
1190 DO_NL_S(NEGATIVE_SIGN);
1191 DO_NL_C(INT_FRAC_DIGITS);
1192 DO_NL_C(FRAC_DIGITS);
1193 DO_NL_C(P_CS_PRECEDES);
1194 DO_NL_C(P_SEP_BY_SPACE);
1195 DO_NL_C(N_CS_PRECEDES);
1196 DO_NL_C(N_SEP_BY_SPACE);
1197 DO_NL_C(P_SIGN_POSN);
1198 DO_NL_C(N_SIGN_POSN);
1199 DO_NL_C(INT_P_CS_PRECEDES);
1200 DO_NL_C(INT_P_SEP_BY_SPACE);
1201 DO_NL_C(INT_N_CS_PRECEDES);
1202 DO_NL_C(INT_N_SEP_BY_SPACE);
1203 DO_NL_C(INT_P_SIGN_POSN);
1204 DO_NL_C(INT_N_SIGN_POSN);
1205
1206 DO_NL_S(CRNCYSTR); /* CURRENCY_SYMBOL */
1207
1208 DL_LC_LOOPTAIL(monetary)
1209 }
1210
1211 DO_LC_COMMON(monetary)
1212}
1213
1214
1215#undef DO_NL_S
1216
#define NUM_NL_messages 4

/* LC_MESSAGES bookkeeping; same layout as the other lc_<category>_* tables. */
static int lc_messages_item[NUM_NL_messages][256];
static int lc_messages_count[NUM_NL_messages];
static unsigned char lc_messages_uniq_X[700][NUM_NL_messages];
static int lc_messages_uniq;

#define DO_NL_S(X)	lc_messages_S(X, k++)

/*
 * Pool nl_langinfo(X) of the current locale and record it as slot k of
 * the row being built.
 */
static void lc_messages_S(int X, int k)
{
	const int off = addstring(nl_langinfo(X));
	int slot = 0;

	/* Find the offset among those already known for this slot. */
	while ((slot < lc_messages_count[k]) && (lc_messages_item[k][slot] != off)) {
		++slot;
	}
	if (slot == lc_messages_count[k]) { /* new for this nl_item */
		if (slot > 255) {
			error_msg("too many nl_item %d entries in lc_messages", k);
		}
		lc_messages_item[k][slot] = off;
		++lc_messages_count[k];
	}
	lc_messages_uniq_X[lc_messages_uniq][k] = slot;
}
1245
1246static void do_lc_messages(void)
1247{
1248 int i, k, m;
1249
1250 last = buf+1;
1251 uniq = 1;
1252 *buf = 0;
1253 *idx = buf;
1254
1255 for (i=0 ; i < num_locales ; i++) {
1256 k = 0;
1257
1258 if (!setlocale(LC_ALL, locales[i].glibc_name)) {
1259 verbose_msg(VDETAIL, "setlocale(LC_ALL,%s) failed!\n",
1260 locales[i].glibc_name);
1261 }
1262
1263 DO_NL_S(YESEXPR);
1264 DO_NL_S(NOEXPR);
1265 DO_NL_S(YESSTR);
1266 DO_NL_S(NOSTR);
1267
1268 DL_LC_LOOPTAIL(messages)
1269 }
1270
1271 DO_LC_COMMON(messages)
1272}
1273
1274#undef DO_NL_S
1275
#define NUM_NL_ctype 10

/* LC_CTYPE bookkeeping; same layout as the other lc_<category>_* tables. */
static int lc_ctype_item[NUM_NL_ctype][256];
static int lc_ctype_count[NUM_NL_ctype];
static unsigned char lc_ctype_uniq_X[700][NUM_NL_ctype];
static int lc_ctype_uniq;

#define DO_NL_S(X)	lc_ctype_S(X, k++)

/*
 * Pool nl_langinfo(X) of the current locale and record it as slot k of
 * the row being built.
 */
static void lc_ctype_S(int X, int k)
{
	const int off = addstring(nl_langinfo(X));
	int slot = 0;

	/* Find the offset among those already known for this slot. */
	while ((slot < lc_ctype_count[k]) && (lc_ctype_item[k][slot] != off)) {
		++slot;
	}
	if (slot == lc_ctype_count[k]) { /* new for this nl_item */
		if (slot > 255) {
			error_msg("too many nl_item %d entries in lc_ctype", k);
		}
		lc_ctype_item[k][slot] = off;
		++lc_ctype_count[k];
	}
	lc_ctype_uniq_X[lc_ctype_uniq][k] = slot;
}
1304
1305static void do_lc_ctype(void)
1306{
1307 int i, k, m;
1308
1309 last = buf+1;
1310 uniq = 1;
1311 *buf = 0;
1312 *idx = buf;
1313
1314 for (i=0 ; i < num_locales ; i++) {
1315 k = 0;
1316
1317 if (!setlocale(LC_ALL, locales[i].glibc_name)) {
1318 verbose_msg(VDETAIL, "setlocale(LC_ALL,%s) failed!\n",
1319 locales[i].glibc_name);
1320 }
1321
1322 DO_NL_S(_NL_CTYPE_OUTDIGIT0_MB);
1323 DO_NL_S(_NL_CTYPE_OUTDIGIT1_MB);
1324 DO_NL_S(_NL_CTYPE_OUTDIGIT2_MB);
1325 DO_NL_S(_NL_CTYPE_OUTDIGIT3_MB);
1326 DO_NL_S(_NL_CTYPE_OUTDIGIT4_MB);
1327 DO_NL_S(_NL_CTYPE_OUTDIGIT5_MB);
1328 DO_NL_S(_NL_CTYPE_OUTDIGIT6_MB);
1329 DO_NL_S(_NL_CTYPE_OUTDIGIT7_MB);
1330 DO_NL_S(_NL_CTYPE_OUTDIGIT8_MB);
1331 DO_NL_S(_NL_CTYPE_OUTDIGIT9_MB);
1332
1333 DL_LC_LOOPTAIL(ctype)
1334 }
1335
1336 DO_LC_COMMON(ctype)
1337}