blob: a39a94f2cc3508b42308c3b11eb116ef582e4a5b [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/* Copyright (C) 1995-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <errno.h>
23#include <error.h>
24#include <stdlib.h>
25#include <wchar.h>
26#include <stdint.h>
27#include <sys/param.h>
28
29#include "localedef.h"
30#include "charmap.h"
31#include "localeinfo.h"
32#include "linereader.h"
33#include "locfile.h"
34#include "elem-hash.h"
35#include "../localeinfo.h"
36
37/* Uncomment the following line in the production version. */
38/* #define NDEBUG 1 */
39#include <assert.h>
40
41#define obstack_chunk_alloc malloc
42#define obstack_chunk_free free
43
/* Append DATA as one 32-bit word to the object currently growing in
   OBSTACK.  The value is passed through maybe_swap_uint32 first.  The
   object built so far must satisfy LOCFILE_ALIGNED_P; this is checked
   by an assertion.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  data = maybe_swap_uint32 (data);

  /* The dedicated int primitive can only be used when int is exactly
     32 bits wide; otherwise copy the four bytes.  */
  if (sizeof (int) != sizeof (int32_t))
    {
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow (obstack, data);
}
55
/* Variant of obstack_int32_grow which uses obstack_int_grow_fast when
   int is exactly 32 bits wide.  Note that the fallback for other int
   sizes still goes through the ordinary obstack_grow.  The alignment
   assertion and the maybe_swap_uint32 conversion are the same as in
   obstack_int32_grow.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  data = maybe_swap_uint32 (data);

  if (sizeof (int) != sizeof (int32_t))
    {
      obstack_grow (obstack, &data, sizeof (int32_t));
      return;
    }

  obstack_int_grow_fast (obstack, data);
}
67
68/* Forward declaration. */
69struct element_t;
70
/* Data type for one section of the collation definition.  A node can
   be a member of two singly linked lists at the same time: the
   known_sections list (linked through DEF_NEXT) and the sections list
   (linked through NEXT).  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  The array is produced by
     read_directions and has one entry per sorting level.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
89
90struct element_t;
91
/* Weight of an element for one sorting level: an array W of CNT
   element pointers.  insert_weights stores a single null pointer to
   express that the level is to be ignored.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers making up the weight.  */
  struct element_t **w;
};
99
/* Data type for collating element.  Objects are created by
   new_element and live in the memory pool of the collate object.  */
struct element_t
{
  /* Symbolic name; NULL for anonymous elements.  */
  const char *name;

  /* Multibyte representation and its length in bytes (NULL/0 if
     there is none).  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character representation and its length in
     characters (NULL/0 if there is none).  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask in which a bit is set if this element
     is used in the corresponding level.  Interesting for the
     singlebyte weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* One weight list per sorting level; allocated by insert_weights.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
149
/* Special element values.  These are small integers cast to pointers,
   so they must never be dereferenced.  insert_weights stores
   ELEMENT_ELLIPSIS2 in a weight list as a placeholder for a weight
   which depends on the element iterating over an ellipsis range.  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
154
/* Data type for collating symbol.  Objects are created by new_symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  NULL as long as no element has
     been associated with the symbol; find_element and insert_value
     create the element on demand.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
167
/* The three table types below are all generated from the same
   template: 3level.h is included once per type with TABLE naming the
   type, ELEMENT giving the value type and DEFAULT the value of unset
   entries.  NOTE(review): the macros are defined anew before each
   inclusion, so 3level.h presumably undefines them -- confirm there.  */

/* Sparse table of struct element_t *.  ITERATE and NO_ADD_LOCALE are
   further switches evaluated by 3level.h.  */
#define TABLE wchead_table
#define ELEMENT struct element_t *
#define DEFAULT NULL
#define ITERATE
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of int32_t.  */
#define TABLE collidx_table
#define ELEMENT int32_t
#define DEFAULT 0
#include "3level.h"

/* Sparse table of uint32_t.  Unset entries read as all-ones.  */
#define TABLE collseq_table
#define ELEMENT uint32_t
#define DEFAULT ~((uint32_t) 0)
#include "3level.h"
187
188
/* Simple name list for the preprocessor.  The string itself is stored
   in STR, i.e., a node has to be allocated with room for the string
   (including the terminating NUL) following the fixed part.  */
struct name_list
{
  /* Next node in the list.  */
  struct name_list *next;
  /* C99 flexible array member (instead of the GNU zero-length array
     extension); the size of the structure is unaffected.  */
  char str[];
};
195
196
/* The real definition of the struct for the LC_COLLATE locale.  */
struct locale_collate_t
{
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts.  Linked through the def_next member.  */
  struct section_list *known_sections;
  /* List of used sections.  Linked through the next member.  */
  struct section_list *sections;
  /* Current section using definition.  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* Flag whether the unnamed section has been defined.  */
  bool unnamed_section_defined;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  insert_weights
     links new elements in right behind it and then advances it.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* Memory pool from which elements, symbols, their strings and the
     weight arrays are allocated.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contains information.
     Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the lists on the wide character
     side (the counterpart of mbheads).  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;

  /* State of the preprocessor.  */
  enum
    {
      else_none = 0,
      else_ignore,
      else_seen
    }
    else_action;
};
268
269
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.

   NRULES is the number of sorting rules, i.e., weight levels.  It is
   zero until the first direction specification has been read;
   read_directions then sets it and every later specification is
   checked against it.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
276
277
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 style encoding of VAL in BUF and return the number
   of bytes written (1 to 6).  No terminating NUL byte is written, and
   BUF must have room for the bytes.  Every value below 0x80,
   including negative ones, is stored as a single byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes has room for 5 * STEP + 1 payload
         bits.  Use the shortest sequence which is sufficient, but at
         most six bytes.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The leading byte starts with STEP one bits followed by a zero
         bit.  Shift an unsigned constant; right-shifting the negative
         value ~0xff would be implementation-defined.  */
      *buf = (unsigned char) (0xff00u >> step);

      /* Fill in the continuation bytes from the last one backwards,
         six payload bits each.  */
      --step;
      do
        {
          buf[step] = 0x80 | (val & 0x3f);
          val >>= 6;
        }
      while (--step > 0);

      /* What is left of VAL belongs into the leading byte.  */
      *buf |= val;
    }

  return retval;
}
312
313
314static struct section_list *
315make_seclist_elem (struct locale_collate_t *collate, const char *string,
316 struct section_list *next)
317{
318 struct section_list *newp;
319
320 newp = (struct section_list *) obstack_alloc (&collate->mempool,
321 sizeof (*newp));
322 newp->next = next;
323 newp->name = string;
324 newp->first = NULL;
325 newp->last = NULL;
326
327 return newp;
328}
329
330
331static struct element_t *
332new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
333 const uint32_t *wcs, const char *name, size_t namelen,
334 int is_character)
335{
336 struct element_t *newp;
337
338 newp = (struct element_t *) obstack_alloc (&collate->mempool,
339 sizeof (*newp));
340 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
341 name, namelen);
342 if (mbs != NULL)
343 {
344 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
345 newp->nmbs = mbslen;
346 }
347 else
348 {
349 newp->mbs = NULL;
350 newp->nmbs = 0;
351 }
352 if (wcs != NULL)
353 {
354 size_t nwcs = wcslen ((wchar_t *) wcs);
355 uint32_t zero = 0;
356 /* Handle <U0000> as a single character. */
357 if (nwcs == 0)
358 nwcs = 1;
359 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
360 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
361 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
362 newp->nwcs = nwcs;
363 }
364 else
365 {
366 newp->wcs = NULL;
367 newp->nwcs = 0;
368 }
369 newp->mborder = NULL;
370 newp->wcorder = 0;
371 newp->used_in_level = 0;
372 newp->is_character = is_character;
373
374 /* Will be assigned later. XXX */
375 newp->mbseqorder = 0;
376 newp->wcseqorder = 0;
377
378 /* Will be allocated later. */
379 newp->weights = NULL;
380
381 newp->file = NULL;
382 newp->line = 0;
383
384 newp->section = collate->current_section;
385
386 newp->last = NULL;
387 newp->next = NULL;
388
389 newp->mbnext = NULL;
390 newp->mblast = NULL;
391
392 newp->wcnext = NULL;
393 newp->wclast = NULL;
394
395 return newp;
396}
397
398
399static struct symbol_t *
400new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
401{
402 struct symbol_t *newp;
403
404 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
405
406 newp->name = obstack_copy0 (&collate->mempool, name, len);
407 newp->order = NULL;
408
409 newp->file = NULL;
410 newp->line = 0;
411
412 return newp;
413}
414
415
416/* Test whether this name is already defined somewhere. */
417static int
418check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
419 const struct charmap_t *charmap,
420 struct repertoire_t *repertoire, const char *symbol,
421 size_t symbol_len)
422{
423 void *ignore = NULL;
424
425 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
426 {
427 lr_error (ldfile, _("`%.*s' already defined in charmap"),
428 (int) symbol_len, symbol);
429 return 1;
430 }
431
432 if (repertoire != NULL
433 && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
434 == 0))
435 {
436 lr_error (ldfile, _("`%.*s' already defined in repertoire"),
437 (int) symbol_len, symbol);
438 return 1;
439 }
440
441 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
442 {
443 lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
444 (int) symbol_len, symbol);
445 return 1;
446 }
447
448 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
449 {
450 lr_error (ldfile, _("`%.*s' already defined as collating element"),
451 (int) symbol_len, symbol);
452 return 1;
453 }
454
455 return 0;
456}
457
458
459/* Read the direction specification. */
460static void
461read_directions (struct linereader *ldfile, struct token *arg,
462 const struct charmap_t *charmap,
463 struct repertoire_t *repertoire, struct localedef_t *result)
464{
465 int cnt = 0;
466 int max = nrules ?: 10;
467 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
468 int warned = 0;
469 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
470
471 while (1)
472 {
473 int valid = 0;
474
475 if (arg->tok == tok_forward)
476 {
477 if (rules[cnt] & sort_backward)
478 {
479 if (! warned)
480 {
481 lr_error (ldfile, _("\
482%s: `forward' and `backward' are mutually excluding each other"),
483 "LC_COLLATE");
484 warned = 1;
485 }
486 }
487 else if (rules[cnt] & sort_forward)
488 {
489 if (! warned)
490 {
491 lr_error (ldfile, _("\
492%s: `%s' mentioned more than once in definition of weight %d"),
493 "LC_COLLATE", "forward", cnt + 1);
494 }
495 }
496 else
497 rules[cnt] |= sort_forward;
498
499 valid = 1;
500 }
501 else if (arg->tok == tok_backward)
502 {
503 if (rules[cnt] & sort_forward)
504 {
505 if (! warned)
506 {
507 lr_error (ldfile, _("\
508%s: `forward' and `backward' are mutually excluding each other"),
509 "LC_COLLATE");
510 warned = 1;
511 }
512 }
513 else if (rules[cnt] & sort_backward)
514 {
515 if (! warned)
516 {
517 lr_error (ldfile, _("\
518%s: `%s' mentioned more than once in definition of weight %d"),
519 "LC_COLLATE", "backward", cnt + 1);
520 }
521 }
522 else
523 rules[cnt] |= sort_backward;
524
525 valid = 1;
526 }
527 else if (arg->tok == tok_position)
528 {
529 if (rules[cnt] & sort_position)
530 {
531 if (! warned)
532 {
533 lr_error (ldfile, _("\
534%s: `%s' mentioned more than once in definition of weight %d"),
535 "LC_COLLATE", "position", cnt + 1);
536 }
537 }
538 else
539 rules[cnt] |= sort_position;
540
541 valid = 1;
542 }
543
544 if (valid)
545 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
546
547 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
548 || arg->tok == tok_semicolon)
549 {
550 if (! valid && ! warned)
551 {
552 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
553 warned = 1;
554 }
555
556 /* See whether we have to increment the counter. */
557 if (arg->tok != tok_comma && rules[cnt] != 0)
558 {
559 /* Add the default `forward' if we have seen only `position'. */
560 if (rules[cnt] == sort_position)
561 rules[cnt] = sort_position | sort_forward;
562
563 ++cnt;
564 }
565
566 if (arg->tok == tok_eof || arg->tok == tok_eol)
567 /* End of line or file, so we exit the loop. */
568 break;
569
570 if (nrules == 0)
571 {
572 /* See whether we have enough room in the array. */
573 if (cnt == max)
574 {
575 max += 10;
576 rules = (enum coll_sort_rule *) xrealloc (rules,
577 max
578 * sizeof (*rules));
579 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
580 }
581 }
582 else
583 {
584 if (cnt == nrules)
585 {
586 /* There must not be any more rule. */
587 if (! warned)
588 {
589 lr_error (ldfile, _("\
590%s: too many rules; first entry only had %d"),
591 "LC_COLLATE", nrules);
592 warned = 1;
593 }
594
595 lr_ignore_rest (ldfile, 0);
596 break;
597 }
598 }
599 }
600 else
601 {
602 if (! warned)
603 {
604 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
605 warned = 1;
606 }
607 }
608
609 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
610 }
611
612 if (nrules == 0)
613 {
614 /* Now we know how many rules we have. */
615 nrules = cnt;
616 rules = (enum coll_sort_rule *) xrealloc (rules,
617 nrules * sizeof (*rules));
618 }
619 else
620 {
621 if (cnt < nrules)
622 {
623 /* Not enough rules in this specification. */
624 if (! warned)
625 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
626
627 do
628 rules[cnt] = sort_forward;
629 while (++cnt < nrules);
630 }
631 }
632
633 collate->current_section->rules = rules;
634}
635
636
637static struct element_t *
638find_element (struct linereader *ldfile, struct locale_collate_t *collate,
639 const char *str, size_t len)
640{
641 void *result = NULL;
642
643 /* Search for the entries among the collation sequences already define. */
644 if (find_entry (&collate->seq_table, str, len, &result) != 0)
645 {
646 /* Nope, not define yet. So we see whether it is a
647 collation symbol. */
648 void *ptr;
649
650 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
651 {
652 /* It's a collation symbol. */
653 struct symbol_t *sym = (struct symbol_t *) ptr;
654 result = sym->order;
655
656 if (result == NULL)
657 result = sym->order = new_element (collate, NULL, 0, NULL,
658 NULL, 0, 0);
659 }
660 else if (find_entry (&collate->elem_table, str, len, &result) != 0)
661 {
662 /* It's also no collation element. So it is a character
663 element defined later. */
664 result = new_element (collate, NULL, 0, NULL, str, len, 1);
665 /* Insert it into the sequence table. */
666 insert_entry (&collate->seq_table, str, len, result);
667 }
668 }
669
670 return (struct element_t *) result;
671}
672
673
674static void
675unlink_element (struct locale_collate_t *collate)
676{
677 if (collate->cursor == collate->start)
678 {
679 assert (collate->cursor->next == NULL);
680 assert (collate->cursor->last == NULL);
681 collate->cursor = NULL;
682 }
683 else
684 {
685 if (collate->cursor->next != NULL)
686 collate->cursor->next->last = collate->cursor->last;
687 if (collate->cursor->last != NULL)
688 collate->cursor->last->next = collate->cursor->next;
689 collate->cursor = collate->cursor->last;
690 }
691}
692
693
694static void
695insert_weights (struct linereader *ldfile, struct element_t *elem,
696 const struct charmap_t *charmap,
697 struct repertoire_t *repertoire, struct localedef_t *result,
698 enum token_t ellipsis)
699{
700 int weight_cnt;
701 struct token *arg;
702 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
703
704 /* Initialize all the fields. */
705 elem->file = ldfile->fname;
706 elem->line = ldfile->lineno;
707
708 elem->last = collate->cursor;
709 elem->next = collate->cursor ? collate->cursor->next : NULL;
710 if (collate->cursor != NULL && collate->cursor->next != NULL)
711 collate->cursor->next->last = elem;
712 if (collate->cursor != NULL)
713 collate->cursor->next = elem;
714 if (collate->start == NULL)
715 {
716 assert (collate->cursor == NULL);
717 collate->start = elem;
718 }
719
720 elem->section = collate->current_section;
721
722 if (collate->current_section->first == NULL)
723 collate->current_section->first = elem;
724 if (collate->current_section->last == collate->cursor)
725 collate->current_section->last = elem;
726
727 collate->cursor = elem;
728
729 elem->weights = (struct element_list_t *)
730 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
731 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
732
733 weight_cnt = 0;
734
735 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
736 do
737 {
738 if (arg->tok == tok_eof || arg->tok == tok_eol)
739 break;
740
741 if (arg->tok == tok_ignore)
742 {
743 /* The weight for this level has to be ignored. We use the
744 null pointer to indicate this. */
745 elem->weights[weight_cnt].w = (struct element_t **)
746 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
747 elem->weights[weight_cnt].w[0] = NULL;
748 elem->weights[weight_cnt].cnt = 1;
749 }
750 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
751 {
752 char ucs4str[10];
753 struct element_t *val;
754 char *symstr;
755 size_t symlen;
756
757 if (arg->tok == tok_bsymbol)
758 {
759 symstr = arg->val.str.startmb;
760 symlen = arg->val.str.lenmb;
761 }
762 else
763 {
764 snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
765 symstr = ucs4str;
766 symlen = 9;
767 }
768
769 val = find_element (ldfile, collate, symstr, symlen);
770 if (val == NULL)
771 break;
772
773 elem->weights[weight_cnt].w = (struct element_t **)
774 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
775 elem->weights[weight_cnt].w[0] = val;
776 elem->weights[weight_cnt].cnt = 1;
777 }
778 else if (arg->tok == tok_string)
779 {
780 /* Split the string up in the individual characters and put
781 the element definitions in the list. */
782 const char *cp = arg->val.str.startmb;
783 int cnt = 0;
784 struct element_t *charelem;
785 struct element_t **weights = NULL;
786 int max = 0;
787
788 if (*cp == '\0')
789 {
790 lr_error (ldfile, _("%s: empty weight string not allowed"),
791 "LC_COLLATE");
792 lr_ignore_rest (ldfile, 0);
793 break;
794 }
795
796 do
797 {
798 if (*cp == '<')
799 {
800 /* Ahh, it's a bsymbol or an UCS4 value. If it's
801 the latter we have to unify the name. */
802 const char *startp = ++cp;
803 size_t len;
804
805 while (*cp != '>')
806 {
807 if (*cp == ldfile->escape_char)
808 ++cp;
809 if (*cp == '\0')
810 /* It's a syntax error. */
811 goto syntax;
812
813 ++cp;
814 }
815
816 if (cp - startp == 5 && startp[0] == 'U'
817 && isxdigit (startp[1]) && isxdigit (startp[2])
818 && isxdigit (startp[3]) && isxdigit (startp[4]))
819 {
820 unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
821 char *newstr;
822
823 newstr = (char *) xmalloc (10);
824 snprintf (newstr, 10, "U%08X", ucs4);
825 startp = newstr;
826
827 len = 9;
828 }
829 else
830 len = cp - startp;
831
832 charelem = find_element (ldfile, collate, startp, len);
833 ++cp;
834 }
835 else
836 {
837 /* People really shouldn't use characters directly in
838 the string. Especially since it's not really clear
839 what this means. We interpret all characters in the
840 string as if that would be bsymbols. Otherwise we
841 would have to match back to bsymbols somehow and this
842 is normally not what people normally expect. */
843 charelem = find_element (ldfile, collate, cp++, 1);
844 }
845
846 if (charelem == NULL)
847 {
848 /* We ignore the rest of the line. */
849 lr_ignore_rest (ldfile, 0);
850 break;
851 }
852
853 /* Add the pointer. */
854 if (cnt >= max)
855 {
856 struct element_t **newp;
857 max += 10;
858 newp = (struct element_t **)
859 alloca (max * sizeof (struct element_t *));
860 memcpy (newp, weights, cnt * sizeof (struct element_t *));
861 weights = newp;
862 }
863 weights[cnt++] = charelem;
864 }
865 while (*cp != '\0');
866
867 /* Now store the information. */
868 elem->weights[weight_cnt].w = (struct element_t **)
869 obstack_alloc (&collate->mempool,
870 cnt * sizeof (struct element_t *));
871 memcpy (elem->weights[weight_cnt].w, weights,
872 cnt * sizeof (struct element_t *));
873 elem->weights[weight_cnt].cnt = cnt;
874
875 /* We don't need the string anymore. */
876 free (arg->val.str.startmb);
877 }
878 else if (ellipsis != tok_none
879 && (arg->tok == tok_ellipsis2
880 || arg->tok == tok_ellipsis3
881 || arg->tok == tok_ellipsis4))
882 {
883 /* It must be the same ellipsis as used in the initial column. */
884 if (arg->tok != ellipsis)
885 lr_error (ldfile, _("\
886%s: weights must use the same ellipsis symbol as the name"),
887 "LC_COLLATE");
888
889 /* The weight for this level will depend on the element
890 iterating over the range. Put a placeholder. */
891 elem->weights[weight_cnt].w = (struct element_t **)
892 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
893 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
894 elem->weights[weight_cnt].cnt = 1;
895 }
896 else
897 {
898 syntax:
899 /* It's a syntax error. */
900 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
901 lr_ignore_rest (ldfile, 0);
902 break;
903 }
904
905 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
906 /* This better should be the end of the line or a semicolon. */
907 if (arg->tok == tok_semicolon)
908 /* OK, ignore this and read the next token. */
909 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
910 else if (arg->tok != tok_eof && arg->tok != tok_eol)
911 {
912 /* It's a syntax error. */
913 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
914 lr_ignore_rest (ldfile, 0);
915 break;
916 }
917 }
918 while (++weight_cnt < nrules);
919
920 if (weight_cnt < nrules)
921 {
922 /* This means the rest of the line uses the current element as
923 the weight. */
924 do
925 {
926 elem->weights[weight_cnt].w = (struct element_t **)
927 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
928 if (ellipsis == tok_none)
929 elem->weights[weight_cnt].w[0] = elem;
930 else
931 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
932 elem->weights[weight_cnt].cnt = 1;
933 }
934 while (++weight_cnt < nrules);
935 }
936 else
937 {
938 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
939 {
940 /* Too many rule values. */
941 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
942 lr_ignore_rest (ldfile, 0);
943 }
944 else
945 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
946 }
947}
948
949
950static int
951insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
952 const struct charmap_t *charmap, struct repertoire_t *repertoire,
953 struct localedef_t *result)
954{
955 /* First find out what kind of symbol this is. */
956 struct charseq *seq;
957 uint32_t wc;
958 struct element_t *elem = NULL;
959 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
960
961 /* Try to find the character in the charmap. */
962 seq = charmap_find_value (charmap, symstr, symlen);
963
964 /* Determine the wide character. */
965 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
966 {
967 wc = repertoire_find_value (repertoire, symstr, symlen);
968 if (seq != NULL)
969 seq->ucs4 = wc;
970 }
971 else
972 wc = seq->ucs4;
973
974 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
975 {
976 /* It's no character, so look through the collation elements and
977 symbol list. */
978 void *ptr = elem;
979 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
980 {
981 void *result;
982 struct symbol_t *sym = NULL;
983
984 /* It's also collation element. Therefore it's either a
985 collating symbol or it's a character which is not
986 supported by the character set. In the later case we
987 simply create a dummy entry. */
988 if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
989 {
990 /* It's a collation symbol. */
991 sym = (struct symbol_t *) result;
992
993 elem = sym->order;
994 }
995
996 if (elem == NULL)
997 {
998 elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
999
1000 if (sym != NULL)
1001 sym->order = elem;
1002 else
1003 /* Enter a fake element in the sequence table. This
1004 won't cause anything in the output since there is
1005 no multibyte or wide character associated with
1006 it. */
1007 insert_entry (&collate->seq_table, symstr, symlen, elem);
1008 }
1009 }
1010 else
1011 /* Copy the result back. */
1012 elem = ptr;
1013 }
1014 else
1015 {
1016 /* Otherwise the symbols stands for a character. */
1017 void *ptr = elem;
1018 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1019 {
1020 uint32_t wcs[2] = { wc, 0 };
1021
1022 /* We have to allocate an entry. */
1023 elem = new_element (collate,
1024 seq != NULL ? (char *) seq->bytes : NULL,
1025 seq != NULL ? seq->nbytes : 0,
1026 wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1027 symstr, symlen, 1);
1028
1029 /* And add it to the table. */
1030 if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1031 /* This cannot happen. */
1032 assert (! "Internal error");
1033 }
1034 else
1035 {
1036 /* Copy the result back. */
1037 elem = ptr;
1038
1039 /* Maybe the character was used before the definition. In this case
1040 we have to insert the byte sequences now. */
1041 if (elem->mbs == NULL && seq != NULL)
1042 {
1043 elem->mbs = obstack_copy0 (&collate->mempool,
1044 seq->bytes, seq->nbytes);
1045 elem->nmbs = seq->nbytes;
1046 }
1047
1048 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1049 {
1050 uint32_t wcs[2] = { wc, 0 };
1051
1052 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1053 elem->nwcs = 1;
1054 }
1055 }
1056 }
1057
1058 /* Test whether this element is not already in the list. */
1059 if (elem->next != NULL || elem == collate->cursor)
1060 {
1061 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1062 (int) symlen, symstr, elem->file, elem->line);
1063 lr_ignore_rest (ldfile, 0);
1064 return 1;
1065 }
1066
1067 insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1068
1069 return 0;
1070}
1071
1072
1073static void
1074handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1075 enum token_t ellipsis, const struct charmap_t *charmap,
1076 struct repertoire_t *repertoire,
1077 struct localedef_t *result)
1078{
1079 struct element_t *startp;
1080 struct element_t *endp;
1081 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1082
1083 /* Unlink the entry added for the ellipsis. */
1084 unlink_element (collate);
1085 startp = collate->cursor;
1086
1087 /* Process and add the end-entry. */
1088 if (symstr != NULL
1089 && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1090 /* Something went wrong with inserting the to-value. This means
1091 we cannot process the ellipsis. */
1092 return;
1093
1094 /* Reset the cursor. */
1095 collate->cursor = startp;
1096
1097 /* Now we have to handle many different situations:
1098 - we have to distinguish between the three different ellipsis forms
1099 - the is the ellipsis at the beginning, in the middle, or at the end.
1100 */
1101 endp = collate->cursor->next;
1102 assert (symstr == NULL || endp != NULL);
1103
1104 /* XXX The following is probably very wrong since also collating symbols
1105 can appear in ranges. But do we want/can refine the test for that? */
1106#if 0
1107 /* Both, the start and the end symbol, must stand for characters. */
1108 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1109 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1110 {
1111 lr_error (ldfile, _("\
1112%s: the start and the end symbol of a range must stand for characters"),
1113 "LC_COLLATE");
1114 return;
1115 }
1116#endif
1117
1118 if (ellipsis == tok_ellipsis3)
1119 {
1120 /* One requirement we make here: the length of the byte
1121 sequences for the first and end character must be the same.
1122 This is mainly to prevent unwanted effects and this is often
1123 not what is wanted. */
1124 size_t len = (startp->mbs != NULL ? startp->nmbs
1125 : (endp->mbs != NULL ? endp->nmbs : 0));
1126 char mbcnt[len + 1];
1127 char mbend[len + 1];
1128
1129 /* Well, this should be caught somewhere else already. Just to
1130 make sure. */
1131 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1132 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1133
1134 if (startp != NULL && endp != NULL
1135 && startp->mbs != NULL && endp->mbs != NULL
1136 && startp->nmbs != endp->nmbs)
1137 {
1138 lr_error (ldfile, _("\
1139%s: byte sequences of first and last character must have the same length"),
1140 "LC_COLLATE");
1141 return;
1142 }
1143
1144 /* Determine whether we have to generate multibyte sequences. */
1145 if ((startp == NULL || startp->mbs != NULL)
1146 && (endp == NULL || endp->mbs != NULL))
1147 {
1148 int cnt;
1149 int ret;
1150
1151 /* Prepare the beginning byte sequence. This is either from the
1152 beginning byte sequence or it is all nulls if it was an
1153 initial ellipsis. */
1154 if (startp == NULL || startp->mbs == NULL)
1155 memset (mbcnt, '\0', len);
1156 else
1157 {
1158 memcpy (mbcnt, startp->mbs, len);
1159
1160 /* And increment it so that the value is the first one we will
1161 try to insert. */
1162 for (cnt = len - 1; cnt >= 0; --cnt)
1163 if (++mbcnt[cnt] != '\0')
1164 break;
1165 }
1166 mbcnt[len] = '\0';
1167
1168 /* And the end sequence. */
1169 if (endp == NULL || endp->mbs == NULL)
1170 memset (mbend, '\0', len);
1171 else
1172 memcpy (mbend, endp->mbs, len);
1173 mbend[len] = '\0';
1174
1175 /* Test whether we have a correct range. */
1176 ret = memcmp (mbcnt, mbend, len);
1177 if (ret >= 0)
1178 {
1179 if (ret > 0)
1180 lr_error (ldfile, _("%s: byte sequence of first character of \
1181range is not lower than that of the last character"), "LC_COLLATE");
1182 return;
1183 }
1184
1185 /* Generate the byte sequences data. */
1186 while (1)
1187 {
1188 struct charseq *seq;
1189
1190 /* Quite a bit of work ahead. We have to find the character
1191 definition for the byte sequence and then determine the
1192 wide character belonging to it. */
1193 seq = charmap_find_symbol (charmap, mbcnt, len);
1194 if (seq != NULL)
1195 {
1196 struct element_t *elem;
1197 size_t namelen;
1198
1199 /* I don't think this can ever happen. */
1200 assert (seq->name != NULL);
1201 namelen = strlen (seq->name);
1202
1203 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1204 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1205 namelen);
1206
1207 /* Now we are ready to insert the new value in the
1208 sequence. Find out whether the element is
1209 already known. */
1210 void *ptr;
1211 if (find_entry (&collate->seq_table, seq->name, namelen,
1212 &ptr) != 0)
1213 {
1214 uint32_t wcs[2] = { seq->ucs4, 0 };
1215
1216 /* We have to allocate an entry. */
1217 elem = new_element (collate, mbcnt, len,
1218 seq->ucs4 == ILLEGAL_CHAR_VALUE
1219 ? NULL : wcs, seq->name,
1220 namelen, 1);
1221
1222 /* And add it to the table. */
1223 if (insert_entry (&collate->seq_table, seq->name,
1224 namelen, elem) != 0)
1225 /* This cannot happen. */
1226 assert (! "Internal error");
1227 }
1228 else
1229 /* Copy the result. */
1230 elem = ptr;
1231
1232 /* Test whether this element is not already in the list. */
1233 if (elem->next != NULL || (collate->cursor != NULL
1234 && elem->next == collate->cursor))
1235 {
1236 lr_error (ldfile, _("\
1237order for `%.*s' already defined at %s:%Zu"),
1238 (int) namelen, seq->name,
1239 elem->file, elem->line);
1240 goto increment;
1241 }
1242
1243 /* Enqueue the new element. */
1244 elem->last = collate->cursor;
1245 if (collate->cursor == NULL)
1246 elem->next = NULL;
1247 else
1248 {
1249 elem->next = collate->cursor->next;
1250 elem->last->next = elem;
1251 if (elem->next != NULL)
1252 elem->next->last = elem;
1253 }
1254 if (collate->start == NULL)
1255 {
1256 assert (collate->cursor == NULL);
1257 collate->start = elem;
1258 }
1259 collate->cursor = elem;
1260
1261 /* Add the weight value. We take them from the
1262 `ellipsis_weights' member of `collate'. */
1263 elem->weights = (struct element_list_t *)
1264 obstack_alloc (&collate->mempool,
1265 nrules * sizeof (struct element_list_t));
1266 for (cnt = 0; cnt < nrules; ++cnt)
1267 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1268 && (collate->ellipsis_weight.weights[cnt].w[0]
1269 == ELEMENT_ELLIPSIS2))
1270 {
1271 elem->weights[cnt].w = (struct element_t **)
1272 obstack_alloc (&collate->mempool,
1273 sizeof (struct element_t *));
1274 elem->weights[cnt].w[0] = elem;
1275 elem->weights[cnt].cnt = 1;
1276 }
1277 else
1278 {
1279 /* Simply use the weight from `ellipsis_weight'. */
1280 elem->weights[cnt].w =
1281 collate->ellipsis_weight.weights[cnt].w;
1282 elem->weights[cnt].cnt =
1283 collate->ellipsis_weight.weights[cnt].cnt;
1284 }
1285 }
1286
1287 /* Increment for the next round. */
1288 increment:
1289 for (cnt = len - 1; cnt >= 0; --cnt)
1290 if (++mbcnt[cnt] != '\0')
1291 break;
1292
1293 /* Find out whether this was all. */
1294 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1295 /* Yep, that's all. */
1296 break;
1297 }
1298 }
1299 }
1300 else
1301 {
1302 /* For symbolic range we naturally must have a beginning and an
1303 end specified by the user. */
1304 if (startp == NULL)
1305 lr_error (ldfile, _("\
1306%s: symbolic range ellipsis must not directly follow `order_start'"),
1307 "LC_COLLATE");
1308 else if (endp == NULL)
1309 lr_error (ldfile, _("\
1310%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1311 "LC_COLLATE");
1312 else
1313 {
1314 /* Determine the range. To do so we have to determine the
1315 common prefix of the both names and then the numeric
1316 values of both ends. */
1317 size_t lenfrom = strlen (startp->name);
1318 size_t lento = strlen (endp->name);
1319 char buf[lento + 1];
1320 int preflen = 0;
1321 long int from;
1322 long int to;
1323 char *cp;
1324 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1325
1326 if (lenfrom != lento)
1327 {
1328 invalid_range:
1329 lr_error (ldfile, _("\
1330`%s' and `%.*s' are not valid names for symbolic range"),
1331 startp->name, (int) lento, endp->name);
1332 return;
1333 }
1334
1335 while (startp->name[preflen] == endp->name[preflen])
1336 if (startp->name[preflen] == '\0')
1337 /* Nothing to be done. The start and end point are identical
1338 and while inserting the end point we have already given
1339 the user an error message. */
1340 return;
1341 else
1342 ++preflen;
1343
1344 errno = 0;
1345 from = strtol (startp->name + preflen, &cp, base);
1346 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1347 goto invalid_range;
1348
1349 errno = 0;
1350 to = strtol (endp->name + preflen, &cp, base);
1351 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1352 goto invalid_range;
1353
1354 /* Copy the prefix. */
1355 memcpy (buf, startp->name, preflen);
1356
1357 /* Loop over all values. */
1358 for (++from; from < to; ++from)
1359 {
1360 struct element_t *elem = NULL;
1361 struct charseq *seq;
1362 uint32_t wc;
1363 int cnt;
1364
1365 /* Generate the name. */
1366 sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1367 (int) (lenfrom - preflen), from);
1368
1369 /* Look whether this name is already defined. */
1370 void *ptr;
1371 if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1372 {
1373 /* Copy back the result. */
1374 elem = ptr;
1375
1376 if (elem->next != NULL || (collate->cursor != NULL
1377 && elem->next == collate->cursor))
1378 {
1379 lr_error (ldfile, _("\
1380%s: order for `%.*s' already defined at %s:%Zu"),
1381 "LC_COLLATE", (int) lenfrom, buf,
1382 elem->file, elem->line);
1383 continue;
1384 }
1385
1386 if (elem->name == NULL)
1387 {
1388 lr_error (ldfile, _("%s: `%s' must be a character"),
1389 "LC_COLLATE", buf);
1390 continue;
1391 }
1392 }
1393
1394 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1395 {
1396 /* Search for a character of this name. */
1397 seq = charmap_find_value (charmap, buf, lenfrom);
1398 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1399 {
1400 wc = repertoire_find_value (repertoire, buf, lenfrom);
1401
1402 if (seq != NULL)
1403 seq->ucs4 = wc;
1404 }
1405 else
1406 wc = seq->ucs4;
1407
1408 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1409 /* We don't know anything about a character with this
1410 name. XXX Should we warn? */
1411 continue;
1412
1413 if (elem == NULL)
1414 {
1415 uint32_t wcs[2] = { wc, 0 };
1416
1417 /* We have to allocate an entry. */
1418 elem = new_element (collate,
1419 seq != NULL
1420 ? (char *) seq->bytes : NULL,
1421 seq != NULL ? seq->nbytes : 0,
1422 wc == ILLEGAL_CHAR_VALUE
1423 ? NULL : wcs, buf, lenfrom, 1);
1424 }
1425 else
1426 {
1427 /* Update the element. */
1428 if (seq != NULL)
1429 {
1430 elem->mbs = obstack_copy0 (&collate->mempool,
1431 seq->bytes, seq->nbytes);
1432 elem->nmbs = seq->nbytes;
1433 }
1434
1435 if (wc != ILLEGAL_CHAR_VALUE)
1436 {
1437 uint32_t zero = 0;
1438
1439 obstack_grow (&collate->mempool,
1440 &wc, sizeof (uint32_t));
1441 obstack_grow (&collate->mempool,
1442 &zero, sizeof (uint32_t));
1443 elem->wcs = obstack_finish (&collate->mempool);
1444 elem->nwcs = 1;
1445 }
1446 }
1447
1448 elem->file = ldfile->fname;
1449 elem->line = ldfile->lineno;
1450 elem->section = collate->current_section;
1451 }
1452
1453 /* Enqueue the new element. */
1454 elem->last = collate->cursor;
1455 elem->next = collate->cursor->next;
1456 elem->last->next = elem;
1457 if (elem->next != NULL)
1458 elem->next->last = elem;
1459 collate->cursor = elem;
1460
1461 /* Now add the weights. They come from the `ellipsis_weights'
1462 member of `collate'. */
1463 elem->weights = (struct element_list_t *)
1464 obstack_alloc (&collate->mempool,
1465 nrules * sizeof (struct element_list_t));
1466 for (cnt = 0; cnt < nrules; ++cnt)
1467 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1468 && (collate->ellipsis_weight.weights[cnt].w[0]
1469 == ELEMENT_ELLIPSIS2))
1470 {
1471 elem->weights[cnt].w = (struct element_t **)
1472 obstack_alloc (&collate->mempool,
1473 sizeof (struct element_t *));
1474 elem->weights[cnt].w[0] = elem;
1475 elem->weights[cnt].cnt = 1;
1476 }
1477 else
1478 {
1479 /* Simly use the weight from `ellipsis_weight'. */
1480 elem->weights[cnt].w =
1481 collate->ellipsis_weight.weights[cnt].w;
1482 elem->weights[cnt].cnt =
1483 collate->ellipsis_weight.weights[cnt].cnt;
1484 }
1485 }
1486 }
1487 }
1488}
1489
1490
1491static void
1492collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1493 struct localedef_t *copy_locale, int ignore_content)
1494{
1495 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1496 {
1497 struct locale_collate_t *collate;
1498
1499 if (copy_locale == NULL)
1500 {
1501 collate = locale->categories[LC_COLLATE].collate =
1502 (struct locale_collate_t *)
1503 xcalloc (1, sizeof (struct locale_collate_t));
1504
1505 /* Init the various data structures. */
1506 init_hash (&collate->elem_table, 100);
1507 init_hash (&collate->sym_table, 100);
1508 init_hash (&collate->seq_table, 500);
1509 obstack_init (&collate->mempool);
1510
1511 collate->col_weight_max = -1;
1512 }
1513 else
1514 /* Reuse the copy_locale's data structures. */
1515 collate = locale->categories[LC_COLLATE].collate =
1516 copy_locale->categories[LC_COLLATE].collate;
1517 }
1518
1519 ldfile->translate_strings = 0;
1520 ldfile->return_widestr = 0;
1521}
1522
1523
1524void
1525collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1526{
1527 /* Now is the time when we can assign the individual collation
1528 values for all the symbols. We have possibly different values
1529 for the wide- and the multibyte-character symbols. This is done
1530 since it might make a difference in the encoding if there is in
1531 some cases no multibyte-character but there are wide-characters.
1532 (The other way around it is not important since theencoded
1533 collation value in the wide-character case is 32 bits wide and
1534 therefore requires no encoding).
1535
1536 The lowest collation value assigned is 2. Zero is reserved for
1537 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1538 functions and 1 is used to separate the individual passes for the
1539 different rules.
1540
1541 We also have to construct is list with all the bytes/words which
1542 can come first in a sequence, followed by all the elements which
1543 also start with this byte/word. The order is reverse which has
1544 among others the important effect that longer strings are located
1545 first in the list. This is required for the output data since
1546 the algorithm used in `strcoll' etc depends on this.
1547
1548 The multibyte case is easy. We simply sort into an array with
1549 256 elements. */
1550 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1551 int mbact[nrules];
1552 int wcact;
1553 int mbseqact;
1554 int wcseqact;
1555 struct element_t *runp;
1556 int i;
1557 int need_undefined = 0;
1558 struct section_list *sect;
1559 int ruleidx;
1560 int nr_wide_elems = 0;
1561
1562 if (collate == NULL)
1563 {
1564 /* No data, no check. */
1565 if (! be_quiet)
1566 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1567 "LC_COLLATE"));
1568 return;
1569 }
1570
1571 /* If this assertion is hit change the type in `element_t'. */
1572 assert (nrules <= sizeof (runp->used_in_level) * 8);
1573
1574 /* Make sure that the `position' rule is used either in all sections
1575 or in none. */
1576 for (i = 0; i < nrules; ++i)
1577 for (sect = collate->sections; sect != NULL; sect = sect->next)
1578 if (sect != collate->current_section
1579 && sect->rules != NULL
1580 && ((sect->rules[i] & sort_position)
1581 != (collate->current_section->rules[i] & sort_position)))
1582 {
1583 WITH_CUR_LOCALE (error (0, 0, _("\
1584%s: `position' must be used for a specific level in all sections or none"),
1585 "LC_COLLATE"));
1586 break;
1587 }
1588
1589 /* Find out which elements are used at which level. At the same
1590 time we find out whether we have any undefined symbols. */
1591 runp = collate->start;
1592 while (runp != NULL)
1593 {
1594 if (runp->mbs != NULL)
1595 {
1596 for (i = 0; i < nrules; ++i)
1597 {
1598 int j;
1599
1600 for (j = 0; j < runp->weights[i].cnt; ++j)
1601 /* A NULL pointer as the weight means IGNORE. */
1602 if (runp->weights[i].w[j] != NULL)
1603 {
1604 if (runp->weights[i].w[j]->weights == NULL)
1605 {
1606 WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1607 runp->line,
1608 _("symbol `%s' not defined"),
1609 runp->weights[i].w[j]->name));
1610
1611 need_undefined = 1;
1612 runp->weights[i].w[j] = &collate->undefined;
1613 }
1614 else
1615 /* Set the bit for the level. */
1616 runp->weights[i].w[j]->used_in_level |= 1 << i;
1617 }
1618 }
1619 }
1620
1621 /* Up to the next entry. */
1622 runp = runp->next;
1623 }
1624
1625 /* Walk through the list of defined sequences and assign weights. Also
1626 create the data structure which will allow generating the single byte
1627 character based tables.
1628
1629 Since at each time only the weights for each of the rules are
1630 only compared to other weights for this rule it is possible to
1631 assign more compact weight values than simply counting all
1632 weights in sequence. We can assign weights from 3, one for each
1633 rule individually and only for those elements, which are actually
1634 used for this rule.
1635
1636 Why is this important? It is not for the wide char table. But
1637 it is for the singlebyte output since here larger numbers have to
1638 be encoded to make it possible to emit the value as a byte
1639 string. */
1640 for (i = 0; i < nrules; ++i)
1641 mbact[i] = 2;
1642 wcact = 2;
1643 mbseqact = 0;
1644 wcseqact = 0;
1645 runp = collate->start;
1646 while (runp != NULL)
1647 {
1648 /* Determine the order. */
1649 if (runp->used_in_level != 0)
1650 {
1651 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1652 nrules * sizeof (int));
1653
1654 for (i = 0; i < nrules; ++i)
1655 if ((runp->used_in_level & (1 << i)) != 0)
1656 runp->mborder[i] = mbact[i]++;
1657 else
1658 runp->mborder[i] = 0;
1659 }
1660
1661 if (runp->mbs != NULL)
1662 {
1663 struct element_t **eptr;
1664 struct element_t *lastp = NULL;
1665
1666 /* Find the point where to insert in the list. */
1667 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1668 while (*eptr != NULL)
1669 {
1670 if ((*eptr)->nmbs < runp->nmbs)
1671 break;
1672
1673 if ((*eptr)->nmbs == runp->nmbs)
1674 {
1675 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1676
1677 if (c == 0)
1678 {
1679 /* This should not happen. It means that we have
1680 to symbols with the same byte sequence. It is
1681 of course an error. */
1682 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1683 (*eptr)->line,
1684 _("\
1685symbol `%s' has the same encoding as"), (*eptr)->name);
1686 error_at_line (0, 0, runp->file,
1687 runp->line,
1688 _("symbol `%s'"),
1689 runp->name));
1690 goto dont_insert;
1691 }
1692 else if (c < 0)
1693 /* Insert it here. */
1694 break;
1695 }
1696
1697 /* To the next entry. */
1698 lastp = *eptr;
1699 eptr = &(*eptr)->mbnext;
1700 }
1701
1702 /* Set the pointers. */
1703 runp->mbnext = *eptr;
1704 runp->mblast = lastp;
1705 if (*eptr != NULL)
1706 (*eptr)->mblast = runp;
1707 *eptr = runp;
1708 dont_insert:
1709 ;
1710 }
1711
1712 if (runp->used_in_level)
1713 {
1714 runp->wcorder = wcact++;
1715
1716 /* We take the opportunity to count the elements which have
1717 wide characters. */
1718 ++nr_wide_elems;
1719 }
1720
1721 if (runp->is_character)
1722 {
1723 if (runp->nmbs == 1)
1724 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1725
1726 runp->wcseqorder = wcseqact++;
1727 }
1728 else if (runp->mbs != NULL && runp->weights != NULL)
1729 /* This is for collation elements. */
1730 runp->wcseqorder = wcseqact++;
1731
1732 /* Up to the next entry. */
1733 runp = runp->next;
1734 }
1735
1736 /* Find out whether any of the `mbheads' entries is unset. In this
1737 case we use the UNDEFINED entry. */
1738 for (i = 1; i < 256; ++i)
1739 if (collate->mbheads[i] == NULL)
1740 {
1741 need_undefined = 1;
1742 collate->mbheads[i] = &collate->undefined;
1743 }
1744
1745 /* Now to the wide character case. */
1746 collate->wcheads.p = 6;
1747 collate->wcheads.q = 10;
1748 wchead_table_init (&collate->wcheads);
1749
1750 collate->wcseqorder.p = 6;
1751 collate->wcseqorder.q = 10;
1752 collseq_table_init (&collate->wcseqorder);
1753
1754 /* Start adding. */
1755 runp = collate->start;
1756 while (runp != NULL)
1757 {
1758 if (runp->wcs != NULL)
1759 {
1760 struct element_t *e;
1761 struct element_t **eptr;
1762 struct element_t *lastp;
1763
1764 /* Insert the collation sequence value. */
1765 if (runp->is_character)
1766 collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1767 runp->wcseqorder);
1768
1769 /* Find the point where to insert in the list. */
1770 e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1771 eptr = &e;
1772 lastp = NULL;
1773 while (*eptr != NULL)
1774 {
1775 if ((*eptr)->nwcs < runp->nwcs)
1776 break;
1777
1778 if ((*eptr)->nwcs == runp->nwcs)
1779 {
1780 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1781 (wchar_t *) runp->wcs, runp->nwcs);
1782
1783 if (c == 0)
1784 {
1785 /* This should not happen. It means that we have
1786 two symbols with the same byte sequence. It is
1787 of course an error. */
1788 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1789 (*eptr)->line,
1790 _("\
1791symbol `%s' has the same encoding as"), (*eptr)->name);
1792 error_at_line (0, 0, runp->file,
1793 runp->line,
1794 _("symbol `%s'"),
1795 runp->name));
1796 goto dont_insertwc;
1797 }
1798 else if (c < 0)
1799 /* Insert it here. */
1800 break;
1801 }
1802
1803 /* To the next entry. */
1804 lastp = *eptr;
1805 eptr = &(*eptr)->wcnext;
1806 }
1807
1808 /* Set the pointers. */
1809 runp->wcnext = *eptr;
1810 runp->wclast = lastp;
1811 if (*eptr != NULL)
1812 (*eptr)->wclast = runp;
1813 *eptr = runp;
1814 if (eptr == &e)
1815 wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1816 dont_insertwc:
1817 ;
1818 }
1819
1820 /* Up to the next entry. */
1821 runp = runp->next;
1822 }
1823
1824 /* Now determine whether the UNDEFINED entry is needed and if yes,
1825 whether it was defined. */
1826 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1827 if (collate->undefined.file == NULL)
1828 {
1829 if (need_undefined)
1830 {
1831 /* This seems not to be enforced by recent standards. Don't
1832 emit an error, simply append UNDEFINED at the end. */
1833 if (0)
1834 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1835
1836 /* Add UNDEFINED at the end. */
1837 collate->undefined.mborder =
1838 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1839
1840 for (i = 0; i < nrules; ++i)
1841 collate->undefined.mborder[i] = mbact[i]++;
1842 }
1843
1844 /* In any case we will need the definition for the wide character
1845 case. But we will not complain that it is missing since the
1846 specification strangely enough does not seem to account for
1847 this. */
1848 collate->undefined.wcorder = wcact++;
1849 }
1850
1851 /* Finally, try to unify the rules for the sections. Whenever the rules
1852 for a section are the same as those for another section give the
1853 ruleset the same index. Since there are never many section we can
1854 use an O(n^2) algorithm here. */
1855 sect = collate->sections;
1856 while (sect != NULL && sect->rules == NULL)
1857 sect = sect->next;
1858
1859 /* Bail out if we have no sections because of earlier errors. */
1860 if (sect == NULL)
1861 {
1862 WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1863 _("too many errors; giving up")));
1864 return;
1865 }
1866
1867 ruleidx = 0;
1868 do
1869 {
1870 struct section_list *osect = collate->sections;
1871
1872 while (osect != sect)
1873 if (osect->rules != NULL
1874 && memcmp (osect->rules, sect->rules,
1875 nrules * sizeof (osect->rules[0])) == 0)
1876 break;
1877 else
1878 osect = osect->next;
1879
1880 if (osect == sect)
1881 sect->ruleidx = ruleidx++;
1882 else
1883 sect->ruleidx = osect->ruleidx;
1884
1885 /* Next section. */
1886 do
1887 sect = sect->next;
1888 while (sect != NULL && sect->rules == NULL);
1889 }
1890 while (sect != NULL);
1891 /* We are currently not prepared for more than 128 rulesets. But this
1892 should never really be a problem. */
1893 assert (ruleidx <= 128);
1894}
1895
1896
1897static int32_t
1898output_weight (struct obstack *pool, struct locale_collate_t *collate,
1899 struct element_t *elem)
1900{
1901 size_t cnt;
1902 int32_t retval;
1903
1904 /* Optimize the use of UNDEFINED. */
1905 if (elem == &collate->undefined)
1906 /* The weights are already inserted. */
1907 return 0;
1908
1909 /* This byte can start exactly one collation element and this is
1910 a single byte. We can directly give the index to the weights. */
1911 retval = obstack_object_size (pool);
1912
1913 /* Construct the weight. */
1914 for (cnt = 0; cnt < nrules; ++cnt)
1915 {
1916 char buf[elem->weights[cnt].cnt * 7];
1917 int len = 0;
1918 int i;
1919
1920 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1921 /* Encode the weight value. We do nothing for IGNORE entries. */
1922 if (elem->weights[cnt].w[i] != NULL)
1923 len += utf8_encode (&buf[len],
1924 elem->weights[cnt].w[i]->mborder[cnt]);
1925
1926 /* And add the buffer content. */
1927 obstack_1grow (pool, len);
1928 obstack_grow (pool, buf, len);
1929 }
1930
1931 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1932}
1933
1934
1935static int32_t
1936output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1937 struct element_t *elem)
1938{
1939 size_t cnt;
1940 int32_t retval;
1941
1942 /* Optimize the use of UNDEFINED. */
1943 if (elem == &collate->undefined)
1944 /* The weights are already inserted. */
1945 return 0;
1946
1947 /* This byte can start exactly one collation element and this is
1948 a single byte. We can directly give the index to the weights. */
1949 retval = obstack_object_size (pool) / sizeof (int32_t);
1950
1951 /* Construct the weight. */
1952 for (cnt = 0; cnt < nrules; ++cnt)
1953 {
1954 int32_t buf[elem->weights[cnt].cnt];
1955 int i;
1956 int32_t j;
1957
1958 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1959 if (elem->weights[cnt].w[i] != NULL)
1960 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1961
1962 /* And add the buffer content. */
1963 obstack_int32_grow (pool, j);
1964
1965 obstack_grow (pool, buf, j * sizeof (int32_t));
1966 maybe_swap_uint32_obstack (pool, j);
1967 }
1968
1969 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1970}
1971
/* State used by `add_to_tablewc', which only receives a character and
   an element and therefore finds everything else it needs here.  If
   localedef is ever threaded, this would need to be a __thread
   variable.  */
static struct
{
  /* Pool handed to `output_weightwc' for the weight records.  */
  struct obstack *weightpool;
  /* Pool receiving the records for multi-character sequences.  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of runs of consecutive
     sequences.  */
  struct obstack *indpool;
  /* Collation data handed to `output_weightwc'.  */
  struct locale_collate_t *collate;
  /* Table the per-character index is entered into.  */
  struct collidx_table *tablewc;
} atwc;
1981
1982static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1983
1984static void
1985add_to_tablewc (uint32_t ch, struct element_t *runp)
1986{
1987 if (runp->wcnext == NULL && runp->nwcs == 1)
1988 {
1989 int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1990 runp);
1991 collidx_table_add (atwc.tablewc, ch, weigthidx);
1992 }
1993 else
1994 {
1995 /* As for the singlebyte table, we recognize sequences and
1996 compress them. */
1997
1998 collidx_table_add (atwc.tablewc, ch,
1999 -(obstack_object_size (atwc.extrapool)
2000 / sizeof (uint32_t)));
2001
2002 do
2003 {
2004 /* Store the current index in the weight table. We know that
2005 the current position in the `extrapool' is aligned on a
2006 32-bit address. */
2007 int32_t weightidx;
2008 int added;
2009
2010 /* Find out wether this is a single entry or we have more than
2011 one consecutive entry. */
2012 if (runp->wcnext != NULL
2013 && runp->nwcs == runp->wcnext->nwcs
2014 && wmemcmp ((wchar_t *) runp->wcs,
2015 (wchar_t *)runp->wcnext->wcs,
2016 runp->nwcs - 1) == 0
2017 && (runp->wcs[runp->nwcs - 1]
2018 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2019 {
2020 int i;
2021 struct element_t *series_startp = runp;
2022 struct element_t *curp;
2023
2024 /* Now add first the initial byte sequence. */
2025 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2026 if (sizeof (int32_t) == sizeof (int))
2027 obstack_make_room (atwc.extrapool, added);
2028
2029 /* More than one consecutive entry. We mark this by having
2030 a negative index into the indirect table. */
2031 obstack_int32_grow_fast (atwc.extrapool,
2032 -(obstack_object_size (atwc.indpool)
2033 / sizeof (int32_t)));
2034 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2035
2036 do
2037 runp = runp->wcnext;
2038 while (runp->wcnext != NULL
2039 && runp->nwcs == runp->wcnext->nwcs
2040 && wmemcmp ((wchar_t *) runp->wcs,
2041 (wchar_t *)runp->wcnext->wcs,
2042 runp->nwcs - 1) == 0
2043 && (runp->wcs[runp->nwcs - 1]
2044 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2045
2046 /* Now walk backward from here to the beginning. */
2047 curp = runp;
2048
2049 for (i = 1; i < runp->nwcs; ++i)
2050 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2051
2052 /* Now find the end of the consecutive sequence and
2053 add all the indeces in the indirect pool. */
2054 do
2055 {
2056 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2057 curp);
2058 obstack_int32_grow (atwc.indpool, weightidx);
2059
2060 curp = curp->wclast;
2061 }
2062 while (curp != series_startp);
2063
2064 /* Add the final weight. */
2065 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2066 curp);
2067 obstack_int32_grow (atwc.indpool, weightidx);
2068
2069 /* And add the end byte sequence. Without length this
2070 time. */
2071 for (i = 1; i < curp->nwcs; ++i)
2072 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2073 }
2074 else
2075 {
2076 /* A single entry. Simply add the index and the length and
2077 string (except for the first character which is already
2078 tested for). */
2079 int i;
2080
2081 /* Output the weight info. */
2082 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2083 runp);
2084
2085 assert (runp->nwcs > 0);
2086 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2087 if (sizeof (int) == sizeof (int32_t))
2088 obstack_make_room (atwc.extrapool, added);
2089
2090 obstack_int32_grow_fast (atwc.extrapool, weightidx);
2091 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2092 for (i = 1; i < runp->nwcs; ++i)
2093 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2094 }
2095
2096 /* Next entry. */
2097 runp = runp->wcnext;
2098 }
2099 while (runp != NULL);
2100 }
2101}
2102
2103void
2104collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2105 const char *output_path)
2106{
2107 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2108 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2109 struct locale_file file;
2110 size_t ch;
2111 int32_t tablemb[256];
2112 struct obstack weightpool;
2113 struct obstack extrapool;
2114 struct obstack indirectpool;
2115 struct section_list *sect;
2116 struct collidx_table tablewc;
2117 uint32_t elem_size;
2118 uint32_t *elem_table;
2119 int i;
2120 struct element_t *runp;
2121
2122 init_locale_data (&file, nelems);
2123 add_locale_uint32 (&file, nrules);
2124
2125 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2126 if (collate == NULL)
2127 {
2128 size_t idx;
2129 for (idx = 1; idx < nelems; idx++)
2130 {
2131 /* The words have to be handled specially. */
2132 if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2133 add_locale_uint32 (&file, 0);
2134 else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE))
2135 add_locale_uint32 (&file, __cet_other);
2136 else
2137 add_locale_empty (&file);
2138 }
2139 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2140 return;
2141 }
2142
2143 obstack_init (&weightpool);
2144 obstack_init (&extrapool);
2145 obstack_init (&indirectpool);
2146
2147 /* Since we are using the sign of an integer to mark indirection the
2148 offsets in the arrays we are indirectly referring to must not be
2149 zero since -0 == 0. Therefore we add a bit of dummy content. */
2150 obstack_int32_grow (&extrapool, 0);
2151 obstack_int32_grow (&indirectpool, 0);
2152
2153 /* Prepare the ruleset table. */
2154 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2155 if (sect->rules != NULL && sect->ruleidx == i)
2156 {
2157 int j;
2158
2159 obstack_make_room (&weightpool, nrules);
2160
2161 for (j = 0; j < nrules; ++j)
2162 obstack_1grow_fast (&weightpool, sect->rules[j]);
2163 ++i;
2164 }
2165 /* And align the output. */
2166 i = (nrules * i) % LOCFILE_ALIGN;
2167 if (i > 0)
2168 do
2169 obstack_1grow (&weightpool, '\0');
2170 while (++i < LOCFILE_ALIGN);
2171
2172 add_locale_raw_obstack (&file, &weightpool);
2173
2174 /* Generate the 8-bit table. Walk through the lists of sequences
2175 starting with the same byte and add them one after the other to
2176 the table. In case we have more than one sequence starting with
2177 the same byte we have to use extra indirection.
2178
2179 First add a record for the NUL byte. This entry will never be used
2180 so it does not matter. */
2181 tablemb[0] = 0;
2182
2183 /* Now insert the `UNDEFINED' value if it is used. Since this value
2184 will probably be used more than once it is good to store the
2185 weights only once. */
2186 if (collate->undefined.used_in_level != 0)
2187 output_weight (&weightpool, collate, &collate->undefined);
2188
2189 for (ch = 1; ch < 256; ++ch)
2190 if (collate->mbheads[ch]->mbnext == NULL
2191 && collate->mbheads[ch]->nmbs <= 1)
2192 {
2193 tablemb[ch] = output_weight (&weightpool, collate,
2194 collate->mbheads[ch]);
2195 }
2196 else
2197 {
2198 /* The entries in the list are sorted by length and then
2199 alphabetically. This is the order in which we will add the
2200 elements to the collation table. This allows simply walking
2201 the table in sequence and stopping at the first matching
2202 entry. Since the longer sequences are coming first in the
2203 list they have the possibility to match first, just as it
2204 has to be. In the worst case we are walking to the end of
2205 the list where we put, if no singlebyte sequence is defined
2206 in the locale definition, the weights for UNDEFINED.
2207
2208 To reduce the length of the search list we compress them a bit.
2209 This happens by collecting sequences of consecutive byte
2210 sequences in one entry (having and begin and end byte sequence)
2211 and add only one index into the weight table. We can find the
2212 consecutive entries since they are also consecutive in the list. */
2213 struct element_t *runp = collate->mbheads[ch];
2214 struct element_t *lastp;
2215
2216 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2217
2218 tablemb[ch] = -obstack_object_size (&extrapool);
2219
2220 do
2221 {
2222 /* Store the current index in the weight table. We know that
2223 the current position in the `extrapool' is aligned on a
2224 32-bit address. */
2225 int32_t weightidx;
2226 int added;
2227
2228 /* Find out wether this is a single entry or we have more than
2229 one consecutive entry. */
2230 if (runp->mbnext != NULL
2231 && runp->nmbs == runp->mbnext->nmbs
2232 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2233 && (runp->mbs[runp->nmbs - 1]
2234 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2235 {
2236 int i;
2237 struct element_t *series_startp = runp;
2238 struct element_t *curp;
2239
2240 /* Compute how much space we will need. */
2241 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2242 + 2 * (runp->nmbs - 1));
2243 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2244 obstack_make_room (&extrapool, added);
2245
2246 /* More than one consecutive entry. We mark this by having
2247 a negative index into the indirect table. */
2248 obstack_int32_grow_fast (&extrapool,
2249 -(obstack_object_size (&indirectpool)
2250 / sizeof (int32_t)));
2251
2252 /* Now search first the end of the series. */
2253 do
2254 runp = runp->mbnext;
2255 while (runp->mbnext != NULL
2256 && runp->nmbs == runp->mbnext->nmbs
2257 && memcmp (runp->mbs, runp->mbnext->mbs,
2258 runp->nmbs - 1) == 0
2259 && (runp->mbs[runp->nmbs - 1]
2260 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2261
2262 /* Now walk backward from here to the beginning. */
2263 curp = runp;
2264
2265 assert (runp->nmbs <= 256);
2266 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2267 for (i = 1; i < curp->nmbs; ++i)
2268 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2269
2270 /* Now find the end of the consecutive sequence and
2271 add all the indices in the indirect pool. */
2272 do
2273 {
2274 weightidx = output_weight (&weightpool, collate, curp);
2275 obstack_int32_grow (&indirectpool, weightidx);
2276
2277 curp = curp->mblast;
2278 }
2279 while (curp != series_startp);
2280
2281 /* Add the final weight. */
2282 weightidx = output_weight (&weightpool, collate, curp);
2283 obstack_int32_grow (&indirectpool, weightidx);
2284
2285 /* And add the end byte sequence. Without length this
2286 time. */
2287 for (i = 1; i < curp->nmbs; ++i)
2288 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2289 }
2290 else
2291 {
2292 /* A single entry. Simply add the index and the length and
2293 string (except for the first character which is already
2294 tested for). */
2295 int i;
2296
2297 /* Output the weight info. */
2298 weightidx = output_weight (&weightpool, collate, runp);
2299
2300 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2301 + runp->nmbs - 1);
2302 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2303 obstack_make_room (&extrapool, added);
2304
2305 obstack_int32_grow_fast (&extrapool, weightidx);
2306 assert (runp->nmbs <= 256);
2307 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2308
2309 for (i = 1; i < runp->nmbs; ++i)
2310 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2311 }
2312
2313 /* Add alignment bytes if necessary. */
2314 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2315 obstack_1grow_fast (&extrapool, '\0');
2316
2317 /* Next entry. */
2318 lastp = runp;
2319 runp = runp->mbnext;
2320 }
2321 while (runp != NULL);
2322
2323 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2324
2325 /* If the final entry in the list is not a single character we
2326 add an UNDEFINED entry here. */
2327 if (lastp->nmbs != 1)
2328 {
2329 int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2330 obstack_make_room (&extrapool, added);
2331
2332 obstack_int32_grow_fast (&extrapool, 0);
2333 /* XXX What rule? We just pick the first. */
2334 obstack_1grow_fast (&extrapool, 0);
2335 /* Length is zero. */
2336 obstack_1grow_fast (&extrapool, 0);
2337
2338 /* Add alignment bytes if necessary. */
2339 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2340 obstack_1grow_fast (&extrapool, '\0');
2341 }
2342 }
2343
2344 /* Add padding to the tables if necessary. */
2345 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2346 obstack_1grow (&weightpool, 0);
2347
2348 /* Now add the four tables. */
2349 add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
2350 add_locale_raw_obstack (&file, &weightpool);
2351 add_locale_raw_obstack (&file, &extrapool);
2352 add_locale_raw_obstack (&file, &indirectpool);
2353
2354 /* Now the same for the wide character table. We need to store some
2355 more information here. */
2356 add_locale_empty (&file);
2357 add_locale_empty (&file);
2358 add_locale_empty (&file);
2359
2360 /* Since we are using the sign of an integer to mark indirection the
2361 offsets in the arrays we are indirectly referring to must not be
2362 zero since -0 == 0. Therefore we add a bit of dummy content. */
2363 obstack_int32_grow (&extrapool, 0);
2364 obstack_int32_grow (&indirectpool, 0);
2365
2366 /* Now insert the `UNDEFINED' value if it is used. Since this value
2367 will probably be used more than once it is good to store the
2368 weights only once. */
2369 if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2370 abort ();
2371
2372 /* Generate the table. Walk through the lists of sequences starting
2373 with the same wide character and add them one after the other to
2374 the table. In case we have more than one sequence starting with
2375 the same byte we have to use extra indirection. */
2376 tablewc.p = 6;
2377 tablewc.q = 10;
2378 collidx_table_init (&tablewc);
2379
2380 atwc.weightpool = &weightpool;
2381 atwc.extrapool = &extrapool;
2382 atwc.indpool = &indirectpool;
2383 atwc.collate = collate;
2384 atwc.tablewc = &tablewc;
2385
2386 wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2387
2388 memset (&atwc, 0, sizeof (atwc));
2389
2390 /* Now add the four tables. */
2391 add_locale_collidx_table (&file, &tablewc);
2392 add_locale_raw_obstack (&file, &weightpool);
2393 add_locale_raw_obstack (&file, &extrapool);
2394 add_locale_raw_obstack (&file, &indirectpool);
2395
2396 /* Finally write the table with collation element names out. It is
2397 a hash table with a simple function which gets the name of the
2398 character as the input. One character might have many names. The
2399 value associated with the name is an index into the weight table
2400 where we are then interested in the first-level weight value.
2401
2402 To determine how large the table should be we are counting the
2403 elements we have to put in. Since we are using internal chaining
2404 using a secondary hash function we have to make the table a bit
2405 larger to avoid extremely long search times. We can achieve
2406 good results with a 40% larger table than there are entries. */
2407 elem_size = 0;
2408 runp = collate->start;
2409 while (runp != NULL)
2410 {
2411 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2412 /* Yep, the element really counts. */
2413 ++elem_size;
2414
2415 runp = runp->next;
2416 }
2417 /* Add 40% and find the next prime number. */
2418 elem_size = next_prime (elem_size * 1.4);
2419
2420 /* Allocate the table. Each entry consists of two words: the hash
2421 value and an index in a secondary table which provides the index
2422 into the weight table and the string itself (so that a match can
2423 be determined). */
2424 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2425 elem_size * 2 * sizeof (uint32_t));
2426 memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2427
2428 /* Now add the elements. */
2429 runp = collate->start;
2430 while (runp != NULL)
2431 {
2432 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2433 {
2434 /* Compute the hash value of the name. */
2435 uint32_t namelen = strlen (runp->name);
2436 uint32_t hash = elem_hash (runp->name, namelen);
2437 size_t idx = hash % elem_size;
2438#ifndef NDEBUG
2439 size_t start_idx = idx;
2440#endif
2441
2442 if (elem_table[idx * 2] != 0)
2443 {
2444 /* The spot is already taken. Try iterating using the value
2445 from the secondary hashing function. */
2446 size_t iter = hash % (elem_size - 2) + 1;
2447
2448 do
2449 {
2450 idx += iter;
2451 if (idx >= elem_size)
2452 idx -= elem_size;
2453 assert (idx != start_idx);
2454 }
2455 while (elem_table[idx * 2] != 0);
2456 }
2457 /* This is the spot where we will insert the value. */
2458 elem_table[idx * 2] = hash;
2459 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2460
2461 /* The string itself including length. */
2462 obstack_1grow (&extrapool, namelen);
2463 obstack_grow (&extrapool, runp->name, namelen);
2464
2465 /* And the multibyte representation. */
2466 obstack_1grow (&extrapool, runp->nmbs);
2467 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2468
2469 /* And align again to 32 bits. */
2470 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2471 obstack_grow (&extrapool, "\0\0",
2472 (sizeof (int32_t)
2473 - ((1 + namelen + 1 + runp->nmbs)
2474 % sizeof (int32_t))));
2475
2476 /* Now some 32-bit values: multibyte collation sequence,
2477 wide char string (including length), and wide char
2478 collation sequence. */
2479 obstack_int32_grow (&extrapool, runp->mbseqorder);
2480
2481 obstack_int32_grow (&extrapool, runp->nwcs);
2482 obstack_grow (&extrapool, runp->wcs,
2483 runp->nwcs * sizeof (uint32_t));
2484 maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2485
2486 obstack_int32_grow (&extrapool, runp->wcseqorder);
2487 }
2488
2489 runp = runp->next;
2490 }
2491
2492 /* Prepare to write out this data. */
2493 add_locale_uint32 (&file, elem_size);
2494 add_locale_uint32_array (&file, elem_table, 2 * elem_size);
2495 add_locale_raw_obstack (&file, &extrapool);
2496 add_locale_raw_data (&file, collate->mbseqorder, 256);
2497 add_locale_collseq_table (&file, &collate->wcseqorder);
2498 add_locale_string (&file, charmap->code_set_name);
2499 if (strcmp (charmap->code_set_name, "UTF-8") == 0)
2500 add_locale_uint32 (&file, __cet_utf8);
2501 else if (charmap->mb_cur_max == 1)
2502 add_locale_uint32 (&file, __cet_8bit);
2503 else
2504 add_locale_uint32 (&file, __cet_other);
2505 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2506
2507 obstack_free (&weightpool, NULL);
2508 obstack_free (&extrapool, NULL);
2509 obstack_free (&indirectpool, NULL);
2510}
2511
2512
2513static enum token_t
2514skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2515 const struct charmap_t *charmap, int to_endif)
2516{
2517 while (1)
2518 {
2519 struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2520 enum token_t nowtok = now->tok;
2521
2522 if (nowtok == tok_eof || nowtok == tok_end)
2523 return nowtok;
2524
2525 if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2526 {
2527 lr_error (ldfile, _("%s: nested conditionals not supported"),
2528 "LC_COLLATE");
2529 nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2530 if (nowtok == tok_eof || nowtok == tok_end)
2531 return nowtok;
2532 }
2533 else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2534 {
2535 lr_ignore_rest (ldfile, 1);
2536 return nowtok;
2537 }
2538 else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2539 {
2540 /* Do not read the rest of the line. */
2541 return nowtok;
2542 }
2543 else if (nowtok == tok_else)
2544 {
2545 lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2546 }
2547
2548 lr_ignore_rest (ldfile, 0);
2549 }
2550}
2551
2552
2553void
2554collate_read (struct linereader *ldfile, struct localedef_t *result,
2555 const struct charmap_t *charmap, const char *repertoire_name,
2556 int ignore_content)
2557{
2558 struct repertoire_t *repertoire = NULL;
2559 struct locale_collate_t *collate;
2560 struct token *now;
2561 struct token *arg = NULL;
2562 enum token_t nowtok;
2563 enum token_t was_ellipsis = tok_none;
2564 struct localedef_t *copy_locale = NULL;
2565 /* Parsing state:
2566 0 - start
2567 1 - between `order-start' and `order-end'
2568 2 - after `order-end'
2569 3 - after `reorder-after', waiting for `reorder-end'
2570 4 - after `reorder-end'
2571 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2572 6 - after `reorder-sections-end'
2573 */
2574 int state = 0;
2575
2576 /* Get the repertoire we have to use. */
2577 if (repertoire_name != NULL)
2578 repertoire = repertoire_read (repertoire_name);
2579
2580 /* The rest of the line containing `LC_COLLATE' must be free. */
2581 lr_ignore_rest (ldfile, 1);
2582
2583 while (1)
2584 {
2585 do
2586 {
2587 now = lr_token (ldfile, charmap, result, NULL, verbose);
2588 nowtok = now->tok;
2589 }
2590 while (nowtok == tok_eol);
2591
2592 if (nowtok != tok_define)
2593 break;
2594
2595 if (ignore_content)
2596 lr_ignore_rest (ldfile, 0);
2597 else
2598 {
2599 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2600 if (arg->tok != tok_ident)
2601 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2602 else
2603 {
2604 /* Simply add the new symbol. */
2605 struct name_list *newsym = xmalloc (sizeof (*newsym)
2606 + arg->val.str.lenmb + 1);
2607 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2608 newsym->str[arg->val.str.lenmb] = '\0';
2609 newsym->next = defined;
2610 defined = newsym;
2611
2612 lr_ignore_rest (ldfile, 1);
2613 }
2614 }
2615 }
2616
2617 if (nowtok == tok_copy)
2618 {
2619 now = lr_token (ldfile, charmap, result, NULL, verbose);
2620 if (now->tok != tok_string)
2621 {
2622 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2623
2624 skip_category:
2625 do
2626 now = lr_token (ldfile, charmap, result, NULL, verbose);
2627 while (now->tok != tok_eof && now->tok != tok_end);
2628
2629 if (now->tok != tok_eof
2630 || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2631 now->tok == tok_eof))
2632 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2633 else if (now->tok != tok_lc_collate)
2634 {
2635 lr_error (ldfile, _("\
2636%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2637 lr_ignore_rest (ldfile, 0);
2638 }
2639 else
2640 lr_ignore_rest (ldfile, 1);
2641
2642 return;
2643 }
2644
2645 if (! ignore_content)
2646 {
2647 /* Get the locale definition. */
2648 copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2649 repertoire_name, charmap, NULL);
2650 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2651 {
2652 /* Not yet loaded. So do it now. */
2653 if (locfile_read (copy_locale, charmap) != 0)
2654 goto skip_category;
2655 }
2656
2657 if (copy_locale->categories[LC_COLLATE].collate == NULL)
2658 return;
2659 }
2660
2661 lr_ignore_rest (ldfile, 1);
2662
2663 now = lr_token (ldfile, charmap, result, NULL, verbose);
2664 nowtok = now->tok;
2665 }
2666
2667 /* Prepare the data structures. */
2668 collate_startup (ldfile, result, copy_locale, ignore_content);
2669 collate = result->categories[LC_COLLATE].collate;
2670
2671 while (1)
2672 {
2673 char ucs4buf[10];
2674 char *symstr;
2675 size_t symlen;
2676
2677 /* Of course we don't proceed beyond the end of file. */
2678 if (nowtok == tok_eof)
2679 break;
2680
2681 /* Ignore empty lines. */
2682 if (nowtok == tok_eol)
2683 {
2684 now = lr_token (ldfile, charmap, result, NULL, verbose);
2685 nowtok = now->tok;
2686 continue;
2687 }
2688
2689 switch (nowtok)
2690 {
2691 case tok_copy:
2692 /* Allow copying other locales. */
2693 now = lr_token (ldfile, charmap, result, NULL, verbose);
2694 if (now->tok != tok_string)
2695 goto err_label;
2696
2697 if (! ignore_content)
2698 load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2699 charmap, result);
2700
2701 lr_ignore_rest (ldfile, 1);
2702 break;
2703
2704 case tok_coll_weight_max:
2705 /* Ignore the rest of the line if we don't need the input of
2706 this line. */
2707 if (ignore_content)
2708 {
2709 lr_ignore_rest (ldfile, 0);
2710 break;
2711 }
2712
2713 if (state != 0)
2714 goto err_label;
2715
2716 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2717 if (arg->tok != tok_number)
2718 goto err_label;
2719 if (collate->col_weight_max != -1)
2720 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2721 "LC_COLLATE", "col_weight_max");
2722 else
2723 collate->col_weight_max = arg->val.num;
2724 lr_ignore_rest (ldfile, 1);
2725 break;
2726
2727 case tok_section_symbol:
2728 /* Ignore the rest of the line if we don't need the input of
2729 this line. */
2730 if (ignore_content)
2731 {
2732 lr_ignore_rest (ldfile, 0);
2733 break;
2734 }
2735
2736 if (state != 0)
2737 goto err_label;
2738
2739 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2740 if (arg->tok != tok_bsymbol)
2741 goto err_label;
2742 else if (!ignore_content)
2743 {
2744 /* Check whether this section is already known. */
2745 struct section_list *known = collate->sections;
2746 while (known != NULL)
2747 {
2748 if (strcmp (known->name, arg->val.str.startmb) == 0)
2749 break;
2750 known = known->next;
2751 }
2752
2753 if (known != NULL)
2754 {
2755 lr_error (ldfile,
2756 _("%s: duplicate declaration of section `%s'"),
2757 "LC_COLLATE", arg->val.str.startmb);
2758 free (arg->val.str.startmb);
2759 }
2760 else
2761 collate->sections = make_seclist_elem (collate,
2762 arg->val.str.startmb,
2763 collate->sections);
2764
2765 lr_ignore_rest (ldfile, known == NULL);
2766 }
2767 else
2768 {
2769 free (arg->val.str.startmb);
2770 lr_ignore_rest (ldfile, 0);
2771 }
2772 break;
2773
2774 case tok_collating_element:
2775 /* Ignore the rest of the line if we don't need the input of
2776 this line. */
2777 if (ignore_content)
2778 {
2779 lr_ignore_rest (ldfile, 0);
2780 break;
2781 }
2782
2783 if (state != 0 && state != 2)
2784 goto err_label;
2785
2786 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2787 if (arg->tok != tok_bsymbol)
2788 goto err_label;
2789 else
2790 {
2791 const char *symbol = arg->val.str.startmb;
2792 size_t symbol_len = arg->val.str.lenmb;
2793
2794 /* Next the `from' keyword. */
2795 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2796 if (arg->tok != tok_from)
2797 {
2798 free ((char *) symbol);
2799 goto err_label;
2800 }
2801
2802 ldfile->return_widestr = 1;
2803 ldfile->translate_strings = 1;
2804
2805 /* Finally the string with the replacement. */
2806 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2807
2808 ldfile->return_widestr = 0;
2809 ldfile->translate_strings = 0;
2810
2811 if (arg->tok != tok_string)
2812 goto err_label;
2813
2814 if (!ignore_content && symbol != NULL)
2815 {
2816 /* The name is already defined. */
2817 if (check_duplicate (ldfile, collate, charmap,
2818 repertoire, symbol, symbol_len))
2819 goto col_elem_free;
2820
2821 if (arg->val.str.startmb != NULL)
2822 insert_entry (&collate->elem_table, symbol, symbol_len,
2823 new_element (collate,
2824 arg->val.str.startmb,
2825 arg->val.str.lenmb - 1,
2826 arg->val.str.startwc,
2827 symbol, symbol_len, 0));
2828 }
2829 else
2830 {
2831 col_elem_free:
2832 free ((char *) symbol);
2833 free (arg->val.str.startmb);
2834 free (arg->val.str.startwc);
2835 }
2836 lr_ignore_rest (ldfile, 1);
2837 }
2838 break;
2839
2840 case tok_collating_symbol:
2841 /* Ignore the rest of the line if we don't need the input of
2842 this line. */
2843 if (ignore_content)
2844 {
2845 lr_ignore_rest (ldfile, 0);
2846 break;
2847 }
2848
2849 if (state != 0 && state != 2)
2850 goto err_label;
2851
2852 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2853 if (arg->tok != tok_bsymbol)
2854 goto err_label;
2855 else
2856 {
2857 char *symbol = arg->val.str.startmb;
2858 size_t symbol_len = arg->val.str.lenmb;
2859 char *endsymbol = NULL;
2860 size_t endsymbol_len = 0;
2861 enum token_t ellipsis = tok_none;
2862
2863 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2864 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2865 {
2866 ellipsis = arg->tok;
2867
2868 arg = lr_token (ldfile, charmap, result, repertoire,
2869 verbose);
2870 if (arg->tok != tok_bsymbol)
2871 {
2872 free (symbol);
2873 goto err_label;
2874 }
2875
2876 endsymbol = arg->val.str.startmb;
2877 endsymbol_len = arg->val.str.lenmb;
2878
2879 lr_ignore_rest (ldfile, 1);
2880 }
2881 else if (arg->tok != tok_eol)
2882 {
2883 free (symbol);
2884 goto err_label;
2885 }
2886
2887 if (!ignore_content)
2888 {
2889 if (symbol == NULL
2890 || (ellipsis != tok_none && endsymbol == NULL))
2891 {
2892 lr_error (ldfile, _("\
2893%s: unknown character in collating symbol name"),
2894 "LC_COLLATE");
2895 goto col_sym_free;
2896 }
2897 else if (ellipsis == tok_none)
2898 {
2899 /* A single symbol, no ellipsis. */
2900 if (check_duplicate (ldfile, collate, charmap,
2901 repertoire, symbol, symbol_len))
2902 /* The name is already defined. */
2903 goto col_sym_free;
2904
2905 insert_entry (&collate->sym_table, symbol, symbol_len,
2906 new_symbol (collate, symbol, symbol_len));
2907 }
2908 else if (symbol_len != endsymbol_len)
2909 {
2910 col_sym_inv_range:
2911 lr_error (ldfile,
2912 _("invalid names for character range"));
2913 goto col_sym_free;
2914 }
2915 else
2916 {
2917 /* Oh my, we have to handle an ellipsis. First, as
2918 usual, determine the common prefix and then
2919 convert the rest into a range. */
2920 size_t prefixlen;
2921 unsigned long int from;
2922 unsigned long int to;
2923 char *endp;
2924
2925 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2926 if (symbol[prefixlen] != endsymbol[prefixlen])
2927 break;
2928
2929 /* Convert the rest into numbers. */
2930 symbol[symbol_len] = '\0';
2931 from = strtoul (&symbol[prefixlen], &endp,
2932 ellipsis == tok_ellipsis2 ? 16 : 10);
2933 if (*endp != '\0')
2934 goto col_sym_inv_range;
2935
2936 endsymbol[symbol_len] = '\0';
2937 to = strtoul (&endsymbol[prefixlen], &endp,
2938 ellipsis == tok_ellipsis2 ? 16 : 10);
2939 if (*endp != '\0')
2940 goto col_sym_inv_range;
2941
2942 if (from > to)
2943 goto col_sym_inv_range;
2944
2945 /* Now loop over all entries. */
2946 while (from <= to)
2947 {
2948 char *symbuf;
2949
2950 symbuf = (char *) obstack_alloc (&collate->mempool,
2951 symbol_len + 1);
2952
2953 /* Create the name. */
2954 sprintf (symbuf,
2955 ellipsis == tok_ellipsis2
2956 ? "%.*s%.*lX" : "%.*s%.*lu",
2957 (int) prefixlen, symbol,
2958 (int) (symbol_len - prefixlen), from);
2959
2960 if (check_duplicate (ldfile, collate, charmap,
2961 repertoire, symbuf, symbol_len))
2962 /* The name is already defined. */
2963 goto col_sym_free;
2964
2965 insert_entry (&collate->sym_table, symbuf,
2966 symbol_len,
2967 new_symbol (collate, symbuf,
2968 symbol_len));
2969
2970 /* Increment the counter. */
2971 ++from;
2972 }
2973
2974 goto col_sym_free;
2975 }
2976 }
2977 else
2978 {
2979 col_sym_free:
2980 free (symbol);
2981 free (endsymbol);
2982 }
2983 }
2984 break;
2985
2986 case tok_symbol_equivalence:
2987 /* Ignore the rest of the line if we don't need the input of
2988 this line. */
2989 if (ignore_content)
2990 {
2991 lr_ignore_rest (ldfile, 0);
2992 break;
2993 }
2994
2995 if (state != 0)
2996 goto err_label;
2997
2998 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2999 if (arg->tok != tok_bsymbol)
3000 goto err_label;
3001 else
3002 {
3003 const char *newname = arg->val.str.startmb;
3004 size_t newname_len = arg->val.str.lenmb;
3005 const char *symname;
3006 size_t symname_len;
3007 void *symval; /* Actually struct symbol_t* */
3008
3009 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3010 if (arg->tok != tok_bsymbol)
3011 {
3012 free ((char *) newname);
3013 goto err_label;
3014 }
3015
3016 symname = arg->val.str.startmb;
3017 symname_len = arg->val.str.lenmb;
3018
3019 if (newname == NULL)
3020 {
3021 lr_error (ldfile, _("\
3022%s: unknown character in equivalent definition name"),
3023 "LC_COLLATE");
3024
3025 sym_equiv_free:
3026 free ((char *) newname);
3027 free ((char *) symname);
3028 break;
3029 }
3030 if (symname == NULL)
3031 {
3032 lr_error (ldfile, _("\
3033%s: unknown character in equivalent definition value"),
3034 "LC_COLLATE");
3035 goto sym_equiv_free;
3036 }
3037
3038 /* See whether the symbol name is already defined. */
3039 if (find_entry (&collate->sym_table, symname, symname_len,
3040 &symval) != 0)
3041 {
3042 lr_error (ldfile, _("\
3043%s: unknown symbol `%s' in equivalent definition"),
3044 "LC_COLLATE", symname);
3045 goto sym_equiv_free;
3046 }
3047
3048 if (insert_entry (&collate->sym_table,
3049 newname, newname_len, symval) < 0)
3050 {
3051 lr_error (ldfile, _("\
3052error while adding equivalent collating symbol"));
3053 goto sym_equiv_free;
3054 }
3055
3056 free ((char *) symname);
3057 }
3058 lr_ignore_rest (ldfile, 1);
3059 break;
3060
3061 case tok_script:
3062 /* Ignore the rest of the line if we don't need the input of
3063 this line. */
3064 if (ignore_content)
3065 {
3066 lr_ignore_rest (ldfile, 0);
3067 break;
3068 }
3069
3070 /* We get told about the scripts we know. */
3071 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3072 if (arg->tok != tok_bsymbol)
3073 goto err_label;
3074 else
3075 {
3076 struct section_list *runp = collate->known_sections;
3077 char *name;
3078
3079 while (runp != NULL)
3080 if (strncmp (runp->name, arg->val.str.startmb,
3081 arg->val.str.lenmb) == 0
3082 && runp->name[arg->val.str.lenmb] == '\0')
3083 break;
3084 else
3085 runp = runp->def_next;
3086
3087 if (runp != NULL)
3088 {
3089 lr_error (ldfile, _("duplicate definition of script `%s'"),
3090 runp->name);
3091 lr_ignore_rest (ldfile, 0);
3092 break;
3093 }
3094
3095 runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3096 name = (char *) xmalloc (arg->val.str.lenmb + 1);
3097 memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3098 name[arg->val.str.lenmb] = '\0';
3099 runp->name = name;
3100
3101 runp->def_next = collate->known_sections;
3102 collate->known_sections = runp;
3103 }
3104 lr_ignore_rest (ldfile, 1);
3105 break;
3106
3107 case tok_order_start:
3108 /* Ignore the rest of the line if we don't need the input of
3109 this line. */
3110 if (ignore_content)
3111 {
3112 lr_ignore_rest (ldfile, 0);
3113 break;
3114 }
3115
3116 if (state != 0 && state != 1 && state != 2)
3117 goto err_label;
3118 state = 1;
3119
3120 /* The 14652 draft does not specify whether all `order_start' lines
3121 must contain the same number of sort-rules, but 14651 does. So
3122 we require this here as well. */
3123 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3124 if (arg->tok == tok_bsymbol)
3125 {
3126 /* This better should be a section name. */
3127 struct section_list *sp = collate->known_sections;
3128 while (sp != NULL
3129 && (sp->name == NULL
3130 || strncmp (sp->name, arg->val.str.startmb,
3131 arg->val.str.lenmb) != 0
3132 || sp->name[arg->val.str.lenmb] != '\0'))
3133 sp = sp->def_next;
3134
3135 if (sp == NULL)
3136 {
3137 lr_error (ldfile, _("\
3138%s: unknown section name `%.*s'"),
3139 "LC_COLLATE", (int) arg->val.str.lenmb,
3140 arg->val.str.startmb);
3141 /* We use the error section. */
3142 collate->current_section = &collate->error_section;
3143
3144 if (collate->error_section.first == NULL)
3145 {
3146 /* Insert &collate->error_section at the end of
3147 the collate->sections list. */
3148 if (collate->sections == NULL)
3149 collate->sections = &collate->error_section;
3150 else
3151 {
3152 sp = collate->sections;
3153 while (sp->next != NULL)
3154 sp = sp->next;
3155
3156 sp->next = &collate->error_section;
3157 }
3158 collate->error_section.next = NULL;
3159 }
3160 }
3161 else
3162 {
3163 /* One should not be allowed to open the same
3164 section twice. */
3165 if (sp->first != NULL)
3166 lr_error (ldfile, _("\
3167%s: multiple order definitions for section `%s'"),
3168 "LC_COLLATE", sp->name);
3169 else
3170 {
3171 /* Insert sp in the collate->sections list,
3172 right after collate->current_section. */
3173 if (collate->current_section != NULL)
3174 {
3175 sp->next = collate->current_section->next;
3176 collate->current_section->next = sp;
3177 }
3178 else if (collate->sections == NULL)
3179 /* This is the first section to be defined. */
3180 collate->sections = sp;
3181
3182 collate->current_section = sp;
3183 }
3184
3185 /* Next should come the end of the line or a semicolon. */
3186 arg = lr_token (ldfile, charmap, result, repertoire,
3187 verbose);
3188 if (arg->tok == tok_eol)
3189 {
3190 uint32_t cnt;
3191
3192 /* This means we have exactly one rule: `forward'. */
3193 if (nrules > 1)
3194 lr_error (ldfile, _("\
3195%s: invalid number of sorting rules"),
3196 "LC_COLLATE");
3197 else
3198 nrules = 1;
3199 sp->rules = obstack_alloc (&collate->mempool,
3200 (sizeof (enum coll_sort_rule)
3201 * nrules));
3202 for (cnt = 0; cnt < nrules; ++cnt)
3203 sp->rules[cnt] = sort_forward;
3204
3205 /* Next line. */
3206 break;
3207 }
3208
3209 /* Get the next token. */
3210 arg = lr_token (ldfile, charmap, result, repertoire,
3211 verbose);
3212 }
3213 }
3214 else
3215 {
3216 /* There is no section symbol. Therefore we use the unnamed
3217 section. */
3218 collate->current_section = &collate->unnamed_section;
3219
3220 if (collate->unnamed_section_defined)
3221 lr_error (ldfile, _("\
3222%s: multiple order definitions for unnamed section"),
3223 "LC_COLLATE");
3224 else
3225 {
3226 /* Insert &collate->unnamed_section at the beginning of
3227 the collate->sections list. */
3228 collate->unnamed_section.next = collate->sections;
3229 collate->sections = &collate->unnamed_section;
3230 collate->unnamed_section_defined = true;
3231 }
3232 }
3233
3234 /* Now read the direction names. */
3235 read_directions (ldfile, arg, charmap, repertoire, result);
3236
3237 /* From now we need the strings untranslated. */
3238 ldfile->translate_strings = 0;
3239 break;
3240
3241 case tok_order_end:
3242 /* Ignore the rest of the line if we don't need the input of
3243 this line. */
3244 if (ignore_content)
3245 {
3246 lr_ignore_rest (ldfile, 0);
3247 break;
3248 }
3249
3250 if (state != 1)
3251 goto err_label;
3252
3253 /* Handle ellipsis at end of list. */
3254 if (was_ellipsis != tok_none)
3255 {
3256 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3257 repertoire, result);
3258 was_ellipsis = tok_none;
3259 }
3260
3261 state = 2;
3262 lr_ignore_rest (ldfile, 1);
3263 break;
3264
3265 case tok_reorder_after:
3266 /* Ignore the rest of the line if we don't need the input of
3267 this line. */
3268 if (ignore_content)
3269 {
3270 lr_ignore_rest (ldfile, 0);
3271 break;
3272 }
3273
3274 if (state == 1)
3275 {
3276 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3277 "LC_COLLATE");
3278 state = 2;
3279
3280 /* Handle ellipsis at end of list. */
3281 if (was_ellipsis != tok_none)
3282 {
3283 handle_ellipsis (ldfile, arg->val.str.startmb,
3284 arg->val.str.lenmb, was_ellipsis, charmap,
3285 repertoire, result);
3286 was_ellipsis = tok_none;
3287 }
3288 }
3289 else if (state == 0 && copy_locale == NULL)
3290 goto err_label;
3291 else if (state != 0 && state != 2 && state != 3)
3292 goto err_label;
3293 state = 3;
3294
3295 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3296 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3297 {
3298 /* Find this symbol in the sequence table. */
3299 char ucsbuf[10];
3300 char *startmb;
3301 size_t lenmb;
3302 struct element_t *insp;
3303 int no_error = 1;
3304 void *ptr;
3305
3306 if (arg->tok == tok_bsymbol)
3307 {
3308 startmb = arg->val.str.startmb;
3309 lenmb = arg->val.str.lenmb;
3310 }
3311 else
3312 {
3313 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3314 startmb = ucsbuf;
3315 lenmb = 9;
3316 }
3317
3318 if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3319 /* Yes, the symbol exists. Simply point the cursor
3320 to it. */
3321 collate->cursor = (struct element_t *) ptr;
3322 else
3323 {
3324 struct symbol_t *symbp;
3325 void *ptr;
3326
3327 if (find_entry (&collate->sym_table, startmb, lenmb,
3328 &ptr) == 0)
3329 {
3330 symbp = ptr;
3331
3332 if (symbp->order->last != NULL
3333 || symbp->order->next != NULL)
3334 collate->cursor = symbp->order;
3335 else
3336 {
3337 /* This is a collating symbol but its position
3338 is not yet defined. */
3339 lr_error (ldfile, _("\
3340%s: order for collating symbol %.*s not yet defined"),
3341 "LC_COLLATE", (int) lenmb, startmb);
3342 collate->cursor = NULL;
3343 no_error = 0;
3344 }
3345 }
3346 else if (find_entry (&collate->elem_table, startmb, lenmb,
3347 &ptr) == 0)
3348 {
3349 insp = (struct element_t *) ptr;
3350
3351 if (insp->last != NULL || insp->next != NULL)
3352 collate->cursor = insp;
3353 else
3354 {
3355 /* This is a collating element but its position
3356 is not yet defined. */
3357 lr_error (ldfile, _("\
3358%s: order for collating element %.*s not yet defined"),
3359 "LC_COLLATE", (int) lenmb, startmb);
3360 collate->cursor = NULL;
3361 no_error = 0;
3362 }
3363 }
3364 else
3365 {
3366 /* This is bad. The symbol after which we have to
3367 insert does not exist. */
3368 lr_error (ldfile, _("\
3369%s: cannot reorder after %.*s: symbol not known"),
3370 "LC_COLLATE", (int) lenmb, startmb);
3371 collate->cursor = NULL;
3372 no_error = 0;
3373 }
3374 }
3375
3376 lr_ignore_rest (ldfile, no_error);
3377 }
3378 else
3379 /* This must not happen. */
3380 goto err_label;
3381 break;
3382
3383 case tok_reorder_end:
3384 /* Ignore the rest of the line if we don't need the input of
3385 this line. */
3386 if (ignore_content)
3387 break;
3388
3389 if (state != 3)
3390 goto err_label;
3391 state = 4;
3392 lr_ignore_rest (ldfile, 1);
3393 break;
3394
3395 case tok_reorder_sections_after:
3396 /* Ignore the rest of the line if we don't need the input of
3397 this line. */
3398 if (ignore_content)
3399 {
3400 lr_ignore_rest (ldfile, 0);
3401 break;
3402 }
3403
3404 if (state == 1)
3405 {
3406 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3407 "LC_COLLATE");
3408 state = 2;
3409
3410 /* Handle ellipsis at end of list. */
3411 if (was_ellipsis != tok_none)
3412 {
3413 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3414 repertoire, result);
3415 was_ellipsis = tok_none;
3416 }
3417 }
3418 else if (state == 3)
3419 {
3420 WITH_CUR_LOCALE (error (0, 0, _("\
3421%s: missing `reorder-end' keyword"), "LC_COLLATE"));
3422 state = 4;
3423 }
3424 else if (state != 2 && state != 4)
3425 goto err_label;
3426 state = 5;
3427
3428 /* Get the name of the sections we are adding after. */
3429 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3430 if (arg->tok == tok_bsymbol)
3431 {
3432 /* Now find a section with this name. */
3433 struct section_list *runp = collate->sections;
3434
3435 while (runp != NULL)
3436 {
3437 if (runp->name != NULL
3438 && strlen (runp->name) == arg->val.str.lenmb
3439 && memcmp (runp->name, arg->val.str.startmb,
3440 arg->val.str.lenmb) == 0)
3441 break;
3442
3443 runp = runp->next;
3444 }
3445
3446 if (runp != NULL)
3447 collate->current_section = runp;
3448 else
3449 {
3450 /* This is bad. The section after which we have to
3451 reorder does not exist. Therefore we cannot
3452 process the whole rest of this reorder
3453 specification. */
3454 lr_error (ldfile, _("%s: section `%.*s' not known"),
3455 "LC_COLLATE", (int) arg->val.str.lenmb,
3456 arg->val.str.startmb);
3457
3458 do
3459 {
3460 lr_ignore_rest (ldfile, 0);
3461
3462 now = lr_token (ldfile, charmap, result, NULL, verbose);
3463 }
3464 while (now->tok == tok_reorder_sections_after
3465 || now->tok == tok_reorder_sections_end
3466 || now->tok == tok_end);
3467
3468 /* Process the token we just saw. */
3469 nowtok = now->tok;
3470 continue;
3471 }
3472 }
3473 else
3474 /* This must not happen. */
3475 goto err_label;
3476 break;
3477
3478 case tok_reorder_sections_end:
3479 /* Ignore the rest of the line if we don't need the input of
3480 this line. */
3481 if (ignore_content)
3482 break;
3483
3484 if (state != 5)
3485 goto err_label;
3486 state = 6;
3487 lr_ignore_rest (ldfile, 1);
3488 break;
3489
3490 case tok_bsymbol:
3491 case tok_ucs4:
3492 /* Ignore the rest of the line if we don't need the input of
3493 this line. */
3494 if (ignore_content)
3495 {
3496 lr_ignore_rest (ldfile, 0);
3497 break;
3498 }
3499
3500 if (state != 0 && state != 1 && state != 3 && state != 5)
3501 goto err_label;
3502
3503 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3504 goto err_label;
3505
3506 if (nowtok == tok_ucs4)
3507 {
3508 snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3509 symstr = ucs4buf;
3510 symlen = 9;
3511 }
3512 else if (arg != NULL)
3513 {
3514 symstr = arg->val.str.startmb;
3515 symlen = arg->val.str.lenmb;
3516 }
3517 else
3518 {
3519 lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3520 (int) ldfile->token.val.str.lenmb,
3521 ldfile->token.val.str.startmb);
3522 break;
3523 }
3524
3525 struct element_t *seqp;
3526 if (state == 0)
3527 {
3528 /* We are outside an `order_start' region. This means
3529 we must only accept definitions of values for
3530 collation symbols since these are purely abstract
3531 values and don't need directions associated. */
3532 void *ptr;
3533
3534 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3535 {
3536 seqp = ptr;
3537
3538 /* It's already defined. First check whether this
3539 is really a collating symbol. */
3540 if (seqp->is_character)
3541 goto err_label;
3542
3543 goto move_entry;
3544 }
3545 else
3546 {
3547 void *result;
3548
3549 if (find_entry (&collate->sym_table, symstr, symlen,
3550 &result) != 0)
3551 /* No collating symbol, it's an error. */
3552 goto err_label;
3553
3554 /* Maybe this is the first time we define a symbol
3555 value and it is before the first actual section. */
3556 if (collate->sections == NULL)
3557 collate->sections = collate->current_section =
3558 &collate->symbol_section;
3559 }
3560
3561 if (was_ellipsis != tok_none)
3562 {
3563 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3564 charmap, repertoire, result);
3565
3566 /* Remember that we processed the ellipsis. */
3567 was_ellipsis = tok_none;
3568
3569 /* And don't add the value a second time. */
3570 break;
3571 }
3572 }
3573 else if (state == 3)
3574 {
3575 /* It is possible that we already have this collation sequence.
3576 In this case we move the entry. */
3577 void *sym;
3578 void *ptr;
3579
3580 /* If the symbol after which we have to insert was not found
3581 ignore all entries. */
3582 if (collate->cursor == NULL)
3583 {
3584 lr_ignore_rest (ldfile, 0);
3585 break;
3586 }
3587
3588 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3589 {
3590 seqp = (struct element_t *) ptr;
3591 goto move_entry;
3592 }
3593
3594 if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3595 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3596 goto move_entry;
3597
3598 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3599 && (seqp = (struct element_t *) ptr,
3600 seqp->last != NULL || seqp->next != NULL
3601 || (collate->start != NULL && seqp == collate->start)))
3602 {
3603 move_entry:
3604 /* Remove the entry from the old position. */
3605 if (seqp->last == NULL)
3606 collate->start = seqp->next;
3607 else
3608 seqp->last->next = seqp->next;
3609 if (seqp->next != NULL)
3610 seqp->next->last = seqp->last;
3611
3612 /* We also have to check whether this entry is the
3613 first or last of a section. */
3614 if (seqp->section->first == seqp)
3615 {
3616 if (seqp->section->first == seqp->section->last)
3617 /* This section has no content anymore. */
3618 seqp->section->first = seqp->section->last = NULL;
3619 else
3620 seqp->section->first = seqp->next;
3621 }
3622 else if (seqp->section->last == seqp)
3623 seqp->section->last = seqp->last;
3624
3625 /* Now insert it in the new place. */
3626 insert_weights (ldfile, seqp, charmap, repertoire, result,
3627 tok_none);
3628 break;
3629 }
3630
3631 /* Otherwise we just add a new entry. */
3632 }
3633 else if (state == 5)
3634 {
3635 /* We are reordering sections. Find the named section. */
3636 struct section_list *runp = collate->sections;
3637 struct section_list *prevp = NULL;
3638
3639 while (runp != NULL)
3640 {
3641 if (runp->name != NULL
3642 && strlen (runp->name) == symlen
3643 && memcmp (runp->name, symstr, symlen) == 0)
3644 break;
3645
3646 prevp = runp;
3647 runp = runp->next;
3648 }
3649
3650 if (runp == NULL)
3651 {
3652 lr_error (ldfile, _("%s: section `%.*s' not known"),
3653 "LC_COLLATE", (int) symlen, symstr);
3654 lr_ignore_rest (ldfile, 0);
3655 }
3656 else
3657 {
3658 if (runp != collate->current_section)
3659 {
3660 /* Remove the named section from the old place and
3661 insert it in the new one. */
3662 prevp->next = runp->next;
3663
3664 runp->next = collate->current_section->next;
3665 collate->current_section->next = runp;
3666 collate->current_section = runp;
3667 }
3668
3669 /* Process the rest of the line which might change
3670 the collation rules. */
3671 arg = lr_token (ldfile, charmap, result, repertoire,
3672 verbose);
3673 if (arg->tok != tok_eof && arg->tok != tok_eol)
3674 read_directions (ldfile, arg, charmap, repertoire,
3675 result);
3676 }
3677 break;
3678 }
3679 else if (was_ellipsis != tok_none)
3680 {
3681 /* Using the information in the `ellipsis_weight'
3682 element and this and the last value we have to handle
3683 the ellipsis now. */
3684 assert (state == 1);
3685
3686 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3687 repertoire, result);
3688
3689 /* Remember that we processed the ellipsis. */
3690 was_ellipsis = tok_none;
3691
3692 /* And don't add the value a second time. */
3693 break;
3694 }
3695
3696 /* Now insert in the new place. */
3697 insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3698 break;
3699
3700 case tok_undefined:
3701 /* Ignore the rest of the line if we don't need the input of
3702 this line. */
3703 if (ignore_content)
3704 {
3705 lr_ignore_rest (ldfile, 0);
3706 break;
3707 }
3708
3709 if (state != 1)
3710 goto err_label;
3711
3712 if (was_ellipsis != tok_none)
3713 {
3714 lr_error (ldfile,
3715 _("%s: cannot have `%s' as end of ellipsis range"),
3716 "LC_COLLATE", "UNDEFINED");
3717
3718 unlink_element (collate);
3719 was_ellipsis = tok_none;
3720 }
3721
3722 /* See whether UNDEFINED already appeared somewhere. */
3723 if (collate->undefined.next != NULL
3724 || &collate->undefined == collate->cursor)
3725 {
3726 lr_error (ldfile,
3727 _("%s: order for `%.*s' already defined at %s:%Zu"),
3728 "LC_COLLATE", 9, "UNDEFINED",
3729 collate->undefined.file,
3730 collate->undefined.line);
3731 lr_ignore_rest (ldfile, 0);
3732 }
3733 else
3734 /* Parse the weights. */
3735 insert_weights (ldfile, &collate->undefined, charmap,
3736 repertoire, result, tok_none);
3737 break;
3738
3739 case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3740 case tok_ellipsis3: /* absolute ellipsis */
3741 case tok_ellipsis4: /* symbolic decimal ellipsis */
3742 /* This is the symbolic (decimal or hexadecimal) or absolute
3743 ellipsis. */
3744 if (was_ellipsis != tok_none)
3745 goto err_label;
3746
3747 if (state != 0 && state != 1 && state != 3)
3748 goto err_label;
3749
3750 was_ellipsis = nowtok;
3751
3752 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3753 repertoire, result, nowtok);
3754 break;
3755
3756 case tok_end:
3757 seen_end:
3758 /* Next we assume `LC_COLLATE'. */
3759 if (!ignore_content)
3760 {
3761 if (state == 0 && copy_locale == NULL)
3762 /* We must either see a copy statement or have
3763 ordering values. */
3764 lr_error (ldfile,
3765 _("%s: empty category description not allowed"),
3766 "LC_COLLATE");
3767 else if (state == 1)
3768 {
3769 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3770 "LC_COLLATE");
3771
3772 /* Handle ellipsis at end of list. */
3773 if (was_ellipsis != tok_none)
3774 {
3775 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3776 repertoire, result);
3777 was_ellipsis = tok_none;
3778 }
3779 }
3780 else if (state == 3)
3781 WITH_CUR_LOCALE (error (0, 0, _("\
3782%s: missing `reorder-end' keyword"), "LC_COLLATE"));
3783 else if (state == 5)
3784 WITH_CUR_LOCALE (error (0, 0, _("\
3785%s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3786 }
3787 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3788 if (arg->tok == tok_eof)
3789 break;
3790 if (arg->tok == tok_eol)
3791 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3792 else if (arg->tok != tok_lc_collate)
3793 lr_error (ldfile, _("\
3794%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3795 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3796 return;
3797
3798 case tok_define:
3799 if (ignore_content)
3800 {
3801 lr_ignore_rest (ldfile, 0);
3802 break;
3803 }
3804
3805 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3806 if (arg->tok != tok_ident)
3807 goto err_label;
3808
3809 /* Simply add the new symbol. */
3810 struct name_list *newsym = xmalloc (sizeof (*newsym)
3811 + arg->val.str.lenmb + 1);
3812 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3813 newsym->str[arg->val.str.lenmb] = '\0';
3814 newsym->next = defined;
3815 defined = newsym;
3816
3817 lr_ignore_rest (ldfile, 1);
3818 break;
3819
3820 case tok_undef:
3821 if (ignore_content)
3822 {
3823 lr_ignore_rest (ldfile, 0);
3824 break;
3825 }
3826
3827 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3828 if (arg->tok != tok_ident)
3829 goto err_label;
3830
3831 /* Remove _all_ occurrences of the symbol from the list. */
3832 struct name_list *prevdef = NULL;
3833 struct name_list *curdef = defined;
3834 while (curdef != NULL)
3835 if (strncmp (arg->val.str.startmb, curdef->str,
3836 arg->val.str.lenmb) == 0
3837 && curdef->str[arg->val.str.lenmb] == '\0')
3838 {
3839 if (prevdef == NULL)
3840 defined = curdef->next;
3841 else
3842 prevdef->next = curdef->next;
3843
3844 struct name_list *olddef = curdef;
3845 curdef = curdef->next;
3846
3847 free (olddef);
3848 }
3849 else
3850 {
3851 prevdef = curdef;
3852 curdef = curdef->next;
3853 }
3854
3855 lr_ignore_rest (ldfile, 1);
3856 break;
3857
3858 case tok_ifdef:
3859 case tok_ifndef:
3860 if (ignore_content)
3861 {
3862 lr_ignore_rest (ldfile, 0);
3863 break;
3864 }
3865
3866 found_ifdef:
3867 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3868 if (arg->tok != tok_ident)
3869 goto err_label;
3870 lr_ignore_rest (ldfile, 1);
3871
3872 if (collate->else_action == else_none)
3873 {
3874 curdef = defined;
3875 while (curdef != NULL)
3876 if (strncmp (arg->val.str.startmb, curdef->str,
3877 arg->val.str.lenmb) == 0
3878 && curdef->str[arg->val.str.lenmb] == '\0')
3879 break;
3880 else
3881 curdef = curdef->next;
3882
3883 if ((nowtok == tok_ifdef && curdef != NULL)
3884 || (nowtok == tok_ifndef && curdef == NULL))
3885 {
3886 /* We have to use the if-branch. */
3887 collate->else_action = else_ignore;
3888 }
3889 else
3890 {
3891 /* We have to use the else-branch, if there is one. */
3892 nowtok = skip_to (ldfile, collate, charmap, 0);
3893 if (nowtok == tok_else)
3894 collate->else_action = else_seen;
3895 else if (nowtok == tok_elifdef)
3896 {
3897 nowtok = tok_ifdef;
3898 goto found_ifdef;
3899 }
3900 else if (nowtok == tok_elifndef)
3901 {
3902 nowtok = tok_ifndef;
3903 goto found_ifdef;
3904 }
3905 else if (nowtok == tok_eof)
3906 goto seen_eof;
3907 else if (nowtok == tok_end)
3908 goto seen_end;
3909 }
3910 }
3911 else
3912 {
3913 /* XXX Should it really become necessary to support nested
3914 preprocessor handling we will push the state here. */
3915 lr_error (ldfile, _("%s: nested conditionals not supported"),
3916 "LC_COLLATE");
3917 nowtok = skip_to (ldfile, collate, charmap, 1);
3918 if (nowtok == tok_eof)
3919 goto seen_eof;
3920 else if (nowtok == tok_end)
3921 goto seen_end;
3922 }
3923 break;
3924
3925 case tok_elifdef:
3926 case tok_elifndef:
3927 case tok_else:
3928 if (ignore_content)
3929 {
3930 lr_ignore_rest (ldfile, 0);
3931 break;
3932 }
3933
3934 lr_ignore_rest (ldfile, 1);
3935
3936 if (collate->else_action == else_ignore)
3937 {
3938 /* Ignore everything until the endif. */
3939 nowtok = skip_to (ldfile, collate, charmap, 1);
3940 if (nowtok == tok_eof)
3941 goto seen_eof;
3942 else if (nowtok == tok_end)
3943 goto seen_end;
3944 }
3945 else
3946 {
3947 assert (collate->else_action == else_none);
3948 lr_error (ldfile, _("\
3949%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3950 nowtok == tok_else ? "else"
3951 : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3952 }
3953 break;
3954
3955 case tok_endif:
3956 if (ignore_content)
3957 {
3958 lr_ignore_rest (ldfile, 0);
3959 break;
3960 }
3961
3962 lr_ignore_rest (ldfile, 1);
3963
3964 if (collate->else_action != else_ignore
3965 && collate->else_action != else_seen)
3966 lr_error (ldfile, _("\
3967%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3968
3969 /* XXX If we support nested preprocessor directives we pop
3970 the state here. */
3971 collate->else_action = else_none;
3972 break;
3973
3974 default:
3975 err_label:
3976 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3977 }
3978
3979 /* Prepare for the next round. */
3980 now = lr_token (ldfile, charmap, result, NULL, verbose);
3981 nowtok = now->tok;
3982 }
3983
3984 seen_eof:
3985 /* When we come here we reached the end of the file. */
3986 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3987}