Blame - ap/libc/glibc/glibc-2.22/locale/programs/ld-collate.c - T106_DC

blob: a39a94f2cc3508b42308c3b11eb116ef582e4a5b [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/* Copyright (C) 1995-2015 Free Software Foundation, Inc.
				2	This file is part of the GNU C Library.
				3	Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
				4
				5	This program is free software; you can redistribute it and/or modify
				6	it under the terms of the GNU General Public License as published
				7	by the Free Software Foundation; version 2 of the License, or
				8	(at your option) any later version.
				9
				10	This program is distributed in the hope that it will be useful,
				11	but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				13	GNU General Public License for more details.
				14
				15	You should have received a copy of the GNU General Public License
				16	along with this program; if not, see <http://www.gnu.org/licenses/>. */
				17
				18	#ifdef HAVE_CONFIG_H
				19	# include <config.h>
				20	#endif
				21
				22	#include <errno.h>
				23	#include <error.h>
				24	#include <stdlib.h>
				25	#include <wchar.h>
				26	#include <stdint.h>
				27	#include <sys/param.h>
				28
				29	#include "localedef.h"
				30	#include "charmap.h"
				31	#include "localeinfo.h"
				32	#include "linereader.h"
				33	#include "locfile.h"
				34	#include "elem-hash.h"
				35	#include "../localeinfo.h"
				36
				37	/* Uncomment the following line in the production version. */
				38	/* #define NDEBUG 1 */
				39	#include <assert.h>
				40
				41	#define obstack_chunk_alloc malloc
				42	#define obstack_chunk_free free
				43
				44	static inline void
				45	__attribute ((always_inline))
				46	obstack_int32_grow (struct obstack *obstack, int32_t data)
				47	{
				48	assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
				49	data = maybe_swap_uint32 (data);
				50	if (sizeof (int32_t) == sizeof (int))
				51	obstack_int_grow (obstack, data);
				52	else
				53	obstack_grow (obstack, &data, sizeof (int32_t));
				54	}
				55
				56	static inline void
				57	__attribute ((always_inline))
				58	obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
				59	{
				60	assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
				61	data = maybe_swap_uint32 (data);
				62	if (sizeof (int32_t) == sizeof (int))
				63	obstack_int_grow_fast (obstack, data);
				64	else
				65	obstack_grow (obstack, &data, sizeof (int32_t));
				66	}
				67
/* Forward declaration.  */
struct element_t;

/* Description of one section of the collation order.  Every node can
   be a member of two singly linked lists at once: the known_sections
   list (through DEF_NEXT) and the sections list (through NEXT).  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section (array filled in by
     read_directions, one entry per weight level).  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
				89
struct element_t;

/* Weight list for one level of one element: an array W of CNT element
   pointers.  A NULL entry marks an ignored level (see insert_weights).  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* The CNT element pointers.  */
  struct element_t **w;
};
				99
/* Data type for collating element.  */
struct element_t
{
  /* Name of the element; NULL for anonymous elements (see new_element).  */
  const char *name;

  /* Multibyte representation and its length in bytes; NULL/0 if none.  */
  const char *mbs;
  size_t nmbs;
  /* Wide character representation (zero-terminated) and its length in
     characters; NULL/0 if none.  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Array of per-level weight lists; allocated by insert_weights with
     one entry per rule.  NULL until then.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};

/* Special element values.  These are sentinel pointers that never refer
   to a real element; insert_weights stores ELEMENT_ELLIPSIS2 as a
   placeholder weight for ellipsis ranges.  */
#define ELEMENT_ELLIPSIS2	((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3	((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4	((struct element_t *) 3)
				154
/* Data type for collating symbol.  */
struct symbol_t
{
  /* Name of the symbol (copied into the collate obstack by new_symbol).  */
  const char *name;

  /* Point to place in the order list.  NULL until the symbol has been
     given an element (see find_element and insert_value).  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
				167
				168	/* Sparse table of struct element_t . /
				169	#define TABLE wchead_table
				170	#define ELEMENT struct element_t *
				171	#define DEFAULT NULL
				172	#define ITERATE
				173	#define NO_ADD_LOCALE
				174	#include "3level.h"
				175
				176	/* Sparse table of int32_t. */
				177	#define TABLE collidx_table
				178	#define ELEMENT int32_t
				179	#define DEFAULT 0
				180	#include "3level.h"
				181
				182	/* Sparse table of uint32_t. */
				183	#define TABLE collseq_table
				184	#define ELEMENT uint32_t
				185	#define DEFAULT ~((uint32_t) 0)
				186	#include "3level.h"
				187
				188
				189	/* Simple name list for the preprocessor. */
				190	struct name_list
				191	{
				192	struct name_list *next;
				193	char str[0];
				194	};
				195
				196
/* The real definition of the struct for the LC_COLLATE locale.  */
struct locale_collate_t
{
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts.  */
  struct section_list *known_sections;
  /* List of used sections.  */
  struct section_list *sections;
  /* Current section using definition.  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* Flag whether the unnamed section has been defined.  */
  bool unnamed_section_defined;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  insert_weights
     links every new element directly after it and then advances it.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* Obstack from which elements, symbols, sections and weight arrays
     are allocated.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contains information.
     Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the wide character lists; the
     wide character counterpart of MBHEADS (presumably indexed by the
     leading wide character -- confirm against the users of wcheads).  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;

  /* State of the preprocessor.  */
  enum
    {
      else_none = 0,
      else_ignore,
      else_seen
    }
    else_action;
};
				268
				269
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  NRULES is the number
   of sorting rules (weight levels); it is zero until the first
   direction specification has been read by read_directions.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
				276
				277
				278	/* We need UTF-8 encoding of numbers. */
				279	static inline int
				280	__attribute ((always_inline))
				281	utf8_encode (char *buf, int val)
				282	{
				283	int retval;
				284
				285	if (val < 0x80)
				286	{
				287	*buf++ = (char) val;
				288	retval = 1;
				289	}
				290	else
				291	{
				292	int step;
				293
				294	for (step = 2; step < 6; ++step)
				295	if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0)
				296	break;
				297	retval = step;
				298
				299	*buf = (unsigned char) (~0xff >> step);
				300	--step;
				301	do
				302	{
				303	buf[step] = 0x80 \| (val & 0x3f);
				304	val >>= 6;
				305	}
				306	while (--step > 0);
				307	*buf \|= val;
				308	}
				309
				310	return retval;
				311	}
				312
				313
				314	static struct section_list *
				315	make_seclist_elem (struct locale_collate_t collate, const char string,
				316	struct section_list *next)
				317	{
				318	struct section_list *newp;
				319
				320	newp = (struct section_list *) obstack_alloc (&collate->mempool,
				321	sizeof (*newp));
				322	newp->next = next;
				323	newp->name = string;
				324	newp->first = NULL;
				325	newp->last = NULL;
				326
				327	return newp;
				328	}
				329
				330
				331	static struct element_t *
				332	new_element (struct locale_collate_t collate, const char mbs, size_t mbslen,
				333	const uint32_t wcs, const char name, size_t namelen,
				334	int is_character)
				335	{
				336	struct element_t *newp;
				337
				338	newp = (struct element_t *) obstack_alloc (&collate->mempool,
				339	sizeof (*newp));
				340	newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
				341	name, namelen);
				342	if (mbs != NULL)
				343	{
				344	newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
				345	newp->nmbs = mbslen;
				346	}
				347	else
				348	{
				349	newp->mbs = NULL;
				350	newp->nmbs = 0;
				351	}
				352	if (wcs != NULL)
				353	{
				354	size_t nwcs = wcslen ((wchar_t *) wcs);
				355	uint32_t zero = 0;
				356	/* Handle <U0000> as a single character. */
				357	if (nwcs == 0)
				358	nwcs = 1;
				359	obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
				360	obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
				361	newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
				362	newp->nwcs = nwcs;
				363	}
				364	else
				365	{
				366	newp->wcs = NULL;
				367	newp->nwcs = 0;
				368	}
				369	newp->mborder = NULL;
				370	newp->wcorder = 0;
				371	newp->used_in_level = 0;
				372	newp->is_character = is_character;
				373
				374	/* Will be assigned later. XXX */
				375	newp->mbseqorder = 0;
				376	newp->wcseqorder = 0;
				377
				378	/* Will be allocated later. */
				379	newp->weights = NULL;
				380
				381	newp->file = NULL;
				382	newp->line = 0;
				383
				384	newp->section = collate->current_section;
				385
				386	newp->last = NULL;
				387	newp->next = NULL;
				388
				389	newp->mbnext = NULL;
				390	newp->mblast = NULL;
				391
				392	newp->wcnext = NULL;
				393	newp->wclast = NULL;
				394
				395	return newp;
				396	}
				397
				398
				399	static struct symbol_t *
				400	new_symbol (struct locale_collate_t collate, const char name, size_t len)
				401	{
				402	struct symbol_t *newp;
				403
				404	newp = (struct symbol_t ) obstack_alloc (&collate->mempool, sizeof (newp));
				405
				406	newp->name = obstack_copy0 (&collate->mempool, name, len);
				407	newp->order = NULL;
				408
				409	newp->file = NULL;
				410	newp->line = 0;
				411
				412	return newp;
				413	}
				414
				415
				416	/* Test whether this name is already defined somewhere. */
				417	static int
				418	check_duplicate (struct linereader ldfile, struct locale_collate_t collate,
				419	const struct charmap_t *charmap,
				420	struct repertoire_t repertoire, const char symbol,
				421	size_t symbol_len)
				422	{
				423	void *ignore = NULL;
				424
				425	if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
				426	{
				427	lr_error (ldfile, _("`%.*s' already defined in charmap"),
				428	(int) symbol_len, symbol);
				429	return 1;
				430	}
				431
				432	if (repertoire != NULL
				433	&& (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
				434	== 0))
				435	{
				436	lr_error (ldfile, _("`%.*s' already defined in repertoire"),
				437	(int) symbol_len, symbol);
				438	return 1;
				439	}
				440
				441	if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
				442	{
				443	lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
				444	(int) symbol_len, symbol);
				445	return 1;
				446	}
				447
				448	if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
				449	{
				450	lr_error (ldfile, _("`%.*s' already defined as collating element"),
				451	(int) symbol_len, symbol);
				452	return 1;
				453	}
				454
				455	return 0;
				456	}
				457
				458
				459	/* Read the direction specification. */
				460	static void
				461	read_directions (struct linereader ldfile, struct token arg,
				462	const struct charmap_t *charmap,
				463	struct repertoire_t repertoire, struct localedef_t result)
				464	{
				465	int cnt = 0;
				466	int max = nrules ?: 10;
				467	enum coll_sort_rule rules = calloc (max, sizeof (rules));
				468	int warned = 0;
				469	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
				470
				471	while (1)
				472	{
				473	int valid = 0;
				474
				475	if (arg->tok == tok_forward)
				476	{
				477	if (rules[cnt] & sort_backward)
				478	{
				479	if (! warned)
				480	{
				481	lr_error (ldfile, _("\
				482	%s: `forward' and `backward' are mutually excluding each other"),
				483	"LC_COLLATE");
				484	warned = 1;
				485	}
				486	}
				487	else if (rules[cnt] & sort_forward)
				488	{
				489	if (! warned)
				490	{
				491	lr_error (ldfile, _("\
				492	%s: `%s' mentioned more than once in definition of weight %d"),
				493	"LC_COLLATE", "forward", cnt + 1);
				494	}
				495	}
				496	else
				497	rules[cnt] \|= sort_forward;
				498
				499	valid = 1;
				500	}
				501	else if (arg->tok == tok_backward)
				502	{
				503	if (rules[cnt] & sort_forward)
				504	{
				505	if (! warned)
				506	{
				507	lr_error (ldfile, _("\
				508	%s: `forward' and `backward' are mutually excluding each other"),
				509	"LC_COLLATE");
				510	warned = 1;
				511	}
				512	}
				513	else if (rules[cnt] & sort_backward)
				514	{
				515	if (! warned)
				516	{
				517	lr_error (ldfile, _("\
				518	%s: `%s' mentioned more than once in definition of weight %d"),
				519	"LC_COLLATE", "backward", cnt + 1);
				520	}
				521	}
				522	else
				523	rules[cnt] \|= sort_backward;
				524
				525	valid = 1;
				526	}
				527	else if (arg->tok == tok_position)
				528	{
				529	if (rules[cnt] & sort_position)
				530	{
				531	if (! warned)
				532	{
				533	lr_error (ldfile, _("\
				534	%s: `%s' mentioned more than once in definition of weight %d"),
				535	"LC_COLLATE", "position", cnt + 1);
				536	}
				537	}
				538	else
				539	rules[cnt] \|= sort_position;
				540
				541	valid = 1;
				542	}
				543
				544	if (valid)
				545	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				546
				547	if (arg->tok == tok_eof \|\| arg->tok == tok_eol \|\| arg->tok == tok_comma
				548	\|\| arg->tok == tok_semicolon)
				549	{
				550	if (! valid && ! warned)
				551	{
				552	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
				553	warned = 1;
				554	}
				555
				556	/* See whether we have to increment the counter. */
				557	if (arg->tok != tok_comma && rules[cnt] != 0)
				558	{
				559	/* Add the default `forward' if we have seen only `position'. */
				560	if (rules[cnt] == sort_position)
				561	rules[cnt] = sort_position \| sort_forward;
				562
				563	++cnt;
				564	}
				565
				566	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
				567	/* End of line or file, so we exit the loop. */
				568	break;
				569
				570	if (nrules == 0)
				571	{
				572	/* See whether we have enough room in the array. */
				573	if (cnt == max)
				574	{
				575	max += 10;
				576	rules = (enum coll_sort_rule *) xrealloc (rules,
				577	max
				578	* sizeof (*rules));
				579	memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
				580	}
				581	}
				582	else
				583	{
				584	if (cnt == nrules)
				585	{
				586	/* There must not be any more rule. */
				587	if (! warned)
				588	{
				589	lr_error (ldfile, _("\
				590	%s: too many rules; first entry only had %d"),
				591	"LC_COLLATE", nrules);
				592	warned = 1;
				593	}
				594
				595	lr_ignore_rest (ldfile, 0);
				596	break;
				597	}
				598	}
				599	}
				600	else
				601	{
				602	if (! warned)
				603	{
				604	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
				605	warned = 1;
				606	}
				607	}
				608
				609	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				610	}
				611
				612	if (nrules == 0)
				613	{
				614	/* Now we know how many rules we have. */
				615	nrules = cnt;
				616	rules = (enum coll_sort_rule *) xrealloc (rules,
				617	nrules * sizeof (*rules));
				618	}
				619	else
				620	{
				621	if (cnt < nrules)
				622	{
				623	/* Not enough rules in this specification. */
				624	if (! warned)
				625	lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
				626
				627	do
				628	rules[cnt] = sort_forward;
				629	while (++cnt < nrules);
				630	}
				631	}
				632
				633	collate->current_section->rules = rules;
				634	}
				635
				636
				637	static struct element_t *
				638	find_element (struct linereader ldfile, struct locale_collate_t collate,
				639	const char *str, size_t len)
				640	{
				641	void *result = NULL;
				642
				643	/* Search for the entries among the collation sequences already define. */
				644	if (find_entry (&collate->seq_table, str, len, &result) != 0)
				645	{
				646	/* Nope, not define yet. So we see whether it is a
				647	collation symbol. */
				648	void *ptr;
				649
				650	if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
				651	{
				652	/* It's a collation symbol. */
				653	struct symbol_t sym = (struct symbol_t ) ptr;
				654	result = sym->order;
				655
				656	if (result == NULL)
				657	result = sym->order = new_element (collate, NULL, 0, NULL,
				658	NULL, 0, 0);
				659	}
				660	else if (find_entry (&collate->elem_table, str, len, &result) != 0)
				661	{
				662	/* It's also no collation element. So it is a character
				663	element defined later. */
				664	result = new_element (collate, NULL, 0, NULL, str, len, 1);
				665	/* Insert it into the sequence table. */
				666	insert_entry (&collate->seq_table, str, len, result);
				667	}
				668	}
				669
				670	return (struct element_t *) result;
				671	}
				672
				673
				674	static void
				675	unlink_element (struct locale_collate_t *collate)
				676	{
				677	if (collate->cursor == collate->start)
				678	{
				679	assert (collate->cursor->next == NULL);
				680	assert (collate->cursor->last == NULL);
				681	collate->cursor = NULL;
				682	}
				683	else
				684	{
				685	if (collate->cursor->next != NULL)
				686	collate->cursor->next->last = collate->cursor->last;
				687	if (collate->cursor->last != NULL)
				688	collate->cursor->last->next = collate->cursor->next;
				689	collate->cursor = collate->cursor->last;
				690	}
				691	}
				692
				693
				694	static void
				695	insert_weights (struct linereader ldfile, struct element_t elem,
				696	const struct charmap_t *charmap,
				697	struct repertoire_t repertoire, struct localedef_t result,
				698	enum token_t ellipsis)
				699	{
				700	int weight_cnt;
				701	struct token *arg;
				702	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
				703
				704	/* Initialize all the fields. */
				705	elem->file = ldfile->fname;
				706	elem->line = ldfile->lineno;
				707
				708	elem->last = collate->cursor;
				709	elem->next = collate->cursor ? collate->cursor->next : NULL;
				710	if (collate->cursor != NULL && collate->cursor->next != NULL)
				711	collate->cursor->next->last = elem;
				712	if (collate->cursor != NULL)
				713	collate->cursor->next = elem;
				714	if (collate->start == NULL)
				715	{
				716	assert (collate->cursor == NULL);
				717	collate->start = elem;
				718	}
				719
				720	elem->section = collate->current_section;
				721
				722	if (collate->current_section->first == NULL)
				723	collate->current_section->first = elem;
				724	if (collate->current_section->last == collate->cursor)
				725	collate->current_section->last = elem;
				726
				727	collate->cursor = elem;
				728
				729	elem->weights = (struct element_list_t *)
				730	obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
				731	memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
				732
				733	weight_cnt = 0;
				734
				735	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				736	do
				737	{
				738	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
				739	break;
				740
				741	if (arg->tok == tok_ignore)
				742	{
				743	/* The weight for this level has to be ignored. We use the
				744	null pointer to indicate this. */
				745	elem->weights[weight_cnt].w = (struct element_t **)
				746	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
				747	elem->weights[weight_cnt].w[0] = NULL;
				748	elem->weights[weight_cnt].cnt = 1;
				749	}
				750	else if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
				751	{
				752	char ucs4str[10];
				753	struct element_t *val;
				754	char *symstr;
				755	size_t symlen;
				756
				757	if (arg->tok == tok_bsymbol)
				758	{
				759	symstr = arg->val.str.startmb;
				760	symlen = arg->val.str.lenmb;
				761	}
				762	else
				763	{
				764	snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
				765	symstr = ucs4str;
				766	symlen = 9;
				767	}
				768
				769	val = find_element (ldfile, collate, symstr, symlen);
				770	if (val == NULL)
				771	break;
				772
				773	elem->weights[weight_cnt].w = (struct element_t **)
				774	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
				775	elem->weights[weight_cnt].w[0] = val;
				776	elem->weights[weight_cnt].cnt = 1;
				777	}
				778	else if (arg->tok == tok_string)
				779	{
				780	/* Split the string up in the individual characters and put
				781	the element definitions in the list. */
				782	const char *cp = arg->val.str.startmb;
				783	int cnt = 0;
				784	struct element_t *charelem;
				785	struct element_t **weights = NULL;
				786	int max = 0;
				787
				788	if (*cp == '\0')
				789	{
				790	lr_error (ldfile, _("%s: empty weight string not allowed"),
				791	"LC_COLLATE");
				792	lr_ignore_rest (ldfile, 0);
				793	break;
				794	}
				795
				796	do
				797	{
				798	if (*cp == '<')
				799	{
				800	/* Ahh, it's a bsymbol or an UCS4 value. If it's
				801	the latter we have to unify the name. */
				802	const char *startp = ++cp;
				803	size_t len;
				804
				805	while (*cp != '>')
				806	{
				807	if (*cp == ldfile->escape_char)
				808	++cp;
				809	if (*cp == '\0')
				810	/* It's a syntax error. */
				811	goto syntax;
				812
				813	++cp;
				814	}
				815
				816	if (cp - startp == 5 && startp[0] == 'U'
				817	&& isxdigit (startp[1]) && isxdigit (startp[2])
				818	&& isxdigit (startp[3]) && isxdigit (startp[4]))
				819	{
				820	unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
				821	char *newstr;
				822
				823	newstr = (char *) xmalloc (10);
				824	snprintf (newstr, 10, "U%08X", ucs4);
				825	startp = newstr;
				826
				827	len = 9;
				828	}
				829	else
				830	len = cp - startp;
				831
				832	charelem = find_element (ldfile, collate, startp, len);
				833	++cp;
				834	}
				835	else
				836	{
				837	/* People really shouldn't use characters directly in
				838	the string. Especially since it's not really clear
				839	what this means. We interpret all characters in the
				840	string as if that would be bsymbols. Otherwise we
				841	would have to match back to bsymbols somehow and this
				842	is normally not what people normally expect. */
				843	charelem = find_element (ldfile, collate, cp++, 1);
				844	}
				845
				846	if (charelem == NULL)
				847	{
				848	/* We ignore the rest of the line. */
				849	lr_ignore_rest (ldfile, 0);
				850	break;
				851	}
				852
				853	/* Add the pointer. */
				854	if (cnt >= max)
				855	{
				856	struct element_t **newp;
				857	max += 10;
				858	newp = (struct element_t **)
				859	alloca (max * sizeof (struct element_t *));
				860	memcpy (newp, weights, cnt * sizeof (struct element_t *));
				861	weights = newp;
				862	}
				863	weights[cnt++] = charelem;
				864	}
				865	while (*cp != '\0');
				866
				867	/* Now store the information. */
				868	elem->weights[weight_cnt].w = (struct element_t **)
				869	obstack_alloc (&collate->mempool,
				870	cnt * sizeof (struct element_t *));
				871	memcpy (elem->weights[weight_cnt].w, weights,
				872	cnt * sizeof (struct element_t *));
				873	elem->weights[weight_cnt].cnt = cnt;
				874
				875	/* We don't need the string anymore. */
				876	free (arg->val.str.startmb);
				877	}
				878	else if (ellipsis != tok_none
				879	&& (arg->tok == tok_ellipsis2
				880	\|\| arg->tok == tok_ellipsis3
				881	\|\| arg->tok == tok_ellipsis4))
				882	{
				883	/* It must be the same ellipsis as used in the initial column. */
				884	if (arg->tok != ellipsis)
				885	lr_error (ldfile, _("\
				886	%s: weights must use the same ellipsis symbol as the name"),
				887	"LC_COLLATE");
				888
				889	/* The weight for this level will depend on the element
				890	iterating over the range. Put a placeholder. */
				891	elem->weights[weight_cnt].w = (struct element_t **)
				892	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
				893	elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
				894	elem->weights[weight_cnt].cnt = 1;
				895	}
				896	else
				897	{
				898	syntax:
				899	/* It's a syntax error. */
				900	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
				901	lr_ignore_rest (ldfile, 0);
				902	break;
				903	}
				904
				905	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				906	/* This better should be the end of the line or a semicolon. */
				907	if (arg->tok == tok_semicolon)
				908	/* OK, ignore this and read the next token. */
				909	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				910	else if (arg->tok != tok_eof && arg->tok != tok_eol)
				911	{
				912	/* It's a syntax error. */
				913	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
				914	lr_ignore_rest (ldfile, 0);
				915	break;
				916	}
				917	}
				918	while (++weight_cnt < nrules);
				919
				920	if (weight_cnt < nrules)
				921	{
				922	/* This means the rest of the line uses the current element as
				923	the weight. */
				924	do
				925	{
				926	elem->weights[weight_cnt].w = (struct element_t **)
				927	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
				928	if (ellipsis == tok_none)
				929	elem->weights[weight_cnt].w[0] = elem;
				930	else
				931	elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
				932	elem->weights[weight_cnt].cnt = 1;
				933	}
				934	while (++weight_cnt < nrules);
				935	}
				936	else
				937	{
				938	if (arg->tok == tok_ignore \|\| arg->tok == tok_bsymbol)
				939	{
				940	/* Too many rule values. */
				941	lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
				942	lr_ignore_rest (ldfile, 0);
				943	}
				944	else
				945	lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
				946	}
				947	}
				948
				949
				950	static int
				951	insert_value (struct linereader ldfile, const char symstr, size_t symlen,
				952	const struct charmap_t charmap, struct repertoire_t repertoire,
				953	struct localedef_t *result)
				954	{
				955	/* First find out what kind of symbol this is. */
				956	struct charseq *seq;
				957	uint32_t wc;
				958	struct element_t *elem = NULL;
				959	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
				960
				961	/* Try to find the character in the charmap. */
				962	seq = charmap_find_value (charmap, symstr, symlen);
				963
				964	/* Determine the wide character. */
				965	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
				966	{
				967	wc = repertoire_find_value (repertoire, symstr, symlen);
				968	if (seq != NULL)
				969	seq->ucs4 = wc;
				970	}
				971	else
				972	wc = seq->ucs4;
				973
				974	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
				975	{
				976	/* It's no character, so look through the collation elements and
				977	symbol list. */
				978	void *ptr = elem;
				979	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
				980	{
				981	void *result;
				982	struct symbol_t *sym = NULL;
				983
				984	/* It's also collation element. Therefore it's either a
				985	collating symbol or it's a character which is not
				986	supported by the character set. In the later case we
				987	simply create a dummy entry. */
				988	if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
				989	{
				990	/* It's a collation symbol. */
				991	sym = (struct symbol_t *) result;
				992
				993	elem = sym->order;
				994	}
				995
				996	if (elem == NULL)
				997	{
				998	elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
				999
				1000	if (sym != NULL)
				1001	sym->order = elem;
				1002	else
				1003	/* Enter a fake element in the sequence table. This
				1004	won't cause anything in the output since there is
				1005	no multibyte or wide character associated with
				1006	it. */
				1007	insert_entry (&collate->seq_table, symstr, symlen, elem);
				1008	}
				1009	}
				1010	else
				1011	/* Copy the result back. */
				1012	elem = ptr;
				1013	}
				1014	else
				1015	{
				1016	/* Otherwise the symbols stands for a character. */
				1017	void *ptr = elem;
				1018	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
				1019	{
				1020	uint32_t wcs[2] = { wc, 0 };
				1021
				1022	/* We have to allocate an entry. */
				1023	elem = new_element (collate,
				1024	seq != NULL ? (char *) seq->bytes : NULL,
				1025	seq != NULL ? seq->nbytes : 0,
				1026	wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
				1027	symstr, symlen, 1);
				1028
				1029	/* And add it to the table. */
				1030	if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
				1031	/* This cannot happen. */
				1032	assert (! "Internal error");
				1033	}
				1034	else
				1035	{
				1036	/* Copy the result back. */
				1037	elem = ptr;
				1038
				1039	/* Maybe the character was used before the definition. In this case
				1040	we have to insert the byte sequences now. */
				1041	if (elem->mbs == NULL && seq != NULL)
				1042	{
				1043	elem->mbs = obstack_copy0 (&collate->mempool,
				1044	seq->bytes, seq->nbytes);
				1045	elem->nmbs = seq->nbytes;
				1046	}
				1047
				1048	if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
				1049	{
				1050	uint32_t wcs[2] = { wc, 0 };
				1051
				1052	elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
				1053	elem->nwcs = 1;
				1054	}
				1055	}
				1056	}
				1057
				1058	/* Test whether this element is not already in the list. */
				1059	if (elem->next != NULL \|\| elem == collate->cursor)
				1060	{
				1061	lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
				1062	(int) symlen, symstr, elem->file, elem->line);
				1063	lr_ignore_rest (ldfile, 0);
				1064	return 1;
				1065	}
				1066
				1067	insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
				1068
				1069	return 0;
				1070	}
				1071
				1072
				1073	static void
				1074	handle_ellipsis (struct linereader ldfile, const char symstr, size_t symlen,
				1075	enum token_t ellipsis, const struct charmap_t *charmap,
				1076	struct repertoire_t *repertoire,
				1077	struct localedef_t *result)
				1078	{
				1079	struct element_t *startp;
				1080	struct element_t *endp;
				1081	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
				1082
				1083	/* Unlink the entry added for the ellipsis. */
				1084	unlink_element (collate);
				1085	startp = collate->cursor;
				1086
				1087	/* Process and add the end-entry. */
				1088	if (symstr != NULL
				1089	&& insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
				1090	/* Something went wrong with inserting the to-value. This means
				1091	we cannot process the ellipsis. */
				1092	return;
				1093
				1094	/* Reset the cursor. */
				1095	collate->cursor = startp;
				1096
				1097	/* Now we have to handle many different situations:
				1098	- we have to distinguish between the three different ellipsis forms
				1099	- the is the ellipsis at the beginning, in the middle, or at the end.
				1100	*/
				1101	endp = collate->cursor->next;
				1102	assert (symstr == NULL \|\| endp != NULL);
				1103
				1104	/* XXX The following is probably very wrong since also collating symbols
				1105	can appear in ranges. But do we want/can refine the test for that? */
				1106	#if 0
				1107	/* Both, the start and the end symbol, must stand for characters. */
				1108	if ((startp != NULL && (startp->name == NULL \|\| ! startp->is_character))
				1109	\|\| (endp != NULL && (endp->name == NULL\|\| ! endp->is_character)))
				1110	{
				1111	lr_error (ldfile, _("\
				1112	%s: the start and the end symbol of a range must stand for characters"),
				1113	"LC_COLLATE");
				1114	return;
				1115	}
				1116	#endif
				1117
				1118	if (ellipsis == tok_ellipsis3)
				1119	{
				1120	/* One requirement we make here: the length of the byte
				1121	sequences for the first and end character must be the same.
				1122	This is mainly to prevent unwanted effects and this is often
				1123	not what is wanted. */
				1124	size_t len = (startp->mbs != NULL ? startp->nmbs
				1125	: (endp->mbs != NULL ? endp->nmbs : 0));
				1126	char mbcnt[len + 1];
				1127	char mbend[len + 1];
				1128
				1129	/* Well, this should be caught somewhere else already. Just to
				1130	make sure. */
				1131	assert (startp == NULL \|\| startp->wcs == NULL \|\| startp->wcs[1] == 0);
				1132	assert (endp == NULL \|\| endp->wcs == NULL \|\| endp->wcs[1] == 0);
				1133
				1134	if (startp != NULL && endp != NULL
				1135	&& startp->mbs != NULL && endp->mbs != NULL
				1136	&& startp->nmbs != endp->nmbs)
				1137	{
				1138	lr_error (ldfile, _("\
				1139	%s: byte sequences of first and last character must have the same length"),
				1140	"LC_COLLATE");
				1141	return;
				1142	}
				1143
				1144	/* Determine whether we have to generate multibyte sequences. */
				1145	if ((startp == NULL \|\| startp->mbs != NULL)
				1146	&& (endp == NULL \|\| endp->mbs != NULL))
				1147	{
				1148	int cnt;
				1149	int ret;
				1150
				1151	/* Prepare the beginning byte sequence. This is either from the
				1152	beginning byte sequence or it is all nulls if it was an
				1153	initial ellipsis. */
				1154	if (startp == NULL \|\| startp->mbs == NULL)
				1155	memset (mbcnt, '\0', len);
				1156	else
				1157	{
				1158	memcpy (mbcnt, startp->mbs, len);
				1159
				1160	/* And increment it so that the value is the first one we will
				1161	try to insert. */
				1162	for (cnt = len - 1; cnt >= 0; --cnt)
				1163	if (++mbcnt[cnt] != '\0')
				1164	break;
				1165	}
				1166	mbcnt[len] = '\0';
				1167
				1168	/* And the end sequence. */
				1169	if (endp == NULL \|\| endp->mbs == NULL)
				1170	memset (mbend, '\0', len);
				1171	else
				1172	memcpy (mbend, endp->mbs, len);
				1173	mbend[len] = '\0';
				1174
				1175	/* Test whether we have a correct range. */
				1176	ret = memcmp (mbcnt, mbend, len);
				1177	if (ret >= 0)
				1178	{
				1179	if (ret > 0)
				1180	lr_error (ldfile, _("%s: byte sequence of first character of \
				1181	range is not lower than that of the last character"), "LC_COLLATE");
				1182	return;
				1183	}
				1184
				1185	/* Generate the byte sequences data. */
				1186	while (1)
				1187	{
				1188	struct charseq *seq;
				1189
				1190	/* Quite a bit of work ahead. We have to find the character
				1191	definition for the byte sequence and then determine the
				1192	wide character belonging to it. */
				1193	seq = charmap_find_symbol (charmap, mbcnt, len);
				1194	if (seq != NULL)
				1195	{
				1196	struct element_t *elem;
				1197	size_t namelen;
				1198
				1199	/* I don't think this can ever happen. */
				1200	assert (seq->name != NULL);
				1201	namelen = strlen (seq->name);
				1202
				1203	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
				1204	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
				1205	namelen);
				1206
				1207	/* Now we are ready to insert the new value in the
				1208	sequence. Find out whether the element is
				1209	already known. */
				1210	void *ptr;
				1211	if (find_entry (&collate->seq_table, seq->name, namelen,
				1212	&ptr) != 0)
				1213	{
				1214	uint32_t wcs[2] = { seq->ucs4, 0 };
				1215
				1216	/* We have to allocate an entry. */
				1217	elem = new_element (collate, mbcnt, len,
				1218	seq->ucs4 == ILLEGAL_CHAR_VALUE
				1219	? NULL : wcs, seq->name,
				1220	namelen, 1);
				1221
				1222	/* And add it to the table. */
				1223	if (insert_entry (&collate->seq_table, seq->name,
				1224	namelen, elem) != 0)
				1225	/* This cannot happen. */
				1226	assert (! "Internal error");
				1227	}
				1228	else
				1229	/* Copy the result. */
				1230	elem = ptr;
				1231
				1232	/* Test whether this element is not already in the list. */
				1233	if (elem->next != NULL \|\| (collate->cursor != NULL
				1234	&& elem->next == collate->cursor))
				1235	{
				1236	lr_error (ldfile, _("\
				1237	order for `%.*s' already defined at %s:%Zu"),
				1238	(int) namelen, seq->name,
				1239	elem->file, elem->line);
				1240	goto increment;
				1241	}
				1242
				1243	/* Enqueue the new element. */
				1244	elem->last = collate->cursor;
				1245	if (collate->cursor == NULL)
				1246	elem->next = NULL;
				1247	else
				1248	{
				1249	elem->next = collate->cursor->next;
				1250	elem->last->next = elem;
				1251	if (elem->next != NULL)
				1252	elem->next->last = elem;
				1253	}
				1254	if (collate->start == NULL)
				1255	{
				1256	assert (collate->cursor == NULL);
				1257	collate->start = elem;
				1258	}
				1259	collate->cursor = elem;
				1260
				1261	/* Add the weight value. We take them from the
				1262	`ellipsis_weights' member of `collate'. */
				1263	elem->weights = (struct element_list_t *)
				1264	obstack_alloc (&collate->mempool,
				1265	nrules * sizeof (struct element_list_t));
				1266	for (cnt = 0; cnt < nrules; ++cnt)
				1267	if (collate->ellipsis_weight.weights[cnt].cnt == 1
				1268	&& (collate->ellipsis_weight.weights[cnt].w[0]
				1269	== ELEMENT_ELLIPSIS2))
				1270	{
				1271	elem->weights[cnt].w = (struct element_t **)
				1272	obstack_alloc (&collate->mempool,
				1273	sizeof (struct element_t *));
				1274	elem->weights[cnt].w[0] = elem;
				1275	elem->weights[cnt].cnt = 1;
				1276	}
				1277	else
				1278	{
				1279	/* Simply use the weight from `ellipsis_weight'. */
				1280	elem->weights[cnt].w =
				1281	collate->ellipsis_weight.weights[cnt].w;
				1282	elem->weights[cnt].cnt =
				1283	collate->ellipsis_weight.weights[cnt].cnt;
				1284	}
				1285	}
				1286
				1287	/* Increment for the next round. */
				1288	increment:
				1289	for (cnt = len - 1; cnt >= 0; --cnt)
				1290	if (++mbcnt[cnt] != '\0')
				1291	break;
				1292
				1293	/* Find out whether this was all. */
				1294	if (cnt < 0 \|\| memcmp (mbcnt, mbend, len) >= 0)
				1295	/* Yep, that's all. */
				1296	break;
				1297	}
				1298	}
				1299	}
				1300	else
				1301	{
				1302	/* For symbolic range we naturally must have a beginning and an
				1303	end specified by the user. */
				1304	if (startp == NULL)
				1305	lr_error (ldfile, _("\
				1306	%s: symbolic range ellipsis must not directly follow `order_start'"),
				1307	"LC_COLLATE");
				1308	else if (endp == NULL)
				1309	lr_error (ldfile, _("\
				1310	%s: symbolic range ellipsis must not be directly followed by `order_end'"),
				1311	"LC_COLLATE");
				1312	else
				1313	{
				1314	/* Determine the range. To do so we have to determine the
				1315	common prefix of the both names and then the numeric
				1316	values of both ends. */
				1317	size_t lenfrom = strlen (startp->name);
				1318	size_t lento = strlen (endp->name);
				1319	char buf[lento + 1];
				1320	int preflen = 0;
				1321	long int from;
				1322	long int to;
				1323	char *cp;
				1324	int base = ellipsis == tok_ellipsis2 ? 16 : 10;
				1325
				1326	if (lenfrom != lento)
				1327	{
				1328	invalid_range:
				1329	lr_error (ldfile, _("\
				1330	`%s' and `%.*s' are not valid names for symbolic range"),
				1331	startp->name, (int) lento, endp->name);
				1332	return;
				1333	}
				1334
				1335	while (startp->name[preflen] == endp->name[preflen])
				1336	if (startp->name[preflen] == '\0')
				1337	/* Nothing to be done. The start and end point are identical
				1338	and while inserting the end point we have already given
				1339	the user an error message. */
				1340	return;
				1341	else
				1342	++preflen;
				1343
				1344	errno = 0;
				1345	from = strtol (startp->name + preflen, &cp, base);
				1346	if ((from == UINT_MAX && errno == ERANGE) \|\| *cp != '\0')
				1347	goto invalid_range;
				1348
				1349	errno = 0;
				1350	to = strtol (endp->name + preflen, &cp, base);
				1351	if ((to == UINT_MAX && errno == ERANGE) \|\| *cp != '\0')
				1352	goto invalid_range;
				1353
				1354	/* Copy the prefix. */
				1355	memcpy (buf, startp->name, preflen);
				1356
				1357	/* Loop over all values. */
				1358	for (++from; from < to; ++from)
				1359	{
				1360	struct element_t *elem = NULL;
				1361	struct charseq *seq;
				1362	uint32_t wc;
				1363	int cnt;
				1364
				1365	/* Generate the name. */
				1366	sprintf (buf + preflen, base == 10 ? "%0ld" : "%0lX",
				1367	(int) (lenfrom - preflen), from);
				1368
				1369	/* Look whether this name is already defined. */
				1370	void *ptr;
				1371	if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
				1372	{
				1373	/* Copy back the result. */
				1374	elem = ptr;
				1375
				1376	if (elem->next != NULL \|\| (collate->cursor != NULL
				1377	&& elem->next == collate->cursor))
				1378	{
				1379	lr_error (ldfile, _("\
				1380	%s: order for `%.*s' already defined at %s:%Zu"),
				1381	"LC_COLLATE", (int) lenfrom, buf,
				1382	elem->file, elem->line);
				1383	continue;
				1384	}
				1385
				1386	if (elem->name == NULL)
				1387	{
				1388	lr_error (ldfile, _("%s: `%s' must be a character"),
				1389	"LC_COLLATE", buf);
				1390	continue;
				1391	}
				1392	}
				1393
				1394	if (elem == NULL \|\| (elem->mbs == NULL && elem->wcs == NULL))
				1395	{
				1396	/* Search for a character of this name. */
				1397	seq = charmap_find_value (charmap, buf, lenfrom);
				1398	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
				1399	{
				1400	wc = repertoire_find_value (repertoire, buf, lenfrom);
				1401
				1402	if (seq != NULL)
				1403	seq->ucs4 = wc;
				1404	}
				1405	else
				1406	wc = seq->ucs4;
				1407
				1408	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
				1409	/* We don't know anything about a character with this
				1410	name. XXX Should we warn? */
				1411	continue;
				1412
				1413	if (elem == NULL)
				1414	{
				1415	uint32_t wcs[2] = { wc, 0 };
				1416
				1417	/* We have to allocate an entry. */
				1418	elem = new_element (collate,
				1419	seq != NULL
				1420	? (char *) seq->bytes : NULL,
				1421	seq != NULL ? seq->nbytes : 0,
				1422	wc == ILLEGAL_CHAR_VALUE
				1423	? NULL : wcs, buf, lenfrom, 1);
				1424	}
				1425	else
				1426	{
				1427	/* Update the element. */
				1428	if (seq != NULL)
				1429	{
				1430	elem->mbs = obstack_copy0 (&collate->mempool,
				1431	seq->bytes, seq->nbytes);
				1432	elem->nmbs = seq->nbytes;
				1433	}
				1434
				1435	if (wc != ILLEGAL_CHAR_VALUE)
				1436	{
				1437	uint32_t zero = 0;
				1438
				1439	obstack_grow (&collate->mempool,
				1440	&wc, sizeof (uint32_t));
				1441	obstack_grow (&collate->mempool,
				1442	&zero, sizeof (uint32_t));
				1443	elem->wcs = obstack_finish (&collate->mempool);
				1444	elem->nwcs = 1;
				1445	}
				1446	}
				1447
				1448	elem->file = ldfile->fname;
				1449	elem->line = ldfile->lineno;
				1450	elem->section = collate->current_section;
				1451	}
				1452
				1453	/* Enqueue the new element. */
				1454	elem->last = collate->cursor;
				1455	elem->next = collate->cursor->next;
				1456	elem->last->next = elem;
				1457	if (elem->next != NULL)
				1458	elem->next->last = elem;
				1459	collate->cursor = elem;
				1460
				1461	/* Now add the weights. They come from the `ellipsis_weights'
				1462	member of `collate'. */
				1463	elem->weights = (struct element_list_t *)
				1464	obstack_alloc (&collate->mempool,
				1465	nrules * sizeof (struct element_list_t));
				1466	for (cnt = 0; cnt < nrules; ++cnt)
				1467	if (collate->ellipsis_weight.weights[cnt].cnt == 1
				1468	&& (collate->ellipsis_weight.weights[cnt].w[0]
				1469	== ELEMENT_ELLIPSIS2))
				1470	{
				1471	elem->weights[cnt].w = (struct element_t **)
				1472	obstack_alloc (&collate->mempool,
				1473	sizeof (struct element_t *));
				1474	elem->weights[cnt].w[0] = elem;
				1475	elem->weights[cnt].cnt = 1;
				1476	}
				1477	else
				1478	{
				1479	/* Simly use the weight from `ellipsis_weight'. */
				1480	elem->weights[cnt].w =
				1481	collate->ellipsis_weight.weights[cnt].w;
				1482	elem->weights[cnt].cnt =
				1483	collate->ellipsis_weight.weights[cnt].cnt;
				1484	}
				1485	}
				1486	}
				1487	}
				1488	}
				1489
				1490
				1491	static void
				1492	collate_startup (struct linereader ldfile, struct localedef_t locale,
				1493	struct localedef_t *copy_locale, int ignore_content)
				1494	{
				1495	if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
				1496	{
				1497	struct locale_collate_t *collate;
				1498
				1499	if (copy_locale == NULL)
				1500	{
				1501	collate = locale->categories[LC_COLLATE].collate =
				1502	(struct locale_collate_t *)
				1503	xcalloc (1, sizeof (struct locale_collate_t));
				1504
				1505	/* Init the various data structures. */
				1506	init_hash (&collate->elem_table, 100);
				1507	init_hash (&collate->sym_table, 100);
				1508	init_hash (&collate->seq_table, 500);
				1509	obstack_init (&collate->mempool);
				1510
				1511	collate->col_weight_max = -1;
				1512	}
				1513	else
				1514	/* Reuse the copy_locale's data structures. */
				1515	collate = locale->categories[LC_COLLATE].collate =
				1516	copy_locale->categories[LC_COLLATE].collate;
				1517	}
				1518
				1519	ldfile->translate_strings = 0;
				1520	ldfile->return_widestr = 0;
				1521	}
				1522
				1523
				1524	void
				1525	collate_finish (struct localedef_t locale, const struct charmap_t charmap)
				1526	{
				1527	/* Now is the time when we can assign the individual collation
				1528	values for all the symbols. We have possibly different values
				1529	for the wide- and the multibyte-character symbols. This is done
				1530	since it might make a difference in the encoding if there is in
				1531	some cases no multibyte-character but there are wide-characters.
				1532	(The other way around it is not important since theencoded
				1533	collation value in the wide-character case is 32 bits wide and
				1534	therefore requires no encoding).
				1535
				1536	The lowest collation value assigned is 2. Zero is reserved for
				1537	the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
				1538	functions and 1 is used to separate the individual passes for the
				1539	different rules.
				1540
				1541	We also have to construct is list with all the bytes/words which
				1542	can come first in a sequence, followed by all the elements which
				1543	also start with this byte/word. The order is reverse which has
				1544	among others the important effect that longer strings are located
				1545	first in the list. This is required for the output data since
				1546	the algorithm used in `strcoll' etc depends on this.
				1547
				1548	The multibyte case is easy. We simply sort into an array with
				1549	256 elements. */
				1550	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
				1551	int mbact[nrules];
				1552	int wcact;
				1553	int mbseqact;
				1554	int wcseqact;
				1555	struct element_t *runp;
				1556	int i;
				1557	int need_undefined = 0;
				1558	struct section_list *sect;
				1559	int ruleidx;
				1560	int nr_wide_elems = 0;
				1561
				1562	if (collate == NULL)
				1563	{
				1564	/* No data, no check. */
				1565	if (! be_quiet)
				1566	WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
				1567	"LC_COLLATE"));
				1568	return;
				1569	}
				1570
				1571	/* If this assertion is hit change the type in `element_t'. */
				1572	assert (nrules <= sizeof (runp->used_in_level) * 8);
				1573
				1574	/* Make sure that the `position' rule is used either in all sections
				1575	or in none. */
				1576	for (i = 0; i < nrules; ++i)
				1577	for (sect = collate->sections; sect != NULL; sect = sect->next)
				1578	if (sect != collate->current_section
				1579	&& sect->rules != NULL
				1580	&& ((sect->rules[i] & sort_position)
				1581	!= (collate->current_section->rules[i] & sort_position)))
				1582	{
				1583	WITH_CUR_LOCALE (error (0, 0, _("\
				1584	%s: `position' must be used for a specific level in all sections or none"),
				1585	"LC_COLLATE"));
				1586	break;
				1587	}
				1588
				1589	/* Find out which elements are used at which level. At the same
				1590	time we find out whether we have any undefined symbols. */
				1591	runp = collate->start;
				1592	while (runp != NULL)
				1593	{
				1594	if (runp->mbs != NULL)
				1595	{
				1596	for (i = 0; i < nrules; ++i)
				1597	{
				1598	int j;
				1599
				1600	for (j = 0; j < runp->weights[i].cnt; ++j)
				1601	/* A NULL pointer as the weight means IGNORE. */
				1602	if (runp->weights[i].w[j] != NULL)
				1603	{
				1604	if (runp->weights[i].w[j]->weights == NULL)
				1605	{
				1606	WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
				1607	runp->line,
				1608	_("symbol `%s' not defined"),
				1609	runp->weights[i].w[j]->name));
				1610
				1611	need_undefined = 1;
				1612	runp->weights[i].w[j] = &collate->undefined;
				1613	}
				1614	else
				1615	/* Set the bit for the level. */
				1616	runp->weights[i].w[j]->used_in_level \|= 1 << i;
				1617	}
				1618	}
				1619	}
				1620
				1621	/* Up to the next entry. */
				1622	runp = runp->next;
				1623	}
				1624
				1625	/* Walk through the list of defined sequences and assign weights. Also
				1626	create the data structure which will allow generating the single byte
				1627	character based tables.
				1628
				1629	Since at each time only the weights for each of the rules are
				1630	only compared to other weights for this rule it is possible to
				1631	assign more compact weight values than simply counting all
				1632	weights in sequence. We can assign weights from 3, one for each
				1633	rule individually and only for those elements, which are actually
				1634	used for this rule.
				1635
				1636	Why is this important? It is not for the wide char table. But
				1637	it is for the singlebyte output since here larger numbers have to
				1638	be encoded to make it possible to emit the value as a byte
				1639	string. */
				1640	for (i = 0; i < nrules; ++i)
				1641	mbact[i] = 2;
				1642	wcact = 2;
				1643	mbseqact = 0;
				1644	wcseqact = 0;
				1645	runp = collate->start;
				1646	while (runp != NULL)
				1647	{
				1648	/* Determine the order. */
				1649	if (runp->used_in_level != 0)
				1650	{
				1651	runp->mborder = (int *) obstack_alloc (&collate->mempool,
				1652	nrules * sizeof (int));
				1653
				1654	for (i = 0; i < nrules; ++i)
				1655	if ((runp->used_in_level & (1 << i)) != 0)
				1656	runp->mborder[i] = mbact[i]++;
				1657	else
				1658	runp->mborder[i] = 0;
				1659	}
				1660
				1661	if (runp->mbs != NULL)
				1662	{
				1663	struct element_t **eptr;
				1664	struct element_t *lastp = NULL;
				1665
				1666	/* Find the point where to insert in the list. */
				1667	eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
				1668	while (*eptr != NULL)
				1669	{
				1670	if ((*eptr)->nmbs < runp->nmbs)
				1671	break;
				1672
				1673	if ((*eptr)->nmbs == runp->nmbs)
				1674	{
				1675	int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
				1676
				1677	if (c == 0)
				1678	{
				1679	/* This should not happen. It means that we have
				1680	to symbols with the same byte sequence. It is
				1681	of course an error. */
				1682	WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
				1683	(*eptr)->line,
				1684	_("\
				1685	symbol `%s' has the same encoding as"), (*eptr)->name);
				1686	error_at_line (0, 0, runp->file,
				1687	runp->line,
				1688	_("symbol `%s'"),
				1689	runp->name));
				1690	goto dont_insert;
				1691	}
				1692	else if (c < 0)
				1693	/* Insert it here. */
				1694	break;
				1695	}
				1696
				1697	/* To the next entry. */
				1698	lastp = *eptr;
				1699	eptr = &(*eptr)->mbnext;
				1700	}
				1701
				1702	/* Set the pointers. */
				1703	runp->mbnext = *eptr;
				1704	runp->mblast = lastp;
				1705	if (*eptr != NULL)
				1706	(*eptr)->mblast = runp;
				1707	*eptr = runp;
				1708	dont_insert:
				1709	;
				1710	}
				1711
				1712	if (runp->used_in_level)
				1713	{
				1714	runp->wcorder = wcact++;
				1715
				1716	/* We take the opportunity to count the elements which have
				1717	wide characters. */
				1718	++nr_wide_elems;
				1719	}
				1720
				1721	if (runp->is_character)
				1722	{
				1723	if (runp->nmbs == 1)
				1724	collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
				1725
				1726	runp->wcseqorder = wcseqact++;
				1727	}
				1728	else if (runp->mbs != NULL && runp->weights != NULL)
				1729	/* This is for collation elements. */
				1730	runp->wcseqorder = wcseqact++;
				1731
				1732	/* Up to the next entry. */
				1733	runp = runp->next;
				1734	}
				1735
				1736	/* Find out whether any of the `mbheads' entries is unset. In this
				1737	case we use the UNDEFINED entry. */
				1738	for (i = 1; i < 256; ++i)
				1739	if (collate->mbheads[i] == NULL)
				1740	{
				1741	need_undefined = 1;
				1742	collate->mbheads[i] = &collate->undefined;
				1743	}
				1744
				1745	/* Now to the wide character case. */
				1746	collate->wcheads.p = 6;
				1747	collate->wcheads.q = 10;
				1748	wchead_table_init (&collate->wcheads);
				1749
				1750	collate->wcseqorder.p = 6;
				1751	collate->wcseqorder.q = 10;
				1752	collseq_table_init (&collate->wcseqorder);
				1753
				1754	/* Start adding. */
				1755	runp = collate->start;
				1756	while (runp != NULL)
				1757	{
				1758	if (runp->wcs != NULL)
				1759	{
				1760	struct element_t *e;
				1761	struct element_t **eptr;
				1762	struct element_t *lastp;
				1763
				1764	/* Insert the collation sequence value. */
				1765	if (runp->is_character)
				1766	collseq_table_add (&collate->wcseqorder, runp->wcs[0],
				1767	runp->wcseqorder);
				1768
				1769	/* Find the point where to insert in the list. */
				1770	e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
				1771	eptr = &e;
				1772	lastp = NULL;
				1773	while (*eptr != NULL)
				1774	{
				1775	if ((*eptr)->nwcs < runp->nwcs)
				1776	break;
				1777
				1778	if ((*eptr)->nwcs == runp->nwcs)
				1779	{
				1780	int c = wmemcmp ((wchar_t ) (eptr)->wcs,
				1781	(wchar_t *) runp->wcs, runp->nwcs);
				1782
				1783	if (c == 0)
				1784	{
				1785	/* This should not happen. It means that we have
				1786	two symbols with the same byte sequence. It is
				1787	of course an error. */
				1788	WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
				1789	(*eptr)->line,
				1790	_("\
				1791	symbol `%s' has the same encoding as"), (*eptr)->name);
				1792	error_at_line (0, 0, runp->file,
				1793	runp->line,
				1794	_("symbol `%s'"),
				1795	runp->name));
				1796	goto dont_insertwc;
				1797	}
				1798	else if (c < 0)
				1799	/* Insert it here. */
				1800	break;
				1801	}
				1802
				1803	/* To the next entry. */
				1804	lastp = *eptr;
				1805	eptr = &(*eptr)->wcnext;
				1806	}
				1807
				1808	/* Set the pointers. */
				1809	runp->wcnext = *eptr;
				1810	runp->wclast = lastp;
				1811	if (*eptr != NULL)
				1812	(*eptr)->wclast = runp;
				1813	*eptr = runp;
				1814	if (eptr == &e)
				1815	wchead_table_add (&collate->wcheads, runp->wcs[0], e);
				1816	dont_insertwc:
				1817	;
				1818	}
				1819
				1820	/* Up to the next entry. */
				1821	runp = runp->next;
				1822	}
				1823
				1824	/* Now determine whether the UNDEFINED entry is needed and if yes,
				1825	whether it was defined. */
				1826	collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
				1827	if (collate->undefined.file == NULL)
				1828	{
				1829	if (need_undefined)
				1830	{
				1831	/* This seems not to be enforced by recent standards. Don't
				1832	emit an error, simply append UNDEFINED at the end. */
				1833	if (0)
				1834	WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
				1835
				1836	/* Add UNDEFINED at the end. */
				1837	collate->undefined.mborder =
				1838	(int ) obstack_alloc (&collate->mempool, nrules sizeof (int));
				1839
				1840	for (i = 0; i < nrules; ++i)
				1841	collate->undefined.mborder[i] = mbact[i]++;
				1842	}
				1843
				1844	/* In any case we will need the definition for the wide character
				1845	case. But we will not complain that it is missing since the
				1846	specification strangely enough does not seem to account for
				1847	this. */
				1848	collate->undefined.wcorder = wcact++;
				1849	}
				1850
				1851	/* Finally, try to unify the rules for the sections. Whenever the rules
				1852	for a section are the same as those for another section give the
				1853	ruleset the same index. Since there are never many section we can
				1854	use an O(n^2) algorithm here. */
				1855	sect = collate->sections;
				1856	while (sect != NULL && sect->rules == NULL)
				1857	sect = sect->next;
				1858
				1859	/* Bail out if we have no sections because of earlier errors. */
				1860	if (sect == NULL)
				1861	{
				1862	WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
				1863	_("too many errors; giving up")));
				1864	return;
				1865	}
				1866
				1867	ruleidx = 0;
				1868	do
				1869	{
				1870	struct section_list *osect = collate->sections;
				1871
				1872	while (osect != sect)
				1873	if (osect->rules != NULL
				1874	&& memcmp (osect->rules, sect->rules,
				1875	nrules * sizeof (osect->rules[0])) == 0)
				1876	break;
				1877	else
				1878	osect = osect->next;
				1879
				1880	if (osect == sect)
				1881	sect->ruleidx = ruleidx++;
				1882	else
				1883	sect->ruleidx = osect->ruleidx;
				1884
				1885	/* Next section. */
				1886	do
				1887	sect = sect->next;
				1888	while (sect != NULL && sect->rules == NULL);
				1889	}
				1890	while (sect != NULL);
				1891	/* We are currently not prepared for more than 128 rulesets. But this
				1892	should never really be a problem. */
				1893	assert (ruleidx <= 128);
				1894	}
				1895
				1896
				1897	static int32_t
				1898	output_weight (struct obstack pool, struct locale_collate_t collate,
				1899	struct element_t *elem)
				1900	{
				1901	size_t cnt;
				1902	int32_t retval;
				1903
				1904	/* Optimize the use of UNDEFINED. */
				1905	if (elem == &collate->undefined)
				1906	/* The weights are already inserted. */
				1907	return 0;
				1908
				1909	/* This byte can start exactly one collation element and this is
				1910	a single byte. We can directly give the index to the weights. */
				1911	retval = obstack_object_size (pool);
				1912
				1913	/* Construct the weight. */
				1914	for (cnt = 0; cnt < nrules; ++cnt)
				1915	{
				1916	char buf[elem->weights[cnt].cnt * 7];
				1917	int len = 0;
				1918	int i;
				1919
				1920	for (i = 0; i < elem->weights[cnt].cnt; ++i)
				1921	/* Encode the weight value. We do nothing for IGNORE entries. */
				1922	if (elem->weights[cnt].w[i] != NULL)
				1923	len += utf8_encode (&buf[len],
				1924	elem->weights[cnt].w[i]->mborder[cnt]);
				1925
				1926	/* And add the buffer content. */
				1927	obstack_1grow (pool, len);
				1928	obstack_grow (pool, buf, len);
				1929	}
				1930
				1931	return retval \| ((elem->section->ruleidx & 0x7f) << 24);
				1932	}
				1933
				1934
				1935	static int32_t
				1936	output_weightwc (struct obstack pool, struct locale_collate_t collate,
				1937	struct element_t *elem)
				1938	{
				1939	size_t cnt;
				1940	int32_t retval;
				1941
				1942	/* Optimize the use of UNDEFINED. */
				1943	if (elem == &collate->undefined)
				1944	/* The weights are already inserted. */
				1945	return 0;
				1946
				1947	/* This byte can start exactly one collation element and this is
				1948	a single byte. We can directly give the index to the weights. */
				1949	retval = obstack_object_size (pool) / sizeof (int32_t);
				1950
				1951	/* Construct the weight. */
				1952	for (cnt = 0; cnt < nrules; ++cnt)
				1953	{
				1954	int32_t buf[elem->weights[cnt].cnt];
				1955	int i;
				1956	int32_t j;
				1957
				1958	for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
				1959	if (elem->weights[cnt].w[i] != NULL)
				1960	buf[j++] = elem->weights[cnt].w[i]->wcorder;
				1961
				1962	/* And add the buffer content. */
				1963	obstack_int32_grow (pool, j);
				1964
				1965	obstack_grow (pool, buf, j * sizeof (int32_t));
				1966	maybe_swap_uint32_obstack (pool, j);
				1967	}
				1968
				1969	return retval \| ((elem->section->ruleidx & 0x7f) << 24);
				1970	}
				1971
				1972	/* State shared with add_to_tablewc.  If localedef is ever threaded, this would need to be a __thread variable. */
				1973	static struct
				1974	{
				1975	struct obstack *weightpool; /* Pool add_to_tablewc hands to output_weightwc for the weight data.  */
				1976	struct obstack *extrapool; /* Pool for entries that are not a single, simple element; its size in 32-bit words, negated, is stored in the table.  */
				1977	struct obstack *indpool; /* Pool of the indirect table referenced for runs of consecutive entries.  */
				1978	struct locale_collate_t *collate; /* Collation data passed on to output_weightwc.  */
				1979	struct collidx_table *tablewc; /* Table filled through collidx_table_add.  */
				1980	} atwc;
				1981
				1982	static void add_to_tablewc (uint32_t ch, struct element_t *runp);
				1983
				1984	static void
				1985	add_to_tablewc (uint32_t ch, struct element_t *runp)
				1986	{
				1987	if (runp->wcnext == NULL && runp->nwcs == 1)
				1988	{
				1989	int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
				1990	runp);
				1991	collidx_table_add (atwc.tablewc, ch, weigthidx);
				1992	}
				1993	else
				1994	{
				1995	/* As for the singlebyte table, we recognize sequences and
				1996	compress them. */
				1997
				1998	collidx_table_add (atwc.tablewc, ch,
				1999	-(obstack_object_size (atwc.extrapool)
				2000	/ sizeof (uint32_t)));
				2001
				2002	do
				2003	{
				2004	/* Store the current index in the weight table. We know that
				2005	the current position in the `extrapool' is aligned on a
				2006	32-bit address. */
				2007	int32_t weightidx;
				2008	int added;
				2009
				2010	/* Find out wether this is a single entry or we have more than
				2011	one consecutive entry. */
				2012	if (runp->wcnext != NULL
				2013	&& runp->nwcs == runp->wcnext->nwcs
				2014	&& wmemcmp ((wchar_t *) runp->wcs,
				2015	(wchar_t *)runp->wcnext->wcs,
				2016	runp->nwcs - 1) == 0
				2017	&& (runp->wcs[runp->nwcs - 1]
				2018	== runp->wcnext->wcs[runp->nwcs - 1] + 1))
				2019	{
				2020	int i;
				2021	struct element_t *series_startp = runp;
				2022	struct element_t *curp;
				2023
				2024	/* Now add first the initial byte sequence. */
				2025	added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
				2026	if (sizeof (int32_t) == sizeof (int))
				2027	obstack_make_room (atwc.extrapool, added);
				2028
				2029	/* More than one consecutive entry. We mark this by having
				2030	a negative index into the indirect table. */
				2031	obstack_int32_grow_fast (atwc.extrapool,
				2032	-(obstack_object_size (atwc.indpool)
				2033	/ sizeof (int32_t)));
				2034	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
				2035
				2036	do
				2037	runp = runp->wcnext;
				2038	while (runp->wcnext != NULL
				2039	&& runp->nwcs == runp->wcnext->nwcs
				2040	&& wmemcmp ((wchar_t *) runp->wcs,
				2041	(wchar_t *)runp->wcnext->wcs,
				2042	runp->nwcs - 1) == 0
				2043	&& (runp->wcs[runp->nwcs - 1]
				2044	== runp->wcnext->wcs[runp->nwcs - 1] + 1));
				2045
				2046	/* Now walk backward from here to the beginning. */
				2047	curp = runp;
				2048
				2049	for (i = 1; i < runp->nwcs; ++i)
				2050	obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
				2051
				2052	/* Now find the end of the consecutive sequence and
				2053	add all the indeces in the indirect pool. */
				2054	do
				2055	{
				2056	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
				2057	curp);
				2058	obstack_int32_grow (atwc.indpool, weightidx);
				2059
				2060	curp = curp->wclast;
				2061	}
				2062	while (curp != series_startp);
				2063
				2064	/* Add the final weight. */
				2065	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
				2066	curp);
				2067	obstack_int32_grow (atwc.indpool, weightidx);
				2068
				2069	/* And add the end byte sequence. Without length this
				2070	time. */
				2071	for (i = 1; i < curp->nwcs; ++i)
				2072	obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
				2073	}
				2074	else
				2075	{
				2076	/* A single entry. Simply add the index and the length and
				2077	string (except for the first character which is already
				2078	tested for). */
				2079	int i;
				2080
				2081	/* Output the weight info. */
				2082	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
				2083	runp);
				2084
				2085	assert (runp->nwcs > 0);
				2086	added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
				2087	if (sizeof (int) == sizeof (int32_t))
				2088	obstack_make_room (atwc.extrapool, added);
				2089
				2090	obstack_int32_grow_fast (atwc.extrapool, weightidx);
				2091	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
				2092	for (i = 1; i < runp->nwcs; ++i)
				2093	obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
				2094	}
				2095
				2096	/* Next entry. */
				2097	runp = runp->wcnext;
				2098	}
				2099	while (runp != NULL);
				2100	}
				2101	}
				2102
				2103	void
				2104	collate_output (struct localedef_t locale, const struct charmap_t charmap,
				2105	const char *output_path)
				2106	{
				2107	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
				2108	const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
				2109	struct locale_file file;
				2110	size_t ch;
				2111	int32_t tablemb[256];
				2112	struct obstack weightpool;
				2113	struct obstack extrapool;
				2114	struct obstack indirectpool;
				2115	struct section_list *sect;
				2116	struct collidx_table tablewc;
				2117	uint32_t elem_size;
				2118	uint32_t *elem_table;
				2119	int i;
				2120	struct element_t *runp;
				2121
				2122	init_locale_data (&file, nelems);
				2123	add_locale_uint32 (&file, nrules);
				2124
				2125	/* If we have no LC_COLLATE data emit only the number of rules as zero. */
				2126	if (collate == NULL)
				2127	{
				2128	size_t idx;
				2129	for (idx = 1; idx < nelems; idx++)
				2130	{
				2131	/* The words have to be handled specially. */
				2132	if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
				2133	add_locale_uint32 (&file, 0);
				2134	else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE))
				2135	add_locale_uint32 (&file, __cet_other);
				2136	else
				2137	add_locale_empty (&file);
				2138	}
				2139	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
				2140	return;
				2141	}
				2142
				2143	obstack_init (&weightpool);
				2144	obstack_init (&extrapool);
				2145	obstack_init (&indirectpool);
				2146
				2147	/* Since we are using the sign of an integer to mark indirection the
				2148	offsets in the arrays we are indirectly referring to must not be
				2149	zero since -0 == 0. Therefore we add a bit of dummy content. */
				2150	obstack_int32_grow (&extrapool, 0);
				2151	obstack_int32_grow (&indirectpool, 0);
				2152
				2153	/* Prepare the ruleset table. */
				2154	for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
				2155	if (sect->rules != NULL && sect->ruleidx == i)
				2156	{
				2157	int j;
				2158
				2159	obstack_make_room (&weightpool, nrules);
				2160
				2161	for (j = 0; j < nrules; ++j)
				2162	obstack_1grow_fast (&weightpool, sect->rules[j]);
				2163	++i;
				2164	}
				2165	/* And align the output. */
				2166	i = (nrules * i) % LOCFILE_ALIGN;
				2167	if (i > 0)
				2168	do
				2169	obstack_1grow (&weightpool, '\0');
				2170	while (++i < LOCFILE_ALIGN);
				2171
				2172	add_locale_raw_obstack (&file, &weightpool);
				2173
				2174	/* Generate the 8-bit table. Walk through the lists of sequences
				2175	starting with the same byte and add them one after the other to
				2176	the table. In case we have more than one sequence starting with
				2177	the same byte we have to use extra indirection.
				2178
				2179	First add a record for the NUL byte. This entry will never be used
				2180	so it does not matter. */
				2181	tablemb[0] = 0;
				2182
				2183	/* Now insert the `UNDEFINED' value if it is used. Since this value
				2184	will probably be used more than once it is good to store the
				2185	weights only once. */
				2186	if (collate->undefined.used_in_level != 0)
				2187	output_weight (&weightpool, collate, &collate->undefined);
				2188
				2189	for (ch = 1; ch < 256; ++ch)
				2190	if (collate->mbheads[ch]->mbnext == NULL
				2191	&& collate->mbheads[ch]->nmbs <= 1)
				2192	{
				2193	tablemb[ch] = output_weight (&weightpool, collate,
				2194	collate->mbheads[ch]);
				2195	}
				2196	else
				2197	{
				2198	/* The entries in the list are sorted by length and then
				2199	alphabetically. This is the order in which we will add the
				2200	elements to the collation table. This allows simply walking
				2201	the table in sequence and stopping at the first matching
				2202	entry. Since the longer sequences are coming first in the
				2203	list they have the possibility to match first, just as it
				2204	has to be. In the worst case we are walking to the end of
				2205	the list where we put, if no singlebyte sequence is defined
				2206	in the locale definition, the weights for UNDEFINED.
				2207
				2208	To reduce the length of the search list we compress them a bit.
				2209	This happens by collecting sequences of consecutive byte
				2210	sequences in one entry (having and begin and end byte sequence)
				2211	and add only one index into the weight table. We can find the
				2212	consecutive entries since they are also consecutive in the list. */
				2213	struct element_t *runp = collate->mbheads[ch];
				2214	struct element_t *lastp;
				2215
				2216	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
				2217
				2218	tablemb[ch] = -obstack_object_size (&extrapool);
				2219
				2220	do
				2221	{
				2222	/* Store the current index in the weight table. We know that
				2223	the current position in the `extrapool' is aligned on a
				2224	32-bit address. */
				2225	int32_t weightidx;
				2226	int added;
				2227
				2228	/* Find out wether this is a single entry or we have more than
				2229	one consecutive entry. */
				2230	if (runp->mbnext != NULL
				2231	&& runp->nmbs == runp->mbnext->nmbs
				2232	&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
				2233	&& (runp->mbs[runp->nmbs - 1]
				2234	== runp->mbnext->mbs[runp->nmbs - 1] + 1))
				2235	{
				2236	int i;
				2237	struct element_t *series_startp = runp;
				2238	struct element_t *curp;
				2239
				2240	/* Compute how much space we will need. */
				2241	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
				2242	+ 2 * (runp->nmbs - 1));
				2243	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
				2244	obstack_make_room (&extrapool, added);
				2245
				2246	/* More than one consecutive entry. We mark this by having
				2247	a negative index into the indirect table. */
				2248	obstack_int32_grow_fast (&extrapool,
				2249	-(obstack_object_size (&indirectpool)
				2250	/ sizeof (int32_t)));
				2251
				2252	/* Now search first the end of the series. */
				2253	do
				2254	runp = runp->mbnext;
				2255	while (runp->mbnext != NULL
				2256	&& runp->nmbs == runp->mbnext->nmbs
				2257	&& memcmp (runp->mbs, runp->mbnext->mbs,
				2258	runp->nmbs - 1) == 0
				2259	&& (runp->mbs[runp->nmbs - 1]
				2260	== runp->mbnext->mbs[runp->nmbs - 1] + 1));
				2261
				2262	/* Now walk backward from here to the beginning. */
				2263	curp = runp;
				2264
				2265	assert (runp->nmbs <= 256);
				2266	obstack_1grow_fast (&extrapool, curp->nmbs - 1);
				2267	for (i = 1; i < curp->nmbs; ++i)
				2268	obstack_1grow_fast (&extrapool, curp->mbs[i]);
				2269
				2270	/* Now find the end of the consecutive sequence and
				2271	add all the indeces in the indirect pool. */
				2272	do
				2273	{
				2274	weightidx = output_weight (&weightpool, collate, curp);
				2275	obstack_int32_grow (&indirectpool, weightidx);
				2276
				2277	curp = curp->mblast;
				2278	}
				2279	while (curp != series_startp);
				2280
				2281	/* Add the final weight. */
				2282	weightidx = output_weight (&weightpool, collate, curp);
				2283	obstack_int32_grow (&indirectpool, weightidx);
				2284
				2285	/* And add the end byte sequence. Without length this
				2286	time. */
				2287	for (i = 1; i < curp->nmbs; ++i)
				2288	obstack_1grow_fast (&extrapool, curp->mbs[i]);
				2289	}
				2290	else
				2291	{
				2292	/* A single entry. Simply add the index and the length and
				2293	string (except for the first character which is already
				2294	tested for). */
				2295	int i;
				2296
				2297	/* Output the weight info. */
				2298	weightidx = output_weight (&weightpool, collate, runp);
				2299
				2300	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
				2301	+ runp->nmbs - 1);
				2302	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
				2303	obstack_make_room (&extrapool, added);
				2304
				2305	obstack_int32_grow_fast (&extrapool, weightidx);
				2306	assert (runp->nmbs <= 256);
				2307	obstack_1grow_fast (&extrapool, runp->nmbs - 1);
				2308
				2309	for (i = 1; i < runp->nmbs; ++i)
				2310	obstack_1grow_fast (&extrapool, runp->mbs[i]);
				2311	}
				2312
				2313	/* Add alignment bytes if necessary. */
				2314	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
				2315	obstack_1grow_fast (&extrapool, '\0');
				2316
				2317	/* Next entry. */
				2318	lastp = runp;
				2319	runp = runp->mbnext;
				2320	}
				2321	while (runp != NULL);
				2322
				2323	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
				2324
				2325	/* If the final entry in the list is not a single character we
				2326	add an UNDEFINED entry here. */
				2327	if (lastp->nmbs != 1)
				2328	{
				2329	int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
				2330	obstack_make_room (&extrapool, added);
				2331
				2332	obstack_int32_grow_fast (&extrapool, 0);
				2333	/* XXX What rule? We just pick the first. */
				2334	obstack_1grow_fast (&extrapool, 0);
				2335	/* Length is zero. */
				2336	obstack_1grow_fast (&extrapool, 0);
				2337
				2338	/* Add alignment bytes if necessary. */
				2339	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
				2340	obstack_1grow_fast (&extrapool, '\0');
				2341	}
				2342	}
				2343
				2344	/* Add padding to the tables if necessary. */
				2345	while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
				2346	obstack_1grow (&weightpool, 0);
				2347
				2348	/* Now add the four tables. */
				2349	add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
				2350	add_locale_raw_obstack (&file, &weightpool);
				2351	add_locale_raw_obstack (&file, &extrapool);
				2352	add_locale_raw_obstack (&file, &indirectpool);
				2353
				2354	/* Now the same for the wide character table. We need to store some
				2355	more information here. */
				2356	add_locale_empty (&file);
				2357	add_locale_empty (&file);
				2358	add_locale_empty (&file);
				2359
				2360	/* Since we are using the sign of an integer to mark indirection the
				2361	offsets in the arrays we are indirectly referring to must not be
				2362	zero since -0 == 0. Therefore we add a bit of dummy content. */
				2363	obstack_int32_grow (&extrapool, 0);
				2364	obstack_int32_grow (&indirectpool, 0);
				2365
				2366	/* Now insert the `UNDEFINED' value if it is used. Since this value
				2367	will probably be used more than once it is good to store the
				2368	weights only once. */
				2369	if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
				2370	abort ();
				2371
				2372	/* Generate the table. Walk through the lists of sequences starting
				2373	with the same wide character and add them one after the other to
				2374	the table. In case we have more than one sequence starting with
				2375	the same byte we have to use extra indirection. */
				2376	tablewc.p = 6;
				2377	tablewc.q = 10;
				2378	collidx_table_init (&tablewc);
				2379
				2380	atwc.weightpool = &weightpool;
				2381	atwc.extrapool = &extrapool;
				2382	atwc.indpool = &indirectpool;
				2383	atwc.collate = collate;
				2384	atwc.tablewc = &tablewc;
				2385
				2386	wchead_table_iterate (&collate->wcheads, add_to_tablewc);
				2387
				2388	memset (&atwc, 0, sizeof (atwc));
				2389
				2390	/* Now add the four tables. */
				2391	add_locale_collidx_table (&file, &tablewc);
				2392	add_locale_raw_obstack (&file, &weightpool);
				2393	add_locale_raw_obstack (&file, &extrapool);
				2394	add_locale_raw_obstack (&file, &indirectpool);
				2395
				2396	/* Finally write the table with collation element names out. It is
				2397	a hash table with a simple function which gets the name of the
				2398	character as the input. One character might have many names. The
				2399	value associated with the name is an index into the weight table
				2400	where we are then interested in the first-level weight value.
				2401
				2402	To determine how large the table should be we are counting the
				2403	elements have to put in. Since we are using internal chaining
				2404	using a secondary hash function we have to make the table a bit
				2405	larger to avoid extremely long search times. We can achieve
				2406	good results with a 40% larger table than there are entries. */
				2407	elem_size = 0;
				2408	runp = collate->start;
				2409	while (runp != NULL)
				2410	{
				2411	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
				2412	/* Yep, the element really counts. */
				2413	++elem_size;
				2414
				2415	runp = runp->next;
				2416	}
				2417	/* Add 40% and find the next prime number. */
				2418	elem_size = next_prime (elem_size * 1.4);
				2419
				2420	/* Allocate the table. Each entry consists of two words: the hash
				2421	value and an index in a secondary table which provides the index
				2422	into the weight table and the string itself (so that a match can
				2423	be determined). */
				2424	elem_table = (uint32_t *) obstack_alloc (&extrapool,
				2425	elem_size * 2 * sizeof (uint32_t));
				2426	memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
				2427
				2428	/* Now add the elements. */
				2429	runp = collate->start;
				2430	while (runp != NULL)
				2431	{
				2432	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
				2433	{
				2434	/* Compute the hash value of the name. */
				2435	uint32_t namelen = strlen (runp->name);
				2436	uint32_t hash = elem_hash (runp->name, namelen);
				2437	size_t idx = hash % elem_size;
				2438	#ifndef NDEBUG
				2439	size_t start_idx = idx;
				2440	#endif
				2441
				2442	if (elem_table[idx * 2] != 0)
				2443	{
				2444	/* The spot is already taken. Try iterating using the value
				2445	from the secondary hashing function. */
				2446	size_t iter = hash % (elem_size - 2) + 1;
				2447
				2448	do
				2449	{
				2450	idx += iter;
				2451	if (idx >= elem_size)
				2452	idx -= elem_size;
				2453	assert (idx != start_idx);
				2454	}
				2455	while (elem_table[idx * 2] != 0);
				2456	}
				2457	/* This is the spot where we will insert the value. */
				2458	elem_table[idx * 2] = hash;
				2459	elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
				2460
				2461	/* The string itself including length. */
				2462	obstack_1grow (&extrapool, namelen);
				2463	obstack_grow (&extrapool, runp->name, namelen);
				2464
				2465	/* And the multibyte representation. */
				2466	obstack_1grow (&extrapool, runp->nmbs);
				2467	obstack_grow (&extrapool, runp->mbs, runp->nmbs);
				2468
				2469	/* And align again to 32 bits. */
				2470	if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
				2471	obstack_grow (&extrapool, "\0\0",
				2472	(sizeof (int32_t)
				2473	- ((1 + namelen + 1 + runp->nmbs)
				2474	% sizeof (int32_t))));
				2475
				2476	/* Now some 32-bit values: multibyte collation sequence,
				2477	wide char string (including length), and wide char
				2478	collation sequence. */
				2479	obstack_int32_grow (&extrapool, runp->mbseqorder);
				2480
				2481	obstack_int32_grow (&extrapool, runp->nwcs);
				2482	obstack_grow (&extrapool, runp->wcs,
				2483	runp->nwcs * sizeof (uint32_t));
				2484	maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
				2485
				2486	obstack_int32_grow (&extrapool, runp->wcseqorder);
				2487	}
				2488
				2489	runp = runp->next;
				2490	}
				2491
				2492	/* Prepare to write out this data. */
				2493	add_locale_uint32 (&file, elem_size);
				2494	add_locale_uint32_array (&file, elem_table, 2 * elem_size);
				2495	add_locale_raw_obstack (&file, &extrapool);
				2496	add_locale_raw_data (&file, collate->mbseqorder, 256);
				2497	add_locale_collseq_table (&file, &collate->wcseqorder);
				2498	add_locale_string (&file, charmap->code_set_name);
				2499	if (strcmp (charmap->code_set_name, "UTF-8") == 0)
				2500	add_locale_uint32 (&file, __cet_utf8);
				2501	else if (charmap->mb_cur_max == 1)
				2502	add_locale_uint32 (&file, __cet_8bit);
				2503	else
				2504	add_locale_uint32 (&file, __cet_other);
				2505	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
				2506
				2507	obstack_free (&weightpool, NULL);
				2508	obstack_free (&extrapool, NULL);
				2509	obstack_free (&indirectpool, NULL);
				2510	}
				2511
				2512
				2513	static enum token_t
				2514	skip_to (struct linereader ldfile, struct locale_collate_t collate,
				2515	const struct charmap_t *charmap, int to_endif)
				2516	{
				2517	while (1)
				2518	{
				2519	struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
				2520	enum token_t nowtok = now->tok;
				2521
				2522	if (nowtok == tok_eof \|\| nowtok == tok_end)
				2523	return nowtok;
				2524
				2525	if (nowtok == tok_ifdef \|\| nowtok == tok_ifndef)
				2526	{
				2527	lr_error (ldfile, _("%s: nested conditionals not supported"),
				2528	"LC_COLLATE");
				2529	nowtok = skip_to (ldfile, collate, charmap, tok_endif);
				2530	if (nowtok == tok_eof \|\| nowtok == tok_end)
				2531	return nowtok;
				2532	}
				2533	else if (nowtok == tok_endif \|\| (!to_endif && nowtok == tok_else))
				2534	{
				2535	lr_ignore_rest (ldfile, 1);
				2536	return nowtok;
				2537	}
				2538	else if (!to_endif && (nowtok == tok_elifdef \|\| nowtok == tok_elifndef))
				2539	{
				2540	/* Do not read the rest of the line. */
				2541	return nowtok;
				2542	}
				2543	else if (nowtok == tok_else)
				2544	{
				2545	lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
				2546	}
				2547
				2548	lr_ignore_rest (ldfile, 0);
				2549	}
				2550	}
				2551
				2552
				2553	void
				2554	collate_read (struct linereader ldfile, struct localedef_t result,
				2555	const struct charmap_t charmap, const char repertoire_name,
				2556	int ignore_content)
				2557	{
				2558	struct repertoire_t *repertoire = NULL;
				2559	struct locale_collate_t *collate;
				2560	struct token *now;
				2561	struct token *arg = NULL;
				2562	enum token_t nowtok;
				2563	enum token_t was_ellipsis = tok_none;
				2564	struct localedef_t *copy_locale = NULL;
				2565	/* Parsing state:
				2566	0 - start
				2567	1 - between `order-start' and `order-end'
				2568	2 - after `order-end'
				2569	3 - after `reorder-after', waiting for `reorder-end'
				2570	4 - after `reorder-end'
				2571	5 - after `reorder-sections-after', waiting for `reorder-sections-end'
				2572	6 - after `reorder-sections-end'
				2573	*/
				2574	int state = 0;
				2575
				2576	/* Get the repertoire we have to use. */
				2577	if (repertoire_name != NULL)
				2578	repertoire = repertoire_read (repertoire_name);
				2579
				2580	/* The rest of the line containing `LC_COLLATE' must be free. */
				2581	lr_ignore_rest (ldfile, 1);
				2582
				2583	while (1)
				2584	{
				2585	do
				2586	{
				2587	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2588	nowtok = now->tok;
				2589	}
				2590	while (nowtok == tok_eol);
				2591
				2592	if (nowtok != tok_define)
				2593	break;
				2594
				2595	if (ignore_content)
				2596	lr_ignore_rest (ldfile, 0);
				2597	else
				2598	{
				2599	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				2600	if (arg->tok != tok_ident)
				2601	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
				2602	else
				2603	{
				2604	/* Simply add the new symbol. */
				2605	struct name_list newsym = xmalloc (sizeof (newsym)
				2606	+ arg->val.str.lenmb + 1);
				2607	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
				2608	newsym->str[arg->val.str.lenmb] = '\0';
				2609	newsym->next = defined;
				2610	defined = newsym;
				2611
				2612	lr_ignore_rest (ldfile, 1);
				2613	}
				2614	}
				2615	}
				2616
				2617	if (nowtok == tok_copy)
				2618	{
				2619	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2620	if (now->tok != tok_string)
				2621	{
				2622	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
				2623
				2624	skip_category:
				2625	do
				2626	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2627	while (now->tok != tok_eof && now->tok != tok_end);
				2628
				2629	if (now->tok != tok_eof
				2630	\|\| (now = lr_token (ldfile, charmap, result, NULL, verbose),
				2631	now->tok == tok_eof))
				2632	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
				2633	else if (now->tok != tok_lc_collate)
				2634	{
				2635	lr_error (ldfile, _("\
				2636	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
				2637	lr_ignore_rest (ldfile, 0);
				2638	}
				2639	else
				2640	lr_ignore_rest (ldfile, 1);
				2641
				2642	return;
				2643	}
				2644
				2645	if (! ignore_content)
				2646	{
				2647	/* Get the locale definition. */
				2648	copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
				2649	repertoire_name, charmap, NULL);
				2650	if ((copy_locale->avail & COLLATE_LOCALE) == 0)
				2651	{
				2652	/* Not yet loaded. So do it now. */
				2653	if (locfile_read (copy_locale, charmap) != 0)
				2654	goto skip_category;
				2655	}
				2656
				2657	if (copy_locale->categories[LC_COLLATE].collate == NULL)
				2658	return;
				2659	}
				2660
				2661	lr_ignore_rest (ldfile, 1);
				2662
				2663	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2664	nowtok = now->tok;
				2665	}
				2666
				2667	/* Prepare the data structures. */
				2668	collate_startup (ldfile, result, copy_locale, ignore_content);
				2669	collate = result->categories[LC_COLLATE].collate;
				2670
				2671	while (1)
				2672	{
				2673	char ucs4buf[10];
				2674	char *symstr;
				2675	size_t symlen;
				2676
				2677	/* Of course we don't proceed beyond the end of file. */
				2678	if (nowtok == tok_eof)
				2679	break;
				2680
				2681	/* Ingore empty lines. */
				2682	if (nowtok == tok_eol)
				2683	{
				2684	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2685	nowtok = now->tok;
				2686	continue;
				2687	}
				2688
				2689	switch (nowtok)
				2690	{
				2691	case tok_copy:
				2692	/* Allow copying other locales. */
				2693	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2694	if (now->tok != tok_string)
				2695	goto err_label;
				2696
				2697	if (! ignore_content)
				2698	load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
				2699	charmap, result);
				2700
				2701	lr_ignore_rest (ldfile, 1);
				2702	break;
				2703
				2704	case tok_coll_weight_max:
				2705	/* Ignore the rest of the line if we don't need the input of
				2706	this line. */
				2707	if (ignore_content)
				2708	{
				2709	lr_ignore_rest (ldfile, 0);
				2710	break;
				2711	}
				2712
				2713	if (state != 0)
				2714	goto err_label;
				2715
				2716	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				2717	if (arg->tok != tok_number)
				2718	goto err_label;
				2719	if (collate->col_weight_max != -1)
				2720	lr_error (ldfile, _("%s: duplicate definition of `%s'"),
				2721	"LC_COLLATE", "col_weight_max");
				2722	else
				2723	collate->col_weight_max = arg->val.num;
				2724	lr_ignore_rest (ldfile, 1);
				2725	break;
				2726
				2727	case tok_section_symbol:
				2728	/* Ignore the rest of the line if we don't need the input of
				2729	this line. */
				2730	if (ignore_content)
				2731	{
				2732	lr_ignore_rest (ldfile, 0);
				2733	break;
				2734	}
				2735
				2736	if (state != 0)
				2737	goto err_label;
				2738
				2739	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2740	if (arg->tok != tok_bsymbol)
				2741	goto err_label;
				2742	else if (!ignore_content)
				2743	{
				2744	/* Check whether this section is already known. */
				2745	struct section_list *known = collate->sections;
				2746	while (known != NULL)
				2747	{
				2748	if (strcmp (known->name, arg->val.str.startmb) == 0)
				2749	break;
				2750	known = known->next;
				2751	}
				2752
				2753	if (known != NULL)
				2754	{
				2755	lr_error (ldfile,
				2756	_("%s: duplicate declaration of section `%s'"),
				2757	"LC_COLLATE", arg->val.str.startmb);
				2758	free (arg->val.str.startmb);
				2759	}
				2760	else
				2761	collate->sections = make_seclist_elem (collate,
				2762	arg->val.str.startmb,
				2763	collate->sections);
				2764
				2765	lr_ignore_rest (ldfile, known == NULL);
				2766	}
				2767	else
				2768	{
				2769	free (arg->val.str.startmb);
				2770	lr_ignore_rest (ldfile, 0);
				2771	}
				2772	break;
				2773
				2774	case tok_collating_element:
				2775	/* Ignore the rest of the line if we don't need the input of
				2776	this line. */
				2777	if (ignore_content)
				2778	{
				2779	lr_ignore_rest (ldfile, 0);
				2780	break;
				2781	}
				2782
				2783	if (state != 0 && state != 2)
				2784	goto err_label;
				2785
				2786	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2787	if (arg->tok != tok_bsymbol)
				2788	goto err_label;
				2789	else
				2790	{
				2791	const char *symbol = arg->val.str.startmb;
				2792	size_t symbol_len = arg->val.str.lenmb;
				2793
				2794	/* Next the `from' keyword. */
				2795	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2796	if (arg->tok != tok_from)
				2797	{
				2798	free ((char *) symbol);
				2799	goto err_label;
				2800	}
				2801
				2802	ldfile->return_widestr = 1;
				2803	ldfile->translate_strings = 1;
				2804
				2805	/* Finally the string with the replacement. */
				2806	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2807
				2808	ldfile->return_widestr = 0;
				2809	ldfile->translate_strings = 0;
				2810
				2811	if (arg->tok != tok_string)
				2812	goto err_label;
				2813
				2814	if (!ignore_content && symbol != NULL)
				2815	{
				2816	/* The name is already defined. */
				2817	if (check_duplicate (ldfile, collate, charmap,
				2818	repertoire, symbol, symbol_len))
				2819	goto col_elem_free;
				2820
				2821	if (arg->val.str.startmb != NULL)
				2822	insert_entry (&collate->elem_table, symbol, symbol_len,
				2823	new_element (collate,
				2824	arg->val.str.startmb,
				2825	arg->val.str.lenmb - 1,
				2826	arg->val.str.startwc,
				2827	symbol, symbol_len, 0));
				2828	}
				2829	else
				2830	{
				2831	col_elem_free:
				2832	free ((char *) symbol);
				2833	free (arg->val.str.startmb);
				2834	free (arg->val.str.startwc);
				2835	}
				2836	lr_ignore_rest (ldfile, 1);
				2837	}
				2838	break;
				2839
				2840	case tok_collating_symbol:
				2841	/* Ignore the rest of the line if we don't need the input of
				2842	this line. */
				2843	if (ignore_content)
				2844	{
				2845	lr_ignore_rest (ldfile, 0);
				2846	break;
				2847	}
				2848
				2849	if (state != 0 && state != 2)
				2850	goto err_label;
				2851
				2852	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2853	if (arg->tok != tok_bsymbol)
				2854	goto err_label;
				2855	else
				2856	{
				2857	char *symbol = arg->val.str.startmb;
				2858	size_t symbol_len = arg->val.str.lenmb;
				2859	char *endsymbol = NULL;
				2860	size_t endsymbol_len = 0;
				2861	enum token_t ellipsis = tok_none;
				2862
				2863	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2864	if (arg->tok == tok_ellipsis2 \|\| arg->tok == tok_ellipsis4)
				2865	{
				2866	ellipsis = arg->tok;
				2867
				2868	arg = lr_token (ldfile, charmap, result, repertoire,
				2869	verbose);
				2870	if (arg->tok != tok_bsymbol)
				2871	{
				2872	free (symbol);
				2873	goto err_label;
				2874	}
				2875
				2876	endsymbol = arg->val.str.startmb;
				2877	endsymbol_len = arg->val.str.lenmb;
				2878
				2879	lr_ignore_rest (ldfile, 1);
				2880	}
				2881	else if (arg->tok != tok_eol)
				2882	{
				2883	free (symbol);
				2884	goto err_label;
				2885	}
				2886
				2887	if (!ignore_content)
				2888	{
				2889	if (symbol == NULL
				2890	\|\| (ellipsis != tok_none && endsymbol == NULL))
				2891	{
				2892	lr_error (ldfile, _("\
				2893	%s: unknown character in collating symbol name"),
				2894	"LC_COLLATE");
				2895	goto col_sym_free;
				2896	}
				2897	else if (ellipsis == tok_none)
				2898	{
				2899	/* A single symbol, no ellipsis. */
				2900	if (check_duplicate (ldfile, collate, charmap,
				2901	repertoire, symbol, symbol_len))
				2902	/* The name is already defined. */
				2903	goto col_sym_free;
				2904
				2905	insert_entry (&collate->sym_table, symbol, symbol_len,
				2906	new_symbol (collate, symbol, symbol_len));
				2907	}
				2908	else if (symbol_len != endsymbol_len)
				2909	{
				2910	col_sym_inv_range:
				2911	lr_error (ldfile,
				2912	_("invalid names for character range"));
				2913	goto col_sym_free;
				2914	}
				2915	else
				2916	{
				2917	/* Oh my, we have to handle an ellipsis. First, as
				2918	usual, determine the common prefix and then
				2919	convert the rest into a range. */
				2920	size_t prefixlen;
				2921	unsigned long int from;
				2922	unsigned long int to;
				2923	char *endp;
				2924
				2925	for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
				2926	if (symbol[prefixlen] != endsymbol[prefixlen])
				2927	break;
				2928
				2929	/* Convert the rest into numbers. */
				2930	symbol[symbol_len] = '\0';
				2931	from = strtoul (&symbol[prefixlen], &endp,
				2932	ellipsis == tok_ellipsis2 ? 16 : 10);
				2933	if (*endp != '\0')
				2934	goto col_sym_inv_range;
				2935
				2936	endsymbol[symbol_len] = '\0';
				2937	to = strtoul (&endsymbol[prefixlen], &endp,
				2938	ellipsis == tok_ellipsis2 ? 16 : 10);
				2939	if (*endp != '\0')
				2940	goto col_sym_inv_range;
				2941
				2942	if (from > to)
				2943	goto col_sym_inv_range;
				2944
				2945	/* Now loop over all entries. */
				2946	while (from <= to)
				2947	{
				2948	char *symbuf;
				2949
				2950	symbuf = (char *) obstack_alloc (&collate->mempool,
				2951	symbol_len + 1);
				2952
				2953	/* Create the name. */
				2954	sprintf (symbuf,
				2955	ellipsis == tok_ellipsis2
				2956	? "%.s%.lX" : "%.s%.lu",
				2957	(int) prefixlen, symbol,
				2958	(int) (symbol_len - prefixlen), from);
				2959
				2960	if (check_duplicate (ldfile, collate, charmap,
				2961	repertoire, symbuf, symbol_len))
				2962	/* The name is already defined. */
				2963	goto col_sym_free;
				2964
				2965	insert_entry (&collate->sym_table, symbuf,
				2966	symbol_len,
				2967	new_symbol (collate, symbuf,
				2968	symbol_len));
				2969
				2970	/* Increment the counter. */
				2971	++from;
				2972	}
				2973
				2974	goto col_sym_free;
				2975	}
				2976	}
				2977	else
				2978	{
				2979	col_sym_free:
				2980	free (symbol);
				2981	free (endsymbol);
				2982	}
				2983	}
				2984	break;
				2985
				2986	case tok_symbol_equivalence:
				2987	/* Ignore the rest of the line if we don't need the input of
				2988	this line. */
				2989	if (ignore_content)
				2990	{
				2991	lr_ignore_rest (ldfile, 0);
				2992	break;
				2993	}
				2994
				2995	if (state != 0)
				2996	goto err_label;
				2997
				2998	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2999	if (arg->tok != tok_bsymbol)
				3000	goto err_label;
				3001	else
				3002	{
				3003	const char *newname = arg->val.str.startmb;
				3004	size_t newname_len = arg->val.str.lenmb;
				3005	const char *symname;
				3006	size_t symname_len;
				3007	void symval; / Actually struct symbol_t* */
				3008
				3009	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3010	if (arg->tok != tok_bsymbol)
				3011	{
				3012	free ((char *) newname);
				3013	goto err_label;
				3014	}
				3015
				3016	symname = arg->val.str.startmb;
				3017	symname_len = arg->val.str.lenmb;
				3018
				3019	if (newname == NULL)
				3020	{
				3021	lr_error (ldfile, _("\
				3022	%s: unknown character in equivalent definition name"),
				3023	"LC_COLLATE");
				3024
				3025	sym_equiv_free:
				3026	free ((char *) newname);
				3027	free ((char *) symname);
				3028	break;
				3029	}
				3030	if (symname == NULL)
				3031	{
				3032	lr_error (ldfile, _("\
				3033	%s: unknown character in equivalent definition value"),
				3034	"LC_COLLATE");
				3035	goto sym_equiv_free;
				3036	}
				3037
				3038	/* See whether the symbol name is already defined. */
				3039	if (find_entry (&collate->sym_table, symname, symname_len,
				3040	&symval) != 0)
				3041	{
				3042	lr_error (ldfile, _("\
				3043	%s: unknown symbol `%s' in equivalent definition"),
				3044	"LC_COLLATE", symname);
				3045	goto sym_equiv_free;
				3046	}
				3047
				3048	if (insert_entry (&collate->sym_table,
				3049	newname, newname_len, symval) < 0)
				3050	{
				3051	lr_error (ldfile, _("\
				3052	error while adding equivalent collating symbol"));
				3053	goto sym_equiv_free;
				3054	}
				3055
				3056	free ((char *) symname);
				3057	}
				3058	lr_ignore_rest (ldfile, 1);
				3059	break;
				3060
				3061	case tok_script:
				3062	/* Ignore the rest of the line if we don't need the input of
				3063	this line. */
				3064	if (ignore_content)
				3065	{
				3066	lr_ignore_rest (ldfile, 0);
				3067	break;
				3068	}
				3069
				3070	/* We get told about the scripts we know. */
				3071	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3072	if (arg->tok != tok_bsymbol)
				3073	goto err_label;
				3074	else
				3075	{
				3076	struct section_list *runp = collate->known_sections;
				3077	char *name;
				3078
				3079	while (runp != NULL)
				3080	if (strncmp (runp->name, arg->val.str.startmb,
				3081	arg->val.str.lenmb) == 0
				3082	&& runp->name[arg->val.str.lenmb] == '\0')
				3083	break;
				3084	else
				3085	runp = runp->def_next;
				3086
				3087	if (runp != NULL)
				3088	{
				3089	lr_error (ldfile, _("duplicate definition of script `%s'"),
				3090	runp->name);
				3091	lr_ignore_rest (ldfile, 0);
				3092	break;
				3093	}
				3094
				3095	runp = (struct section_list ) xcalloc (1, sizeof (runp));
				3096	name = (char *) xmalloc (arg->val.str.lenmb + 1);
				3097	memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
				3098	name[arg->val.str.lenmb] = '\0';
				3099	runp->name = name;
				3100
				3101	runp->def_next = collate->known_sections;
				3102	collate->known_sections = runp;
				3103	}
				3104	lr_ignore_rest (ldfile, 1);
				3105	break;
				3106
				3107	case tok_order_start:
				3108	/* Ignore the rest of the line if we don't need the input of
				3109	this line. */
				3110	if (ignore_content)
				3111	{
				3112	lr_ignore_rest (ldfile, 0);
				3113	break;
				3114	}
				3115
				3116	if (state != 0 && state != 1 && state != 2)
				3117	goto err_label;
				3118	state = 1;
				3119
				3120	/* The 14652 draft does not specify whether all `order_start' lines
				3121	must contain the same number of sort-rules, but 14651 does. So
				3122	we require this here as well. */
				3123	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3124	if (arg->tok == tok_bsymbol)
				3125	{
				3126	/* This better should be a section name. */
				3127	struct section_list *sp = collate->known_sections;
				3128	while (sp != NULL
				3129	&& (sp->name == NULL
				3130	\|\| strncmp (sp->name, arg->val.str.startmb,
				3131	arg->val.str.lenmb) != 0
				3132	\|\| sp->name[arg->val.str.lenmb] != '\0'))
				3133	sp = sp->def_next;
				3134
				3135	if (sp == NULL)
				3136	{
				3137	lr_error (ldfile, _("\
				3138	%s: unknown section name `%.*s'"),
				3139	"LC_COLLATE", (int) arg->val.str.lenmb,
				3140	arg->val.str.startmb);
				3141	/* We use the error section. */
				3142	collate->current_section = &collate->error_section;
				3143
				3144	if (collate->error_section.first == NULL)
				3145	{
				3146	/* Insert &collate->error_section at the end of
				3147	the collate->sections list. */
				3148	if (collate->sections == NULL)
				3149	collate->sections = &collate->error_section;
				3150	else
				3151	{
				3152	sp = collate->sections;
				3153	while (sp->next != NULL)
				3154	sp = sp->next;
				3155
				3156	sp->next = &collate->error_section;
				3157	}
				3158	collate->error_section.next = NULL;
				3159	}
				3160	}
				3161	else
				3162	{
				3163	/* One should not be allowed to open the same
				3164	section twice. */
				3165	if (sp->first != NULL)
				3166	lr_error (ldfile, _("\
				3167	%s: multiple order definitions for section `%s'"),
				3168	"LC_COLLATE", sp->name);
				3169	else
				3170	{
				3171	/* Insert sp in the collate->sections list,
				3172	right after collate->current_section. */
				3173	if (collate->current_section != NULL)
				3174	{
				3175	sp->next = collate->current_section->next;
				3176	collate->current_section->next = sp;
				3177	}
				3178	else if (collate->sections == NULL)
				3179	/* This is the first section to be defined. */
				3180	collate->sections = sp;
				3181
				3182	collate->current_section = sp;
				3183	}
				3184
				3185	/* Next should come the end of the line or a semicolon. */
				3186	arg = lr_token (ldfile, charmap, result, repertoire,
				3187	verbose);
				3188	if (arg->tok == tok_eol)
				3189	{
				3190	uint32_t cnt;
				3191
				3192	/* This means we have exactly one rule: `forward'. */
				3193	if (nrules > 1)
				3194	lr_error (ldfile, _("\
				3195	%s: invalid number of sorting rules"),
				3196	"LC_COLLATE");
				3197	else
				3198	nrules = 1;
				3199	sp->rules = obstack_alloc (&collate->mempool,
				3200	(sizeof (enum coll_sort_rule)
				3201	* nrules));
				3202	for (cnt = 0; cnt < nrules; ++cnt)
				3203	sp->rules[cnt] = sort_forward;
				3204
				3205	/* Next line. */
				3206	break;
				3207	}
				3208
				3209	/* Get the next token. */
				3210	arg = lr_token (ldfile, charmap, result, repertoire,
				3211	verbose);
				3212	}
				3213	}
				3214	else
				3215	{
				3216	/* There is no section symbol. Therefore we use the unnamed
				3217	section. */
				3218	collate->current_section = &collate->unnamed_section;
				3219
				3220	if (collate->unnamed_section_defined)
				3221	lr_error (ldfile, _("\
				3222	%s: multiple order definitions for unnamed section"),
				3223	"LC_COLLATE");
				3224	else
				3225	{
				3226	/* Insert &collate->unnamed_section at the beginning of
				3227	the collate->sections list. */
				3228	collate->unnamed_section.next = collate->sections;
				3229	collate->sections = &collate->unnamed_section;
				3230	collate->unnamed_section_defined = true;
				3231	}
				3232	}
				3233
				3234	/* Now read the direction names. */
				3235	read_directions (ldfile, arg, charmap, repertoire, result);
				3236
				3237	/* From now we need the strings untranslated. */
				3238	ldfile->translate_strings = 0;
				3239	break;
				3240
				3241	case tok_order_end:
				3242	/* Ignore the rest of the line if we don't need the input of
				3243	this line. */
				3244	if (ignore_content)
				3245	{
				3246	lr_ignore_rest (ldfile, 0);
				3247	break;
				3248	}
				3249
				3250	if (state != 1)
				3251	goto err_label;
				3252
				3253	/* Handle ellipsis at end of list. */
				3254	if (was_ellipsis != tok_none)
				3255	{
				3256	handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
				3257	repertoire, result);
				3258	was_ellipsis = tok_none;
				3259	}
				3260
				3261	state = 2;
				3262	lr_ignore_rest (ldfile, 1);
				3263	break;
				3264
				3265	case tok_reorder_after:
				3266	/* Ignore the rest of the line if we don't need the input of
				3267	this line. */
				3268	if (ignore_content)
				3269	{
				3270	lr_ignore_rest (ldfile, 0);
				3271	break;
				3272	}
				3273
				3274	if (state == 1)
				3275	{
				3276	lr_error (ldfile, _("%s: missing `order_end' keyword"),
				3277	"LC_COLLATE");
				3278	state = 2;
				3279
				3280	/* Handle ellipsis at end of list. */
				3281	if (was_ellipsis != tok_none)
				3282	{
				3283	handle_ellipsis (ldfile, arg->val.str.startmb,
				3284	arg->val.str.lenmb, was_ellipsis, charmap,
				3285	repertoire, result);
				3286	was_ellipsis = tok_none;
				3287	}
				3288	}
				3289	else if (state == 0 && copy_locale == NULL)
				3290	goto err_label;
				3291	else if (state != 0 && state != 2 && state != 3)
				3292	goto err_label;
				3293	state = 3;
				3294
				3295	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3296	if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
				3297	{
				3298	/* Find this symbol in the sequence table. */
				3299	char ucsbuf[10];
				3300	char *startmb;
				3301	size_t lenmb;
				3302	struct element_t *insp;
				3303	int no_error = 1;
				3304	void *ptr;
				3305
				3306	if (arg->tok == tok_bsymbol)
				3307	{
				3308	startmb = arg->val.str.startmb;
				3309	lenmb = arg->val.str.lenmb;
				3310	}
				3311	else
				3312	{
				3313	sprintf (ucsbuf, "U%08X", arg->val.ucs4);
				3314	startmb = ucsbuf;
				3315	lenmb = 9;
				3316	}
				3317
				3318	if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
				3319	/* Yes, the symbol exists. Simply point the cursor
				3320	to it. */
				3321	collate->cursor = (struct element_t *) ptr;
				3322	else
				3323	{
				3324	struct symbol_t *symbp;
				3325	void *ptr;
				3326
				3327	if (find_entry (&collate->sym_table, startmb, lenmb,
				3328	&ptr) == 0)
				3329	{
				3330	symbp = ptr;
				3331
				3332	if (symbp->order->last != NULL
				3333	\|\| symbp->order->next != NULL)
				3334	collate->cursor = symbp->order;
				3335	else
				3336	{
				3337	/* This is a collating symbol but its position
				3338	is not yet defined. */
				3339	lr_error (ldfile, _("\
				3340	%s: order for collating symbol %.*s not yet defined"),
				3341	"LC_COLLATE", (int) lenmb, startmb);
				3342	collate->cursor = NULL;
				3343	no_error = 0;
				3344	}
				3345	}
				3346	else if (find_entry (&collate->elem_table, startmb, lenmb,
				3347	&ptr) == 0)
				3348	{
				3349	insp = (struct element_t *) ptr;
				3350
				3351	if (insp->last != NULL \|\| insp->next != NULL)
				3352	collate->cursor = insp;
				3353	else
				3354	{
				3355	/* This is a collating element but its position
				3356	is not yet defined. */
				3357	lr_error (ldfile, _("\
				3358	%s: order for collating element %.*s not yet defined"),
				3359	"LC_COLLATE", (int) lenmb, startmb);
				3360	collate->cursor = NULL;
				3361	no_error = 0;
				3362	}
				3363	}
				3364	else
				3365	{
				3366	/* This is bad. The symbol after which we have to
				3367	insert does not exist. */
				3368	lr_error (ldfile, _("\
				3369	%s: cannot reorder after %.*s: symbol not known"),
				3370	"LC_COLLATE", (int) lenmb, startmb);
				3371	collate->cursor = NULL;
				3372	no_error = 0;
				3373	}
				3374	}
				3375
				3376	lr_ignore_rest (ldfile, no_error);
				3377	}
				3378	else
				3379	/* This must not happen. */
				3380	goto err_label;
				3381	break;
				3382
				3383	case tok_reorder_end:
				3384	/* Ignore the rest of the line if we don't need the input of
				3385	this line. */
				3386	if (ignore_content)
				3387	break;
				3388
				3389	if (state != 3)
				3390	goto err_label;
				3391	state = 4;
				3392	lr_ignore_rest (ldfile, 1);
				3393	break;
				3394
				3395	case tok_reorder_sections_after:
				3396	/* Ignore the rest of the line if we don't need the input of
				3397	this line. */
				3398	if (ignore_content)
				3399	{
				3400	lr_ignore_rest (ldfile, 0);
				3401	break;
				3402	}
				3403
				3404	if (state == 1)
				3405	{
				3406	lr_error (ldfile, _("%s: missing `order_end' keyword"),
				3407	"LC_COLLATE");
				3408	state = 2;
				3409
				3410	/* Handle ellipsis at end of list. */
				3411	if (was_ellipsis != tok_none)
				3412	{
				3413	handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
				3414	repertoire, result);
				3415	was_ellipsis = tok_none;
				3416	}
				3417	}
				3418	else if (state == 3)
				3419	{
				3420	WITH_CUR_LOCALE (error (0, 0, _("\
				3421	%s: missing `reorder-end' keyword"), "LC_COLLATE"));
				3422	state = 4;
				3423	}
				3424	else if (state != 2 && state != 4)
				3425	goto err_label;
				3426	state = 5;
				3427
				3428	/* Get the name of the sections we are adding after. */
				3429	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3430	if (arg->tok == tok_bsymbol)
				3431	{
				3432	/* Now find a section with this name. */
				3433	struct section_list *runp = collate->sections;
				3434
				3435	while (runp != NULL)
				3436	{
				3437	if (runp->name != NULL
				3438	&& strlen (runp->name) == arg->val.str.lenmb
				3439	&& memcmp (runp->name, arg->val.str.startmb,
				3440	arg->val.str.lenmb) == 0)
				3441	break;
				3442
				3443	runp = runp->next;
				3444	}
				3445
				3446	if (runp != NULL)
				3447	collate->current_section = runp;
				3448	else
				3449	{
				3450	/* This is bad. The section after which we have to
				3451	reorder does not exist. Therefore we cannot
				3452	process the whole rest of this reorder
				3453	specification. */
				3454	lr_error (ldfile, _("%s: section `%.*s' not known"),
				3455	"LC_COLLATE", (int) arg->val.str.lenmb,
				3456	arg->val.str.startmb);
				3457
				3458	do
				3459	{
				3460	lr_ignore_rest (ldfile, 0);
				3461
				3462	now = lr_token (ldfile, charmap, result, NULL, verbose);
				3463	}
				3464	while (now->tok == tok_reorder_sections_after
				3465	\|\| now->tok == tok_reorder_sections_end
				3466	\|\| now->tok == tok_end);
				3467
				3468	/* Process the token we just saw. */
				3469	nowtok = now->tok;
				3470	continue;
				3471	}
				3472	}
				3473	else
				3474	/* This must not happen. */
				3475	goto err_label;
				3476	break;
				3477
				3478	case tok_reorder_sections_end:
				3479	/* Ignore the rest of the line if we don't need the input of
				3480	this line. */
				3481	if (ignore_content)
				3482	break;
				3483
				3484	if (state != 5)
				3485	goto err_label;
				3486	state = 6;
				3487	lr_ignore_rest (ldfile, 1);
				3488	break;
				3489
				3490	case tok_bsymbol:
				3491	case tok_ucs4:
				3492	/* Ignore the rest of the line if we don't need the input of
				3493	this line. */
				3494	if (ignore_content)
				3495	{
				3496	lr_ignore_rest (ldfile, 0);
				3497	break;
				3498	}
				3499
				3500	if (state != 0 && state != 1 && state != 3 && state != 5)
				3501	goto err_label;
				3502
				3503	if ((state == 0 \|\| state == 5) && nowtok == tok_ucs4)
				3504	goto err_label;
				3505
				3506	if (nowtok == tok_ucs4)
				3507	{
				3508	snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
				3509	symstr = ucs4buf;
				3510	symlen = 9;
				3511	}
				3512	else if (arg != NULL)
				3513	{
				3514	symstr = arg->val.str.startmb;
				3515	symlen = arg->val.str.lenmb;
				3516	}
				3517	else
				3518	{
				3519	lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
				3520	(int) ldfile->token.val.str.lenmb,
				3521	ldfile->token.val.str.startmb);
				3522	break;
				3523	}
				3524
				3525	struct element_t *seqp;
				3526	if (state == 0)
				3527	{
				3528	/* We are outside an `order_start' region. This means
				3529	we must only accept definitions of values for
				3530	collation symbols since these are purely abstract
				3531	values and don't need directions associated. */
				3532	void *ptr;
				3533
				3534	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
				3535	{
				3536	seqp = ptr;
				3537
				3538	/* It's already defined. First check whether this
				3539	is really a collating symbol. */
				3540	if (seqp->is_character)
				3541	goto err_label;
				3542
				3543	goto move_entry;
				3544	}
				3545	else
				3546	{
				3547	void *result;
				3548
				3549	if (find_entry (&collate->sym_table, symstr, symlen,
				3550	&result) != 0)
				3551	/* No collating symbol, it's an error. */
				3552	goto err_label;
				3553
				3554	/* Maybe this is the first time we define a symbol
				3555	value and it is before the first actual section. */
				3556	if (collate->sections == NULL)
				3557	collate->sections = collate->current_section =
				3558	&collate->symbol_section;
				3559	}
				3560
				3561	if (was_ellipsis != tok_none)
				3562	{
				3563	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
				3564	charmap, repertoire, result);
				3565
				3566	/* Remember that we processed the ellipsis. */
				3567	was_ellipsis = tok_none;
				3568
				3569	/* And don't add the value a second time. */
				3570	break;
				3571	}
				3572	}
				3573	else if (state == 3)
				3574	{
				3575	/* It is possible that we already have this collation sequence.
				3576	In this case we move the entry. */
				3577	void *sym;
				3578	void *ptr;
				3579
				3580	/* If the symbol after which we have to insert was not found
				3581	ignore all entries. */
				3582	if (collate->cursor == NULL)
				3583	{
				3584	lr_ignore_rest (ldfile, 0);
				3585	break;
				3586	}
				3587
				3588	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
				3589	{
				3590	seqp = (struct element_t *) ptr;
				3591	goto move_entry;
				3592	}
				3593
				3594	if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
				3595	&& (seqp = ((struct symbol_t *) sym)->order) != NULL)
				3596	goto move_entry;
				3597
				3598	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
				3599	&& (seqp = (struct element_t *) ptr,
				3600	seqp->last != NULL \|\| seqp->next != NULL
				3601	\|\| (collate->start != NULL && seqp == collate->start)))
				3602	{
				3603	move_entry:
				3604	/* Remove the entry from the old position. */
				3605	if (seqp->last == NULL)
				3606	collate->start = seqp->next;
				3607	else
				3608	seqp->last->next = seqp->next;
				3609	if (seqp->next != NULL)
				3610	seqp->next->last = seqp->last;
				3611
				3612	/* We also have to check whether this entry is the
				3613	first or last of a section. */
				3614	if (seqp->section->first == seqp)
				3615	{
				3616	if (seqp->section->first == seqp->section->last)
				3617	/* This section has no content anymore. */
				3618	seqp->section->first = seqp->section->last = NULL;
				3619	else
				3620	seqp->section->first = seqp->next;
				3621	}
				3622	else if (seqp->section->last == seqp)
				3623	seqp->section->last = seqp->last;
				3624
				3625	/* Now insert it in the new place. */
				3626	insert_weights (ldfile, seqp, charmap, repertoire, result,
				3627	tok_none);
				3628	break;
				3629	}
				3630
				3631	/* Otherwise we just add a new entry. */
				3632	}
				3633	else if (state == 5)
				3634	{
				3635	/* We are reordering sections. Find the named section. */
				3636	struct section_list *runp = collate->sections;
				3637	struct section_list *prevp = NULL;
				3638
				3639	while (runp != NULL)
				3640	{
				3641	if (runp->name != NULL
				3642	&& strlen (runp->name) == symlen
				3643	&& memcmp (runp->name, symstr, symlen) == 0)
				3644	break;
				3645
				3646	prevp = runp;
				3647	runp = runp->next;
				3648	}
				3649
				3650	if (runp == NULL)
				3651	{
				3652	lr_error (ldfile, _("%s: section `%.*s' not known"),
				3653	"LC_COLLATE", (int) symlen, symstr);
				3654	lr_ignore_rest (ldfile, 0);
				3655	}
				3656	else
				3657	{
				3658	if (runp != collate->current_section)
				3659	{
				3660	/* Remove the named section from the old place and
				3661	insert it in the new one. */
				3662	prevp->next = runp->next;
				3663
				3664	runp->next = collate->current_section->next;
				3665	collate->current_section->next = runp;
				3666	collate->current_section = runp;
				3667	}
				3668
				3669	/* Process the rest of the line which might change
				3670	the collation rules. */
				3671	arg = lr_token (ldfile, charmap, result, repertoire,
				3672	verbose);
				3673	if (arg->tok != tok_eof && arg->tok != tok_eol)
				3674	read_directions (ldfile, arg, charmap, repertoire,
				3675	result);
				3676	}
				3677	break;
				3678	}
				3679	else if (was_ellipsis != tok_none)
				3680	{
				3681	/* Using the information in the `ellipsis_weight'
				3682	element and this and the last value we have to handle
				3683	the ellipsis now. */
				3684	assert (state == 1);
				3685
				3686	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
				3687	repertoire, result);
				3688
				3689	/* Remember that we processed the ellipsis. */
				3690	was_ellipsis = tok_none;
				3691
				3692	/* And don't add the value a second time. */
				3693	break;
				3694	}
				3695
				3696	/* Now insert in the new place. */
				3697	insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
				3698	break;
				3699
				3700	case tok_undefined:
				3701	/* Ignore the rest of the line if we don't need the input of
				3702	this line. */
				3703	if (ignore_content)
				3704	{
				3705	lr_ignore_rest (ldfile, 0);
				3706	break;
				3707	}
				3708
				3709	if (state != 1)
				3710	goto err_label;
				3711
				3712	if (was_ellipsis != tok_none)
				3713	{
				3714	lr_error (ldfile,
				3715	_("%s: cannot have `%s' as end of ellipsis range"),
				3716	"LC_COLLATE", "UNDEFINED");
				3717
				3718	unlink_element (collate);
				3719	was_ellipsis = tok_none;
				3720	}
				3721
				3722	/* See whether UNDEFINED already appeared somewhere. */
				3723	if (collate->undefined.next != NULL
				3724	\|\| &collate->undefined == collate->cursor)
				3725	{
				3726	lr_error (ldfile,
				3727	_("%s: order for `%.*s' already defined at %s:%Zu"),
				3728	"LC_COLLATE", 9, "UNDEFINED",
				3729	collate->undefined.file,
				3730	collate->undefined.line);
				3731	lr_ignore_rest (ldfile, 0);
				3732	}
				3733	else
				3734	/* Parse the weights. */
				3735	insert_weights (ldfile, &collate->undefined, charmap,
				3736	repertoire, result, tok_none);
				3737	break;
				3738
				3739	case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
				3740	case tok_ellipsis3: /* absolute ellipsis */
				3741	case tok_ellipsis4: /* symbolic decimal ellipsis */
				3742	/* This is the symbolic (decimal or hexadecimal) or absolute
				3743	ellipsis. */
				3744	if (was_ellipsis != tok_none)
				3745	goto err_label;
				3746
				3747	if (state != 0 && state != 1 && state != 3)
				3748	goto err_label;
				3749
				3750	was_ellipsis = nowtok;
				3751
				3752	insert_weights (ldfile, &collate->ellipsis_weight, charmap,
				3753	repertoire, result, nowtok);
				3754	break;
				3755
				3756	case tok_end:
				3757	seen_end:
				3758	/* Next we assume `LC_COLLATE'. */
				3759	if (!ignore_content)
				3760	{
				3761	if (state == 0 && copy_locale == NULL)
				3762	/* We must either see a copy statement or have
				3763	ordering values. */
				3764	lr_error (ldfile,
				3765	_("%s: empty category description not allowed"),
				3766	"LC_COLLATE");
				3767	else if (state == 1)
				3768	{
				3769	lr_error (ldfile, _("%s: missing `order_end' keyword"),
				3770	"LC_COLLATE");
				3771
				3772	/* Handle ellipsis at end of list. */
				3773	if (was_ellipsis != tok_none)
				3774	{
				3775	handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
				3776	repertoire, result);
				3777	was_ellipsis = tok_none;
				3778	}
				3779	}
				3780	else if (state == 3)
				3781	WITH_CUR_LOCALE (error (0, 0, _("\
				3782	%s: missing `reorder-end' keyword"), "LC_COLLATE"));
				3783	else if (state == 5)
				3784	WITH_CUR_LOCALE (error (0, 0, _("\
				3785	%s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
				3786	}
				3787	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				3788	if (arg->tok == tok_eof)
				3789	break;
				3790	if (arg->tok == tok_eol)
				3791	lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
				3792	else if (arg->tok != tok_lc_collate)
				3793	lr_error (ldfile, _("\
				3794	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
				3795	lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
				3796	return;
				3797
				3798	case tok_define:
				3799	if (ignore_content)
				3800	{
				3801	lr_ignore_rest (ldfile, 0);
				3802	break;
				3803	}
				3804
				3805	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				3806	if (arg->tok != tok_ident)
				3807	goto err_label;
				3808
				3809	/* Simply add the new symbol. */
				3810	struct name_list newsym = xmalloc (sizeof (newsym)
				3811	+ arg->val.str.lenmb + 1);
				3812	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
				3813	newsym->str[arg->val.str.lenmb] = '\0';
				3814	newsym->next = defined;
				3815	defined = newsym;
				3816
				3817	lr_ignore_rest (ldfile, 1);
				3818	break;
				3819
				3820	case tok_undef:
				3821	if (ignore_content)
				3822	{
				3823	lr_ignore_rest (ldfile, 0);
				3824	break;
				3825	}
				3826
				3827	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				3828	if (arg->tok != tok_ident)
				3829	goto err_label;
				3830
				3831	/* Remove _all_ occurrences of the symbol from the list. */
				3832	struct name_list *prevdef = NULL;
				3833	struct name_list *curdef = defined;
				3834	while (curdef != NULL)
				3835	if (strncmp (arg->val.str.startmb, curdef->str,
				3836	arg->val.str.lenmb) == 0
				3837	&& curdef->str[arg->val.str.lenmb] == '\0')
				3838	{
				3839	if (prevdef == NULL)
				3840	defined = curdef->next;
				3841	else
				3842	prevdef->next = curdef->next;
				3843
				3844	struct name_list *olddef = curdef;
				3845	curdef = curdef->next;
				3846
				3847	free (olddef);
				3848	}
				3849	else
				3850	{
				3851	prevdef = curdef;
				3852	curdef = curdef->next;
				3853	}
				3854
				3855	lr_ignore_rest (ldfile, 1);
				3856	break;
				3857
				3858	case tok_ifdef:
				3859	case tok_ifndef:
				3860	if (ignore_content)
				3861	{
				3862	lr_ignore_rest (ldfile, 0);
				3863	break;
				3864	}
				3865
				3866	found_ifdef:
				3867	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				3868	if (arg->tok != tok_ident)
				3869	goto err_label;
				3870	lr_ignore_rest (ldfile, 1);
				3871
				3872	if (collate->else_action == else_none)
				3873	{
				3874	curdef = defined;
				3875	while (curdef != NULL)
				3876	if (strncmp (arg->val.str.startmb, curdef->str,
				3877	arg->val.str.lenmb) == 0
				3878	&& curdef->str[arg->val.str.lenmb] == '\0')
				3879	break;
				3880	else
				3881	curdef = curdef->next;
				3882
				3883	if ((nowtok == tok_ifdef && curdef != NULL)
				3884	\|\| (nowtok == tok_ifndef && curdef == NULL))
				3885	{
				3886	/* We have to use the if-branch. */
				3887	collate->else_action = else_ignore;
				3888	}
				3889	else
				3890	{
				3891	/* We have to use the else-branch, if there is one. */
				3892	nowtok = skip_to (ldfile, collate, charmap, 0);
				3893	if (nowtok == tok_else)
				3894	collate->else_action = else_seen;
				3895	else if (nowtok == tok_elifdef)
				3896	{
				3897	nowtok = tok_ifdef;
				3898	goto found_ifdef;
				3899	}
				3900	else if (nowtok == tok_elifndef)
				3901	{
				3902	nowtok = tok_ifndef;
				3903	goto found_ifdef;
				3904	}
				3905	else if (nowtok == tok_eof)
				3906	goto seen_eof;
				3907	else if (nowtok == tok_end)
				3908	goto seen_end;
				3909	}
				3910	}
				3911	else
				3912	{
				3913	/* XXX Should it really become necessary to support nested
				3914	preprocessor handling we will push the state here. */
				3915	lr_error (ldfile, _("%s: nested conditionals not supported"),
				3916	"LC_COLLATE");
				3917	nowtok = skip_to (ldfile, collate, charmap, 1);
				3918	if (nowtok == tok_eof)
				3919	goto seen_eof;
				3920	else if (nowtok == tok_end)
				3921	goto seen_end;
				3922	}
				3923	break;
				3924
				3925	case tok_elifdef:
				3926	case tok_elifndef:
				3927	case tok_else:
				3928	if (ignore_content)
				3929	{
				3930	lr_ignore_rest (ldfile, 0);
				3931	break;
				3932	}
				3933
				3934	lr_ignore_rest (ldfile, 1);
				3935
				3936	if (collate->else_action == else_ignore)
				3937	{
				3938	/* Ignore everything until the endif. */
				3939	nowtok = skip_to (ldfile, collate, charmap, 1);
				3940	if (nowtok == tok_eof)
				3941	goto seen_eof;
				3942	else if (nowtok == tok_end)
				3943	goto seen_end;
				3944	}
				3945	else
				3946	{
				3947	assert (collate->else_action == else_none);
				3948	lr_error (ldfile, _("\
				3949	%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
				3950	nowtok == tok_else ? "else"
				3951	: nowtok == tok_elifdef ? "elifdef" : "elifndef");
				3952	}
				3953	break;
				3954
				3955	case tok_endif:
				3956	if (ignore_content)
				3957	{
				3958	lr_ignore_rest (ldfile, 0);
				3959	break;
				3960	}
				3961
				3962	lr_ignore_rest (ldfile, 1);
				3963
				3964	if (collate->else_action != else_ignore
				3965	&& collate->else_action != else_seen)
				3966	lr_error (ldfile, _("\
				3967	%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
				3968
				3969	/* XXX If we support nested preprocessor directives we pop
				3970	the state here. */
				3971	collate->else_action = else_none;
				3972	break;
				3973
				3974	default:
				3975	err_label:
				3976	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
				3977	}
				3978
				3979	/* Prepare for the next round. */
				3980	now = lr_token (ldfile, charmap, result, NULL, verbose);
				3981	nowtok = now->tok;
				3982	}
				3983
				3984	seen_eof:
				3985	/* When we come here we reached the end of the file. */
				3986	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
				3987	}