Blame - ap/libc/glibc/glibc-2.23/locale/programs/ld-collate.c - T106_DC

blob: 1e125f60fdd2a6383057ed2a41b62602860408a8 [file] [log] [blame]

xf.li	bdd93d5	2023-05-12 07:10:14 -0700	[diff] [blame^]	1	/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
				2	This file is part of the GNU C Library.
				3	Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
				4
				5	This program is free software; you can redistribute it and/or modify
				6	it under the terms of the GNU General Public License as published
				7	by the Free Software Foundation; version 2 of the License, or
				8	(at your option) any later version.
				9
				10	This program is distributed in the hope that it will be useful,
				11	but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				13	GNU General Public License for more details.
				14
				15	You should have received a copy of the GNU General Public License
				16	along with this program; if not, see <http://www.gnu.org/licenses/>. */
				17
				18	#ifdef HAVE_CONFIG_H
				19	# include <config.h>
				20	#endif
				21
				22	#include <errno.h>
				23	#include <error.h>
				24	#include <stdlib.h>
				25	#include <wchar.h>
				26	#include <stdint.h>
				27	#include <sys/param.h>
				28
				29	#include "localedef.h"
				30	#include "charmap.h"
				31	#include "localeinfo.h"
				32	#include "linereader.h"
				33	#include "locfile.h"
				34	#include "elem-hash.h"
				35
				/* Uncomment the following line in the production version.  */
				/* #define NDEBUG 1 */
				#include <assert.h>

				/* The obstack macros obtain and release their chunks through these
				   two names; route them to the plain C allocator.  */
				#define obstack_chunk_alloc malloc
				#define obstack_chunk_free free
				42
				/* Append the 32-bit value DATA to the object currently growing in
				   OBSTACK.  DATA is passed through maybe_swap_uint32 before it is
				   stored, and the object size must satisfy LOCFILE_ALIGNED_P on
				   entry.  */
				static inline void
				__attribute ((always_inline))
				obstack_int32_grow (struct obstack *obstack, int32_t data)
				{
				  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
				  data = maybe_swap_uint32 (data);
				  /* Use the dedicated int primitive when int32_t and int have the
				     same size, the generic byte-wise grow otherwise.  */
				  if (sizeof (int32_t) == sizeof (int))
				    obstack_int_grow (obstack, data);
				  else
				    obstack_grow (obstack, &data, sizeof (int32_t));
				}
				54
				/* Same as obstack_int32_grow, but uses obstack_int_grow_fast when
				   int32_t and int have the same size.  The fallback branch still uses
				   the ordinary obstack_grow.  */
				static inline void
				__attribute ((always_inline))
				obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
				{
				  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
				  data = maybe_swap_uint32 (data);
				  if (sizeof (int32_t) == sizeof (int))
				    obstack_int_grow_fast (obstack, data);
				  else
				    obstack_grow (obstack, &data, sizeof (int32_t));
				}
				66
				/* Forward declaration.  */
				struct element_t;

				/* Data type for list of strings.  A node can be chained into two
				   lists at the same time, through DEF_NEXT and through NEXT.  */
				struct section_list
				{
				  /* Successor in the known_sections list.  */
				  struct section_list *def_next;
				  /* Successor in the sections list.  */
				  struct section_list *next;
				  /* Name of the section.  */
				  const char *name;
				  /* First element of this section.  */
				  struct element_t *first;
				  /* Last element of this section.  */
				  struct element_t *last;
				  /* These are the rules for this section.  */
				  enum coll_sort_rule *rules;
				  /* Index of the rule set in the appropriate section of the output
				     file.  */
				  int ruleidx;
				};
				88
				struct element_t;

				/* A counted array of element pointers; used for the weight of an
				   element on one level.  */
				struct element_list_t
				{
				  /* Number of elements.  */
				  int cnt;

				  /* The CNT element pointers.  */
				  struct element_t **w;
				};
				98
				/* Data type for collating element.  */
				struct element_t
				{
				  /* Symbolic name, or NULL for an anonymous element.  */
				  const char *name;

				  /* Multibyte representation and its length in bytes.  */
				  const char *mbs;
				  size_t nmbs;
				  /* Wide character representation and its length in characters.  */
				  const uint32_t *wcs;
				  size_t nwcs;
				  int *mborder;
				  int wcorder;

				  /* The following is a bit mask which bits are set if this element is
				     used in the appropriate level.  Interesting for the singlebyte
				     weight computation.

				     XXX The type here restricts the number of levels to 32.  It could
				     be changed if necessary but I doubt this is necessary.  */
				  unsigned int used_in_level;

				  /* One weight list per sorting rule.  */
				  struct element_list_t *weights;

				  /* Nonzero if this is a real character definition.  */
				  int is_character;

				  /* Order of the character in the sequence.  This information will
				     be used in range expressions.  */
				  int mbseqorder;
				  int wcseqorder;

				  /* Where does the definition come from.  */
				  const char *file;
				  size_t line;

				  /* Which section does this belong to.  */
				  struct section_list *section;

				  /* Predecessor and successor in the order list.  */
				  struct element_t *last;
				  struct element_t *next;

				  /* Next element in multibyte output list.  */
				  struct element_t *mbnext;
				  struct element_t *mblast;

				  /* Next element in wide character output list.  */
				  struct element_t *wcnext;
				  struct element_t *wclast;
				};

				/* Special element value.  These small integers are never valid
				   object addresses; they are stored in weight lists as placeholders
				   and must not be dereferenced.  */
				#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
				#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
				#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
				153
				/* Data type for collating symbol.  */
				struct symbol_t
				{
				  /* Name of the symbol.  */
				  const char *name;

				  /* Point to place in the order list.  NULL as long as the symbol
				     has no element associated with it.  */
				  struct element_t *order;

				  /* Where does the definition come from.  */
				  const char *file;
				  size_t line;
				};
				166
				167	/* Sparse table of struct element_t . /
				168	#define TABLE wchead_table
				169	#define ELEMENT struct element_t *
				170	#define DEFAULT NULL
				171	#define ITERATE
				172	#define NO_ADD_LOCALE
				173	#include "3level.h"
				174
				175	/* Sparse table of int32_t. */
				176	#define TABLE collidx_table
				177	#define ELEMENT int32_t
				178	#define DEFAULT 0
				179	#include "3level.h"
				180
				181	/* Sparse table of uint32_t. */
				182	#define TABLE collseq_table
				183	#define ELEMENT uint32_t
				184	#define DEFAULT ~((uint32_t) 0)
				185	#include "3level.h"
				186
				187
				/* Simple name list for the preprocessor.  The string is stored
				   directly behind the link pointer, so a node has to be allocated
				   with room for the name (including its terminator) added to
				   sizeof (struct name_list).  */
				struct name_list
				{
				  struct name_list *next;
				  /* C99 flexible array member instead of the GNU zero-length array.  */
				  char str[];
				};
				194
				195
				196	/* The real definition of the struct for the LC_COLLATE locale. */
				197	struct locale_collate_t
				198	{
				199	int col_weight_max;
				200	int cur_weight_max;
				201
				202	/* List of known scripts. */
				203	struct section_list *known_sections;
				204	/* List of used sections. */
				205	struct section_list *sections;
				206	/* Current section using definition. */
				207	struct section_list *current_section;
				208	/* There always can be an unnamed section. */
				209	struct section_list unnamed_section;
				210	/* Flag whether the unnamed section has been defined. */
				211	bool unnamed_section_defined;
				212	/* To make handling of errors easier we have another section. */
				213	struct section_list error_section;
				214	/* Sometimes we are defining the values for collating symbols before
				215	the first actual section. */
				216	struct section_list symbol_section;
				217
				218	/* Start of the order list. */
				219	struct element_t *start;
				220
				221	/* The undefined element. */
				222	struct element_t undefined;
				223
				224	/* This is the cursor for `reorder_after' insertions. */
				225	struct element_t *cursor;
				226
				227	/* This value is used when handling ellipsis. */
				228	struct element_t ellipsis_weight;
				229
				230	/* Known collating elements. */
				231	hash_table elem_table;
				232
				233	/* Known collating symbols. */
				234	hash_table sym_table;
				235
				236	/* Known collation sequences. */
				237	hash_table seq_table;
				238
				239	struct obstack mempool;
				240
				241	/* The LC_COLLATE category is a bit special as it is sometimes possible
				242	that the definitions from more than one input file contains information.
				243	Therefore we keep all relevant input in a list. */
				244	struct locale_collate_t *next;
				245
				246	/* Arrays with heads of the list for each of the leading bytes in
				247	the multibyte sequences. */
				248	struct element_t *mbheads[256];
				249
				250	/* Arrays with heads of the list for each of the leading bytes in
				251	the multibyte sequences. */
				252	struct wchead_table wcheads;
				253
				254	/* The arrays with the collation sequence order. */
				255	unsigned char mbseqorder[256];
				256	struct collseq_table wcseqorder;
				257
				258	/* State of the preprocessor. */
				259	enum
				260	{
				261	else_none = 0,
				262	else_ignore,
				263	else_seen
				264	}
				265	else_action;
				266	};
				267
				268
				/* We have a few global variables which are used for reading all
				   LC_COLLATE category descriptions in all files.

				   Number of sorting rules (levels).  Zero until the first direction
				   specification has been read; every later one must match.  */
				static uint32_t nrules;

				/* List of defined preprocessor symbols.  */
				static struct name_list *defined;
				275
				276
				/* We need UTF-8 encoding of numbers.

				   Store the encoding of VAL in BUF and return the number of bytes
				   written.  Values below 0x80 take one byte, everything else two to
				   six bytes (the original UTF-8 layout, which is not limited to
				   four bytes).  BUF is not NUL-terminated by this function and must
				   provide room for up to six bytes.  */
				static inline int
				__attribute ((always_inline))
				utf8_encode (char *buf, int val)
				{
				  int retval;

				  if (val < 0x80)
				    {
				      *buf = (char) val;
				      retval = 1;
				    }
				  else
				    {
				      int step;

				      /* A sequence of STEP bytes has room for 5 * STEP + 1 payload
					 bits; pick the shortest one VAL fits in.  If none of the
					 lengths 2..5 fits the loop ends with STEP == 6.  */
				      for (step = 2; step < 6; ++step)
					if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
					  break;
				      retval = step;

				      /* Lead byte marker: the STEP most significant bits set,
					 followed by a zero bit.  Computed from a positive constant
					 so that no negative value is shifted right.  */
				      *buf = (unsigned char) (0xff00 >> step);
				      --step;
				      /* Fill the continuation bytes back to front, six bits each.  */
				      do
					{
					  buf[step] = 0x80 | (val & 0x3f);
					  val >>= 6;
					}
				      while (--step > 0);
				      /* What is left of VAL belongs into the lead byte.  */
				      *buf |= val;
				    }

				  return retval;
				}
				311
				312
				313	static struct section_list *
				314	make_seclist_elem (struct locale_collate_t collate, const char string,
				315	struct section_list *next)
				316	{
				317	struct section_list *newp;
				318
				319	newp = (struct section_list *) obstack_alloc (&collate->mempool,
				320	sizeof (*newp));
				321	newp->next = next;
				322	newp->name = string;
				323	newp->first = NULL;
				324	newp->last = NULL;
				325
				326	return newp;
				327	}
				328
				329
				330	static struct element_t *
				331	new_element (struct locale_collate_t collate, const char mbs, size_t mbslen,
				332	const uint32_t wcs, const char name, size_t namelen,
				333	int is_character)
				334	{
				335	struct element_t *newp;
				336
				337	newp = (struct element_t *) obstack_alloc (&collate->mempool,
				338	sizeof (*newp));
				339	newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
				340	name, namelen);
				341	if (mbs != NULL)
				342	{
				343	newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
				344	newp->nmbs = mbslen;
				345	}
				346	else
				347	{
				348	newp->mbs = NULL;
				349	newp->nmbs = 0;
				350	}
				351	if (wcs != NULL)
				352	{
				353	size_t nwcs = wcslen ((wchar_t *) wcs);
				354	uint32_t zero = 0;
				355	/* Handle <U0000> as a single character. */
				356	if (nwcs == 0)
				357	nwcs = 1;
				358	obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
				359	obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
				360	newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
				361	newp->nwcs = nwcs;
				362	}
				363	else
				364	{
				365	newp->wcs = NULL;
				366	newp->nwcs = 0;
				367	}
				368	newp->mborder = NULL;
				369	newp->wcorder = 0;
				370	newp->used_in_level = 0;
				371	newp->is_character = is_character;
				372
				373	/* Will be assigned later. XXX */
				374	newp->mbseqorder = 0;
				375	newp->wcseqorder = 0;
				376
				377	/* Will be allocated later. */
				378	newp->weights = NULL;
				379
				380	newp->file = NULL;
				381	newp->line = 0;
				382
				383	newp->section = collate->current_section;
				384
				385	newp->last = NULL;
				386	newp->next = NULL;
				387
				388	newp->mbnext = NULL;
				389	newp->mblast = NULL;
				390
				391	newp->wcnext = NULL;
				392	newp->wclast = NULL;
				393
				394	return newp;
				395	}
				396
				397
				398	static struct symbol_t *
				399	new_symbol (struct locale_collate_t collate, const char name, size_t len)
				400	{
				401	struct symbol_t *newp;
				402
				403	newp = (struct symbol_t ) obstack_alloc (&collate->mempool, sizeof (newp));
				404
				405	newp->name = obstack_copy0 (&collate->mempool, name, len);
				406	newp->order = NULL;
				407
				408	newp->file = NULL;
				409	newp->line = 0;
				410
				411	return newp;
				412	}
				413
				414
				415	/* Test whether this name is already defined somewhere. */
				416	static int
				417	check_duplicate (struct linereader ldfile, struct locale_collate_t collate,
				418	const struct charmap_t *charmap,
				419	struct repertoire_t repertoire, const char symbol,
				420	size_t symbol_len)
				421	{
				422	void *ignore = NULL;
				423
				424	if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
				425	{
				426	lr_error (ldfile, _("`%.*s' already defined in charmap"),
				427	(int) symbol_len, symbol);
				428	return 1;
				429	}
				430
				431	if (repertoire != NULL
				432	&& (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
				433	== 0))
				434	{
				435	lr_error (ldfile, _("`%.*s' already defined in repertoire"),
				436	(int) symbol_len, symbol);
				437	return 1;
				438	}
				439
				440	if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
				441	{
				442	lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
				443	(int) symbol_len, symbol);
				444	return 1;
				445	}
				446
				447	if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
				448	{
				449	lr_error (ldfile, _("`%.*s' already defined as collating element"),
				450	(int) symbol_len, symbol);
				451	return 1;
				452	}
				453
				454	return 0;
				455	}
				456
				457
				458	/* Read the direction specification. */
				459	static void
				460	read_directions (struct linereader ldfile, struct token arg,
				461	const struct charmap_t *charmap,
				462	struct repertoire_t repertoire, struct localedef_t result)
				463	{
				464	int cnt = 0;
				465	int max = nrules ?: 10;
				466	enum coll_sort_rule rules = calloc (max, sizeof (rules));
				467	int warned = 0;
				468	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
				469
				470	while (1)
				471	{
				472	int valid = 0;
				473
				474	if (arg->tok == tok_forward)
				475	{
				476	if (rules[cnt] & sort_backward)
				477	{
				478	if (! warned)
				479	{
				480	lr_error (ldfile, _("\
				481	%s: `forward' and `backward' are mutually excluding each other"),
				482	"LC_COLLATE");
				483	warned = 1;
				484	}
				485	}
				486	else if (rules[cnt] & sort_forward)
				487	{
				488	if (! warned)
				489	{
				490	lr_error (ldfile, _("\
				491	%s: `%s' mentioned more than once in definition of weight %d"),
				492	"LC_COLLATE", "forward", cnt + 1);
				493	}
				494	}
				495	else
				496	rules[cnt] \|= sort_forward;
				497
				498	valid = 1;
				499	}
				500	else if (arg->tok == tok_backward)
				501	{
				502	if (rules[cnt] & sort_forward)
				503	{
				504	if (! warned)
				505	{
				506	lr_error (ldfile, _("\
				507	%s: `forward' and `backward' are mutually excluding each other"),
				508	"LC_COLLATE");
				509	warned = 1;
				510	}
				511	}
				512	else if (rules[cnt] & sort_backward)
				513	{
				514	if (! warned)
				515	{
				516	lr_error (ldfile, _("\
				517	%s: `%s' mentioned more than once in definition of weight %d"),
				518	"LC_COLLATE", "backward", cnt + 1);
				519	}
				520	}
				521	else
				522	rules[cnt] \|= sort_backward;
				523
				524	valid = 1;
				525	}
				526	else if (arg->tok == tok_position)
				527	{
				528	if (rules[cnt] & sort_position)
				529	{
				530	if (! warned)
				531	{
				532	lr_error (ldfile, _("\
				533	%s: `%s' mentioned more than once in definition of weight %d"),
				534	"LC_COLLATE", "position", cnt + 1);
				535	}
				536	}
				537	else
				538	rules[cnt] \|= sort_position;
				539
				540	valid = 1;
				541	}
				542
				543	if (valid)
				544	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				545
				546	if (arg->tok == tok_eof \|\| arg->tok == tok_eol \|\| arg->tok == tok_comma
				547	\|\| arg->tok == tok_semicolon)
				548	{
				549	if (! valid && ! warned)
				550	{
				551	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
				552	warned = 1;
				553	}
				554
				555	/* See whether we have to increment the counter. */
				556	if (arg->tok != tok_comma && rules[cnt] != 0)
				557	{
				558	/* Add the default `forward' if we have seen only `position'. */
				559	if (rules[cnt] == sort_position)
				560	rules[cnt] = sort_position \| sort_forward;
				561
				562	++cnt;
				563	}
				564
				565	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
				566	/* End of line or file, so we exit the loop. */
				567	break;
				568
				569	if (nrules == 0)
				570	{
				571	/* See whether we have enough room in the array. */
				572	if (cnt == max)
				573	{
				574	max += 10;
				575	rules = (enum coll_sort_rule *) xrealloc (rules,
				576	max
				577	* sizeof (*rules));
				578	memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
				579	}
				580	}
				581	else
				582	{
				583	if (cnt == nrules)
				584	{
				585	/* There must not be any more rule. */
				586	if (! warned)
				587	{
				588	lr_error (ldfile, _("\
				589	%s: too many rules; first entry only had %d"),
				590	"LC_COLLATE", nrules);
				591	warned = 1;
				592	}
				593
				594	lr_ignore_rest (ldfile, 0);
				595	break;
				596	}
				597	}
				598	}
				599	else
				600	{
				601	if (! warned)
				602	{
				603	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
				604	warned = 1;
				605	}
				606	}
				607
				608	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				609	}
				610
				611	if (nrules == 0)
				612	{
				613	/* Now we know how many rules we have. */
				614	nrules = cnt;
				615	rules = (enum coll_sort_rule *) xrealloc (rules,
				616	nrules * sizeof (*rules));
				617	}
				618	else
				619	{
				620	if (cnt < nrules)
				621	{
				622	/* Not enough rules in this specification. */
				623	if (! warned)
				624	lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
				625
				626	do
				627	rules[cnt] = sort_forward;
				628	while (++cnt < nrules);
				629	}
				630	}
				631
				632	collate->current_section->rules = rules;
				633	}
				634
				635
				636	static struct element_t *
				637	find_element (struct linereader ldfile, struct locale_collate_t collate,
				638	const char *str, size_t len)
				639	{
				640	void *result = NULL;
				641
				642	/* Search for the entries among the collation sequences already define. */
				643	if (find_entry (&collate->seq_table, str, len, &result) != 0)
				644	{
				645	/* Nope, not define yet. So we see whether it is a
				646	collation symbol. */
				647	void *ptr;
				648
				649	if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
				650	{
				651	/* It's a collation symbol. */
				652	struct symbol_t sym = (struct symbol_t ) ptr;
				653	result = sym->order;
				654
				655	if (result == NULL)
				656	result = sym->order = new_element (collate, NULL, 0, NULL,
				657	NULL, 0, 0);
				658	}
				659	else if (find_entry (&collate->elem_table, str, len, &result) != 0)
				660	{
				661	/* It's also no collation element. So it is a character
				662	element defined later. */
				663	result = new_element (collate, NULL, 0, NULL, str, len, 1);
				664	/* Insert it into the sequence table. */
				665	insert_entry (&collate->seq_table, str, len, result);
				666	}
				667	}
				668
				669	return (struct element_t *) result;
				670	}
				671
				672
				673	static void
				674	unlink_element (struct locale_collate_t *collate)
				675	{
				676	if (collate->cursor == collate->start)
				677	{
				678	assert (collate->cursor->next == NULL);
				679	assert (collate->cursor->last == NULL);
				680	collate->cursor = NULL;
				681	}
				682	else
				683	{
				684	if (collate->cursor->next != NULL)
				685	collate->cursor->next->last = collate->cursor->last;
				686	if (collate->cursor->last != NULL)
				687	collate->cursor->last->next = collate->cursor->next;
				688	collate->cursor = collate->cursor->last;
				689	}
				690	}
				691
				692
				693	static void
				694	insert_weights (struct linereader ldfile, struct element_t elem,
				695	const struct charmap_t *charmap,
				696	struct repertoire_t repertoire, struct localedef_t result,
				697	enum token_t ellipsis)
				698	{
				699	int weight_cnt;
				700	struct token *arg;
				701	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
				702
				703	/* Initialize all the fields. */
				704	elem->file = ldfile->fname;
				705	elem->line = ldfile->lineno;
				706
				707	elem->last = collate->cursor;
				708	elem->next = collate->cursor ? collate->cursor->next : NULL;
				709	if (collate->cursor != NULL && collate->cursor->next != NULL)
				710	collate->cursor->next->last = elem;
				711	if (collate->cursor != NULL)
				712	collate->cursor->next = elem;
				713	if (collate->start == NULL)
				714	{
				715	assert (collate->cursor == NULL);
				716	collate->start = elem;
				717	}
				718
				719	elem->section = collate->current_section;
				720
				721	if (collate->current_section->first == NULL)
				722	collate->current_section->first = elem;
				723	if (collate->current_section->last == collate->cursor)
				724	collate->current_section->last = elem;
				725
				726	collate->cursor = elem;
				727
				728	elem->weights = (struct element_list_t *)
				729	obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
				730	memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
				731
				732	weight_cnt = 0;
				733
				734	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				735	do
				736	{
				737	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
				738	break;
				739
				740	if (arg->tok == tok_ignore)
				741	{
				742	/* The weight for this level has to be ignored. We use the
				743	null pointer to indicate this. */
				744	elem->weights[weight_cnt].w = (struct element_t **)
				745	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
				746	elem->weights[weight_cnt].w[0] = NULL;
				747	elem->weights[weight_cnt].cnt = 1;
				748	}
				749	else if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
				750	{
				751	char ucs4str[10];
				752	struct element_t *val;
				753	char *symstr;
				754	size_t symlen;
				755
				756	if (arg->tok == tok_bsymbol)
				757	{
				758	symstr = arg->val.str.startmb;
				759	symlen = arg->val.str.lenmb;
				760	}
				761	else
				762	{
				763	snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
				764	symstr = ucs4str;
				765	symlen = 9;
				766	}
				767
				768	val = find_element (ldfile, collate, symstr, symlen);
				769	if (val == NULL)
				770	break;
				771
				772	elem->weights[weight_cnt].w = (struct element_t **)
				773	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
				774	elem->weights[weight_cnt].w[0] = val;
				775	elem->weights[weight_cnt].cnt = 1;
				776	}
				777	else if (arg->tok == tok_string)
				778	{
				779	/* Split the string up in the individual characters and put
				780	the element definitions in the list. */
				781	const char *cp = arg->val.str.startmb;
				782	int cnt = 0;
				783	struct element_t *charelem;
				784	struct element_t **weights = NULL;
				785	int max = 0;
				786
				787	if (*cp == '\0')
				788	{
				789	lr_error (ldfile, _("%s: empty weight string not allowed"),
				790	"LC_COLLATE");
				791	lr_ignore_rest (ldfile, 0);
				792	break;
				793	}
				794
				795	do
				796	{
				797	if (*cp == '<')
				798	{
				799	/* Ahh, it's a bsymbol or an UCS4 value. If it's
				800	the latter we have to unify the name. */
				801	const char *startp = ++cp;
				802	size_t len;
				803
				804	while (*cp != '>')
				805	{
				806	if (*cp == ldfile->escape_char)
				807	++cp;
				808	if (*cp == '\0')
				809	/* It's a syntax error. */
				810	goto syntax;
				811
				812	++cp;
				813	}
				814
				815	if (cp - startp == 5 && startp[0] == 'U'
				816	&& isxdigit (startp[1]) && isxdigit (startp[2])
				817	&& isxdigit (startp[3]) && isxdigit (startp[4]))
				818	{
				819	unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
				820	char *newstr;
				821
				822	newstr = (char *) xmalloc (10);
				823	snprintf (newstr, 10, "U%08X", ucs4);
				824	startp = newstr;
				825
				826	len = 9;
				827	}
				828	else
				829	len = cp - startp;
				830
				831	charelem = find_element (ldfile, collate, startp, len);
				832	++cp;
				833	}
				834	else
				835	{
				836	/* People really shouldn't use characters directly in
				837	the string. Especially since it's not really clear
				838	what this means. We interpret all characters in the
				839	string as if that would be bsymbols. Otherwise we
				840	would have to match back to bsymbols somehow and this
				841	is normally not what people normally expect. */
				842	charelem = find_element (ldfile, collate, cp++, 1);
				843	}
				844
				845	if (charelem == NULL)
				846	{
				847	/* We ignore the rest of the line. */
				848	lr_ignore_rest (ldfile, 0);
				849	break;
				850	}
				851
				852	/* Add the pointer. */
				853	if (cnt >= max)
				854	{
				855	struct element_t **newp;
				856	max += 10;
				857	newp = (struct element_t **)
				858	alloca (max * sizeof (struct element_t *));
				859	memcpy (newp, weights, cnt * sizeof (struct element_t *));
				860	weights = newp;
				861	}
				862	weights[cnt++] = charelem;
				863	}
				864	while (*cp != '\0');
				865
				866	/* Now store the information. */
				867	elem->weights[weight_cnt].w = (struct element_t **)
				868	obstack_alloc (&collate->mempool,
				869	cnt * sizeof (struct element_t *));
				870	memcpy (elem->weights[weight_cnt].w, weights,
				871	cnt * sizeof (struct element_t *));
				872	elem->weights[weight_cnt].cnt = cnt;
				873
				874	/* We don't need the string anymore. */
				875	free (arg->val.str.startmb);
				876	}
				877	else if (ellipsis != tok_none
				878	&& (arg->tok == tok_ellipsis2
				879	\|\| arg->tok == tok_ellipsis3
				880	\|\| arg->tok == tok_ellipsis4))
				881	{
				882	/* It must be the same ellipsis as used in the initial column. */
				883	if (arg->tok != ellipsis)
				884	lr_error (ldfile, _("\
				885	%s: weights must use the same ellipsis symbol as the name"),
				886	"LC_COLLATE");
				887
				888	/* The weight for this level will depend on the element
				889	iterating over the range. Put a placeholder. */
				890	elem->weights[weight_cnt].w = (struct element_t **)
				891	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
				892	elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
				893	elem->weights[weight_cnt].cnt = 1;
				894	}
				895	else
				896	{
				897	syntax:
				898	/* It's a syntax error. */
				899	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
				900	lr_ignore_rest (ldfile, 0);
				901	break;
				902	}
				903
				904	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				905	/* This better should be the end of the line or a semicolon. */
				906	if (arg->tok == tok_semicolon)
				907	/* OK, ignore this and read the next token. */
				908	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				909	else if (arg->tok != tok_eof && arg->tok != tok_eol)
				910	{
				911	/* It's a syntax error. */
				912	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
				913	lr_ignore_rest (ldfile, 0);
				914	break;
				915	}
				916	}
				917	while (++weight_cnt < nrules);
				918
				919	if (weight_cnt < nrules)
				920	{
				921	/* This means the rest of the line uses the current element as
				922	the weight. */
				923	do
				924	{
				925	elem->weights[weight_cnt].w = (struct element_t **)
				926	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
				927	if (ellipsis == tok_none)
				928	elem->weights[weight_cnt].w[0] = elem;
				929	else
				930	elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
				931	elem->weights[weight_cnt].cnt = 1;
				932	}
				933	while (++weight_cnt < nrules);
				934	}
				935	else
				936	{
				937	if (arg->tok == tok_ignore \|\| arg->tok == tok_bsymbol)
				938	{
				939	/* Too many rule values. */
				940	lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
				941	lr_ignore_rest (ldfile, 0);
				942	}
				943	else
				944	lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
				945	}
				946	}
				947
				948
				949	static int
				950	insert_value (struct linereader ldfile, const char symstr, size_t symlen,
				951	const struct charmap_t charmap, struct repertoire_t repertoire,
				952	struct localedef_t *result)
				953	{
				954	/* First find out what kind of symbol this is. */
				955	struct charseq *seq;
				956	uint32_t wc;
				957	struct element_t *elem = NULL;
				958	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
				959
				960	/* Try to find the character in the charmap. */
				961	seq = charmap_find_value (charmap, symstr, symlen);
				962
				963	/* Determine the wide character. */
				964	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
				965	{
				966	wc = repertoire_find_value (repertoire, symstr, symlen);
				967	if (seq != NULL)
				968	seq->ucs4 = wc;
				969	}
				970	else
				971	wc = seq->ucs4;
				972
				973	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
				974	{
				975	/* It's no character, so look through the collation elements and
				976	symbol list. */
				977	void *ptr = elem;
				978	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
				979	{
				980	void *result;
				981	struct symbol_t *sym = NULL;
				982
				983	/* It's also collation element. Therefore it's either a
				984	collating symbol or it's a character which is not
				985	supported by the character set. In the later case we
				986	simply create a dummy entry. */
				987	if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
				988	{
				989	/* It's a collation symbol. */
				990	sym = (struct symbol_t *) result;
				991
				992	elem = sym->order;
				993	}
				994
				995	if (elem == NULL)
				996	{
				997	elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
				998
				999	if (sym != NULL)
				1000	sym->order = elem;
				1001	else
				1002	/* Enter a fake element in the sequence table. This
				1003	won't cause anything in the output since there is
				1004	no multibyte or wide character associated with
				1005	it. */
				1006	insert_entry (&collate->seq_table, symstr, symlen, elem);
				1007	}
				1008	}
				1009	else
				1010	/* Copy the result back. */
				1011	elem = ptr;
				1012	}
				1013	else
				1014	{
				1015	/* Otherwise the symbols stands for a character. */
				1016	void *ptr = elem;
				1017	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
				1018	{
				1019	uint32_t wcs[2] = { wc, 0 };
				1020
				1021	/* We have to allocate an entry. */
				1022	elem = new_element (collate,
				1023	seq != NULL ? (char *) seq->bytes : NULL,
				1024	seq != NULL ? seq->nbytes : 0,
				1025	wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
				1026	symstr, symlen, 1);
				1027
				1028	/* And add it to the table. */
				1029	if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
				1030	/* This cannot happen. */
				1031	assert (! "Internal error");
				1032	}
				1033	else
				1034	{
				1035	/* Copy the result back. */
				1036	elem = ptr;
				1037
				1038	/* Maybe the character was used before the definition. In this case
				1039	we have to insert the byte sequences now. */
				1040	if (elem->mbs == NULL && seq != NULL)
				1041	{
				1042	elem->mbs = obstack_copy0 (&collate->mempool,
				1043	seq->bytes, seq->nbytes);
				1044	elem->nmbs = seq->nbytes;
				1045	}
				1046
				1047	if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
				1048	{
				1049	uint32_t wcs[2] = { wc, 0 };
				1050
				1051	elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
				1052	elem->nwcs = 1;
				1053	}
				1054	}
				1055	}
				1056
				1057	/* Test whether this element is not already in the list. */
				1058	if (elem->next != NULL \|\| elem == collate->cursor)
				1059	{
				1060	lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
				1061	(int) symlen, symstr, elem->file, elem->line);
				1062	lr_ignore_rest (ldfile, 0);
				1063	return 1;
				1064	}
				1065
				1066	insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
				1067
				1068	return 0;
				1069	}
				1070
				1071
				1072	static void
				1073	handle_ellipsis (struct linereader ldfile, const char symstr, size_t symlen,
				1074	enum token_t ellipsis, const struct charmap_t *charmap,
				1075	struct repertoire_t *repertoire,
				1076	struct localedef_t *result)
				1077	{
				1078	struct element_t *startp;
				1079	struct element_t *endp;
				1080	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
				1081
				1082	/* Unlink the entry added for the ellipsis. */
				1083	unlink_element (collate);
				1084	startp = collate->cursor;
				1085
				1086	/* Process and add the end-entry. */
				1087	if (symstr != NULL
				1088	&& insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
				1089	/* Something went wrong with inserting the to-value. This means
				1090	we cannot process the ellipsis. */
				1091	return;
				1092
				1093	/* Reset the cursor. */
				1094	collate->cursor = startp;
				1095
				1096	/* Now we have to handle many different situations:
				1097	- we have to distinguish between the three different ellipsis forms
				1098	- the is the ellipsis at the beginning, in the middle, or at the end.
				1099	*/
				1100	endp = collate->cursor->next;
				1101	assert (symstr == NULL \|\| endp != NULL);
				1102
				1103	/* XXX The following is probably very wrong since also collating symbols
				1104	can appear in ranges. But do we want/can refine the test for that? */
				1105	#if 0
				1106	/* Both, the start and the end symbol, must stand for characters. */
				1107	if ((startp != NULL && (startp->name == NULL \|\| ! startp->is_character))
				1108	\|\| (endp != NULL && (endp->name == NULL\|\| ! endp->is_character)))
				1109	{
				1110	lr_error (ldfile, _("\
				1111	%s: the start and the end symbol of a range must stand for characters"),
				1112	"LC_COLLATE");
				1113	return;
				1114	}
				1115	#endif
				1116
				1117	if (ellipsis == tok_ellipsis3)
				1118	{
				1119	/* One requirement we make here: the length of the byte
				1120	sequences for the first and end character must be the same.
				1121	This is mainly to prevent unwanted effects and this is often
				1122	not what is wanted. */
				1123	size_t len = (startp->mbs != NULL ? startp->nmbs
				1124	: (endp->mbs != NULL ? endp->nmbs : 0));
				1125	char mbcnt[len + 1];
				1126	char mbend[len + 1];
				1127
				1128	/* Well, this should be caught somewhere else already. Just to
				1129	make sure. */
				1130	assert (startp == NULL \|\| startp->wcs == NULL \|\| startp->wcs[1] == 0);
				1131	assert (endp == NULL \|\| endp->wcs == NULL \|\| endp->wcs[1] == 0);
				1132
				1133	if (startp != NULL && endp != NULL
				1134	&& startp->mbs != NULL && endp->mbs != NULL
				1135	&& startp->nmbs != endp->nmbs)
				1136	{
				1137	lr_error (ldfile, _("\
				1138	%s: byte sequences of first and last character must have the same length"),
				1139	"LC_COLLATE");
				1140	return;
				1141	}
				1142
				1143	/* Determine whether we have to generate multibyte sequences. */
				1144	if ((startp == NULL \|\| startp->mbs != NULL)
				1145	&& (endp == NULL \|\| endp->mbs != NULL))
				1146	{
				1147	int cnt;
				1148	int ret;
				1149
				1150	/* Prepare the beginning byte sequence. This is either from the
				1151	beginning byte sequence or it is all nulls if it was an
				1152	initial ellipsis. */
				1153	if (startp == NULL \|\| startp->mbs == NULL)
				1154	memset (mbcnt, '\0', len);
				1155	else
				1156	{
				1157	memcpy (mbcnt, startp->mbs, len);
				1158
				1159	/* And increment it so that the value is the first one we will
				1160	try to insert. */
				1161	for (cnt = len - 1; cnt >= 0; --cnt)
				1162	if (++mbcnt[cnt] != '\0')
				1163	break;
				1164	}
				1165	mbcnt[len] = '\0';
				1166
				1167	/* And the end sequence. */
				1168	if (endp == NULL \|\| endp->mbs == NULL)
				1169	memset (mbend, '\0', len);
				1170	else
				1171	memcpy (mbend, endp->mbs, len);
				1172	mbend[len] = '\0';
				1173
				1174	/* Test whether we have a correct range. */
				1175	ret = memcmp (mbcnt, mbend, len);
				1176	if (ret >= 0)
				1177	{
				1178	if (ret > 0)
				1179	lr_error (ldfile, _("%s: byte sequence of first character of \
				1180	range is not lower than that of the last character"), "LC_COLLATE");
				1181	return;
				1182	}
				1183
				1184	/* Generate the byte sequences data. */
				1185	while (1)
				1186	{
				1187	struct charseq *seq;
				1188
				1189	/* Quite a bit of work ahead. We have to find the character
				1190	definition for the byte sequence and then determine the
				1191	wide character belonging to it. */
				1192	seq = charmap_find_symbol (charmap, mbcnt, len);
				1193	if (seq != NULL)
				1194	{
				1195	struct element_t *elem;
				1196	size_t namelen;
				1197
				1198	/* I don't think this can ever happen. */
				1199	assert (seq->name != NULL);
				1200	namelen = strlen (seq->name);
				1201
				1202	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
				1203	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
				1204	namelen);
				1205
				1206	/* Now we are ready to insert the new value in the
				1207	sequence. Find out whether the element is
				1208	already known. */
				1209	void *ptr;
				1210	if (find_entry (&collate->seq_table, seq->name, namelen,
				1211	&ptr) != 0)
				1212	{
				1213	uint32_t wcs[2] = { seq->ucs4, 0 };
				1214
				1215	/* We have to allocate an entry. */
				1216	elem = new_element (collate, mbcnt, len,
				1217	seq->ucs4 == ILLEGAL_CHAR_VALUE
				1218	? NULL : wcs, seq->name,
				1219	namelen, 1);
				1220
				1221	/* And add it to the table. */
				1222	if (insert_entry (&collate->seq_table, seq->name,
				1223	namelen, elem) != 0)
				1224	/* This cannot happen. */
				1225	assert (! "Internal error");
				1226	}
				1227	else
				1228	/* Copy the result. */
				1229	elem = ptr;
				1230
				1231	/* Test whether this element is not already in the list. */
				1232	if (elem->next != NULL \|\| (collate->cursor != NULL
				1233	&& elem->next == collate->cursor))
				1234	{
				1235	lr_error (ldfile, _("\
				1236	order for `%.*s' already defined at %s:%Zu"),
				1237	(int) namelen, seq->name,
				1238	elem->file, elem->line);
				1239	goto increment;
				1240	}
				1241
				1242	/* Enqueue the new element. */
				1243	elem->last = collate->cursor;
				1244	if (collate->cursor == NULL)
				1245	elem->next = NULL;
				1246	else
				1247	{
				1248	elem->next = collate->cursor->next;
				1249	elem->last->next = elem;
				1250	if (elem->next != NULL)
				1251	elem->next->last = elem;
				1252	}
				1253	if (collate->start == NULL)
				1254	{
				1255	assert (collate->cursor == NULL);
				1256	collate->start = elem;
				1257	}
				1258	collate->cursor = elem;
				1259
				1260	/* Add the weight value. We take them from the
				1261	`ellipsis_weights' member of `collate'. */
				1262	elem->weights = (struct element_list_t *)
				1263	obstack_alloc (&collate->mempool,
				1264	nrules * sizeof (struct element_list_t));
				1265	for (cnt = 0; cnt < nrules; ++cnt)
				1266	if (collate->ellipsis_weight.weights[cnt].cnt == 1
				1267	&& (collate->ellipsis_weight.weights[cnt].w[0]
				1268	== ELEMENT_ELLIPSIS2))
				1269	{
				1270	elem->weights[cnt].w = (struct element_t **)
				1271	obstack_alloc (&collate->mempool,
				1272	sizeof (struct element_t *));
				1273	elem->weights[cnt].w[0] = elem;
				1274	elem->weights[cnt].cnt = 1;
				1275	}
				1276	else
				1277	{
				1278	/* Simply use the weight from `ellipsis_weight'. */
				1279	elem->weights[cnt].w =
				1280	collate->ellipsis_weight.weights[cnt].w;
				1281	elem->weights[cnt].cnt =
				1282	collate->ellipsis_weight.weights[cnt].cnt;
				1283	}
				1284	}
				1285
				1286	/* Increment for the next round. */
				1287	increment:
				1288	for (cnt = len - 1; cnt >= 0; --cnt)
				1289	if (++mbcnt[cnt] != '\0')
				1290	break;
				1291
				1292	/* Find out whether this was all. */
				1293	if (cnt < 0 \|\| memcmp (mbcnt, mbend, len) >= 0)
				1294	/* Yep, that's all. */
				1295	break;
				1296	}
				1297	}
				1298	}
				1299	else
				1300	{
				1301	/* For symbolic range we naturally must have a beginning and an
				1302	end specified by the user. */
				1303	if (startp == NULL)
				1304	lr_error (ldfile, _("\
				1305	%s: symbolic range ellipsis must not directly follow `order_start'"),
				1306	"LC_COLLATE");
				1307	else if (endp == NULL)
				1308	lr_error (ldfile, _("\
				1309	%s: symbolic range ellipsis must not be directly followed by `order_end'"),
				1310	"LC_COLLATE");
				1311	else
				1312	{
				1313	/* Determine the range. To do so we have to determine the
				1314	common prefix of the both names and then the numeric
				1315	values of both ends. */
				1316	size_t lenfrom = strlen (startp->name);
				1317	size_t lento = strlen (endp->name);
				1318	char buf[lento + 1];
				1319	int preflen = 0;
				1320	long int from;
				1321	long int to;
				1322	char *cp;
				1323	int base = ellipsis == tok_ellipsis2 ? 16 : 10;
				1324
				1325	if (lenfrom != lento)
				1326	{
				1327	invalid_range:
				1328	lr_error (ldfile, _("\
				1329	`%s' and `%.*s' are not valid names for symbolic range"),
				1330	startp->name, (int) lento, endp->name);
				1331	return;
				1332	}
				1333
				1334	while (startp->name[preflen] == endp->name[preflen])
				1335	if (startp->name[preflen] == '\0')
				1336	/* Nothing to be done. The start and end point are identical
				1337	and while inserting the end point we have already given
				1338	the user an error message. */
				1339	return;
				1340	else
				1341	++preflen;
				1342
				1343	errno = 0;
				1344	from = strtol (startp->name + preflen, &cp, base);
				1345	if ((from == UINT_MAX && errno == ERANGE) \|\| *cp != '\0')
				1346	goto invalid_range;
				1347
				1348	errno = 0;
				1349	to = strtol (endp->name + preflen, &cp, base);
				1350	if ((to == UINT_MAX && errno == ERANGE) \|\| *cp != '\0')
				1351	goto invalid_range;
				1352
				1353	/* Copy the prefix. */
				1354	memcpy (buf, startp->name, preflen);
				1355
				1356	/* Loop over all values. */
				1357	for (++from; from < to; ++from)
				1358	{
				1359	struct element_t *elem = NULL;
				1360	struct charseq *seq;
				1361	uint32_t wc;
				1362	int cnt;
				1363
				1364	/* Generate the name. */
				1365	sprintf (buf + preflen, base == 10 ? "%0ld" : "%0lX",
				1366	(int) (lenfrom - preflen), from);
				1367
				1368	/* Look whether this name is already defined. */
				1369	void *ptr;
				1370	if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
				1371	{
				1372	/* Copy back the result. */
				1373	elem = ptr;
				1374
				1375	if (elem->next != NULL \|\| (collate->cursor != NULL
				1376	&& elem->next == collate->cursor))
				1377	{
				1378	lr_error (ldfile, _("\
				1379	%s: order for `%.*s' already defined at %s:%Zu"),
				1380	"LC_COLLATE", (int) lenfrom, buf,
				1381	elem->file, elem->line);
				1382	continue;
				1383	}
				1384
				1385	if (elem->name == NULL)
				1386	{
				1387	lr_error (ldfile, _("%s: `%s' must be a character"),
				1388	"LC_COLLATE", buf);
				1389	continue;
				1390	}
				1391	}
				1392
				1393	if (elem == NULL \|\| (elem->mbs == NULL && elem->wcs == NULL))
				1394	{
				1395	/* Search for a character of this name. */
				1396	seq = charmap_find_value (charmap, buf, lenfrom);
				1397	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
				1398	{
				1399	wc = repertoire_find_value (repertoire, buf, lenfrom);
				1400
				1401	if (seq != NULL)
				1402	seq->ucs4 = wc;
				1403	}
				1404	else
				1405	wc = seq->ucs4;
				1406
				1407	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
				1408	/* We don't know anything about a character with this
				1409	name. XXX Should we warn? */
				1410	continue;
				1411
				1412	if (elem == NULL)
				1413	{
				1414	uint32_t wcs[2] = { wc, 0 };
				1415
				1416	/* We have to allocate an entry. */
				1417	elem = new_element (collate,
				1418	seq != NULL
				1419	? (char *) seq->bytes : NULL,
				1420	seq != NULL ? seq->nbytes : 0,
				1421	wc == ILLEGAL_CHAR_VALUE
				1422	? NULL : wcs, buf, lenfrom, 1);
				1423	}
				1424	else
				1425	{
				1426	/* Update the element. */
				1427	if (seq != NULL)
				1428	{
				1429	elem->mbs = obstack_copy0 (&collate->mempool,
				1430	seq->bytes, seq->nbytes);
				1431	elem->nmbs = seq->nbytes;
				1432	}
				1433
				1434	if (wc != ILLEGAL_CHAR_VALUE)
				1435	{
				1436	uint32_t zero = 0;
				1437
				1438	obstack_grow (&collate->mempool,
				1439	&wc, sizeof (uint32_t));
				1440	obstack_grow (&collate->mempool,
				1441	&zero, sizeof (uint32_t));
				1442	elem->wcs = obstack_finish (&collate->mempool);
				1443	elem->nwcs = 1;
				1444	}
				1445	}
				1446
				1447	elem->file = ldfile->fname;
				1448	elem->line = ldfile->lineno;
				1449	elem->section = collate->current_section;
				1450	}
				1451
				1452	/* Enqueue the new element. */
				1453	elem->last = collate->cursor;
				1454	elem->next = collate->cursor->next;
				1455	elem->last->next = elem;
				1456	if (elem->next != NULL)
				1457	elem->next->last = elem;
				1458	collate->cursor = elem;
				1459
				1460	/* Now add the weights. They come from the `ellipsis_weights'
				1461	member of `collate'. */
				1462	elem->weights = (struct element_list_t *)
				1463	obstack_alloc (&collate->mempool,
				1464	nrules * sizeof (struct element_list_t));
				1465	for (cnt = 0; cnt < nrules; ++cnt)
				1466	if (collate->ellipsis_weight.weights[cnt].cnt == 1
				1467	&& (collate->ellipsis_weight.weights[cnt].w[0]
				1468	== ELEMENT_ELLIPSIS2))
				1469	{
				1470	elem->weights[cnt].w = (struct element_t **)
				1471	obstack_alloc (&collate->mempool,
				1472	sizeof (struct element_t *));
				1473	elem->weights[cnt].w[0] = elem;
				1474	elem->weights[cnt].cnt = 1;
				1475	}
				1476	else
				1477	{
				1478	/* Simly use the weight from `ellipsis_weight'. */
				1479	elem->weights[cnt].w =
				1480	collate->ellipsis_weight.weights[cnt].w;
				1481	elem->weights[cnt].cnt =
				1482	collate->ellipsis_weight.weights[cnt].cnt;
				1483	}
				1484	}
				1485	}
				1486	}
				1487	}
				1488
				1489
				1490	static void
				1491	collate_startup (struct linereader ldfile, struct localedef_t locale,
				1492	struct localedef_t *copy_locale, int ignore_content)
				1493	{
				1494	if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
				1495	{
				1496	struct locale_collate_t *collate;
				1497
				1498	if (copy_locale == NULL)
				1499	{
				1500	collate = locale->categories[LC_COLLATE].collate =
				1501	(struct locale_collate_t *)
				1502	xcalloc (1, sizeof (struct locale_collate_t));
				1503
				1504	/* Init the various data structures. */
				1505	init_hash (&collate->elem_table, 100);
				1506	init_hash (&collate->sym_table, 100);
				1507	init_hash (&collate->seq_table, 500);
				1508	obstack_init (&collate->mempool);
				1509
				1510	collate->col_weight_max = -1;
				1511	}
				1512	else
				1513	/* Reuse the copy_locale's data structures. */
				1514	collate = locale->categories[LC_COLLATE].collate =
				1515	copy_locale->categories[LC_COLLATE].collate;
				1516	}
				1517
				1518	ldfile->translate_strings = 0;
				1519	ldfile->return_widestr = 0;
				1520	}
				1521
				1522
				1523	void
				1524	collate_finish (struct localedef_t locale, const struct charmap_t charmap)
				1525	{
				1526	/* Now is the time when we can assign the individual collation
				1527	values for all the symbols. We have possibly different values
				1528	for the wide- and the multibyte-character symbols. This is done
				1529	since it might make a difference in the encoding if there is in
				1530	some cases no multibyte-character but there are wide-characters.
				1531	(The other way around it is not important since theencoded
				1532	collation value in the wide-character case is 32 bits wide and
				1533	therefore requires no encoding).
				1534
				1535	The lowest collation value assigned is 2. Zero is reserved for
				1536	the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
				1537	functions and 1 is used to separate the individual passes for the
				1538	different rules.
				1539
				1540	We also have to construct is list with all the bytes/words which
				1541	can come first in a sequence, followed by all the elements which
				1542	also start with this byte/word. The order is reverse which has
				1543	among others the important effect that longer strings are located
				1544	first in the list. This is required for the output data since
				1545	the algorithm used in `strcoll' etc depends on this.
				1546
				1547	The multibyte case is easy. We simply sort into an array with
				1548	256 elements. */
				1549	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
				1550	int mbact[nrules];
				1551	int wcact;
				1552	int mbseqact;
				1553	int wcseqact;
				1554	struct element_t *runp;
				1555	int i;
				1556	int need_undefined = 0;
				1557	struct section_list *sect;
				1558	int ruleidx;
				1559	int nr_wide_elems = 0;
				1560
				1561	if (collate == NULL)
				1562	{
				1563	/* No data, no check. */
				1564	if (! be_quiet)
				1565	WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
				1566	"LC_COLLATE"));
				1567	return;
				1568	}
				1569
				1570	/* If this assertion is hit change the type in `element_t'. */
				1571	assert (nrules <= sizeof (runp->used_in_level) * 8);
				1572
				1573	/* Make sure that the `position' rule is used either in all sections
				1574	or in none. */
				1575	for (i = 0; i < nrules; ++i)
				1576	for (sect = collate->sections; sect != NULL; sect = sect->next)
				1577	if (sect != collate->current_section
				1578	&& sect->rules != NULL
				1579	&& ((sect->rules[i] & sort_position)
				1580	!= (collate->current_section->rules[i] & sort_position)))
				1581	{
				1582	WITH_CUR_LOCALE (error (0, 0, _("\
				1583	%s: `position' must be used for a specific level in all sections or none"),
				1584	"LC_COLLATE"));
				1585	break;
				1586	}
				1587
				1588	/* Find out which elements are used at which level. At the same
				1589	time we find out whether we have any undefined symbols. */
				1590	runp = collate->start;
				1591	while (runp != NULL)
				1592	{
				1593	if (runp->mbs != NULL)
				1594	{
				1595	for (i = 0; i < nrules; ++i)
				1596	{
				1597	int j;
				1598
				1599	for (j = 0; j < runp->weights[i].cnt; ++j)
				1600	/* A NULL pointer as the weight means IGNORE. */
				1601	if (runp->weights[i].w[j] != NULL)
				1602	{
				1603	if (runp->weights[i].w[j]->weights == NULL)
				1604	{
				1605	WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
				1606	runp->line,
				1607	_("symbol `%s' not defined"),
				1608	runp->weights[i].w[j]->name));
				1609
				1610	need_undefined = 1;
				1611	runp->weights[i].w[j] = &collate->undefined;
				1612	}
				1613	else
				1614	/* Set the bit for the level. */
				1615	runp->weights[i].w[j]->used_in_level \|= 1 << i;
				1616	}
				1617	}
				1618	}
				1619
				1620	/* Up to the next entry. */
				1621	runp = runp->next;
				1622	}
				1623
				1624	/* Walk through the list of defined sequences and assign weights. Also
				1625	create the data structure which will allow generating the single byte
				1626	character based tables.
				1627
				1628	Since at each time only the weights for each of the rules are
				1629	only compared to other weights for this rule it is possible to
				1630	assign more compact weight values than simply counting all
				1631	weights in sequence. We can assign weights from 3, one for each
				1632	rule individually and only for those elements, which are actually
				1633	used for this rule.
				1634
				1635	Why is this important? It is not for the wide char table. But
				1636	it is for the singlebyte output since here larger numbers have to
				1637	be encoded to make it possible to emit the value as a byte
				1638	string. */
				1639	for (i = 0; i < nrules; ++i)
				1640	mbact[i] = 2;
				1641	wcact = 2;
				1642	mbseqact = 0;
				1643	wcseqact = 0;
				1644	runp = collate->start;
				1645	while (runp != NULL)
				1646	{
				1647	/* Determine the order. */
				1648	if (runp->used_in_level != 0)
				1649	{
				1650	runp->mborder = (int *) obstack_alloc (&collate->mempool,
				1651	nrules * sizeof (int));
				1652
				1653	for (i = 0; i < nrules; ++i)
				1654	if ((runp->used_in_level & (1 << i)) != 0)
				1655	runp->mborder[i] = mbact[i]++;
				1656	else
				1657	runp->mborder[i] = 0;
				1658	}
				1659
				1660	if (runp->mbs != NULL)
				1661	{
				1662	struct element_t **eptr;
				1663	struct element_t *lastp = NULL;
				1664
				1665	/* Find the point where to insert in the list. */
				1666	eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
				1667	while (*eptr != NULL)
				1668	{
				1669	if ((*eptr)->nmbs < runp->nmbs)
				1670	break;
				1671
				1672	if ((*eptr)->nmbs == runp->nmbs)
				1673	{
				1674	int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
				1675
				1676	if (c == 0)
				1677	{
				1678	/* This should not happen. It means that we have
				1679	to symbols with the same byte sequence. It is
				1680	of course an error. */
				1681	WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
				1682	(*eptr)->line,
				1683	_("\
				1684	symbol `%s' has the same encoding as"), (*eptr)->name);
				1685	error_at_line (0, 0, runp->file,
				1686	runp->line,
				1687	_("symbol `%s'"),
				1688	runp->name));
				1689	goto dont_insert;
				1690	}
				1691	else if (c < 0)
				1692	/* Insert it here. */
				1693	break;
				1694	}
				1695
				1696	/* To the next entry. */
				1697	lastp = *eptr;
				1698	eptr = &(*eptr)->mbnext;
				1699	}
				1700
				1701	/* Set the pointers. */
				1702	runp->mbnext = *eptr;
				1703	runp->mblast = lastp;
				1704	if (*eptr != NULL)
				1705	(*eptr)->mblast = runp;
				1706	*eptr = runp;
				1707	dont_insert:
				1708	;
				1709	}
				1710
				1711	if (runp->used_in_level)
				1712	{
				1713	runp->wcorder = wcact++;
				1714
				1715	/* We take the opportunity to count the elements which have
				1716	wide characters. */
				1717	++nr_wide_elems;
				1718	}
				1719
				1720	if (runp->is_character)
				1721	{
				1722	if (runp->nmbs == 1)
				1723	collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
				1724
				1725	runp->wcseqorder = wcseqact++;
				1726	}
				1727	else if (runp->mbs != NULL && runp->weights != NULL)
				1728	/* This is for collation elements. */
				1729	runp->wcseqorder = wcseqact++;
				1730
				1731	/* Up to the next entry. */
				1732	runp = runp->next;
				1733	}
				1734
				1735	/* Find out whether any of the `mbheads' entries is unset. In this
				1736	case we use the UNDEFINED entry. */
				1737	for (i = 1; i < 256; ++i)
				1738	if (collate->mbheads[i] == NULL)
				1739	{
				1740	need_undefined = 1;
				1741	collate->mbheads[i] = &collate->undefined;
				1742	}
				1743
				1744	/* Now to the wide character case. */
				1745	collate->wcheads.p = 6;
				1746	collate->wcheads.q = 10;
				1747	wchead_table_init (&collate->wcheads);
				1748
				1749	collate->wcseqorder.p = 6;
				1750	collate->wcseqorder.q = 10;
				1751	collseq_table_init (&collate->wcseqorder);
				1752
				1753	/* Start adding. */
				1754	runp = collate->start;
				1755	while (runp != NULL)
				1756	{
				1757	if (runp->wcs != NULL)
				1758	{
				1759	struct element_t *e;
				1760	struct element_t **eptr;
				1761	struct element_t *lastp;
				1762
				1763	/* Insert the collation sequence value. */
				1764	if (runp->is_character)
				1765	collseq_table_add (&collate->wcseqorder, runp->wcs[0],
				1766	runp->wcseqorder);
				1767
				1768	/* Find the point where to insert in the list. */
				1769	e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
				1770	eptr = &e;
				1771	lastp = NULL;
				1772	while (*eptr != NULL)
				1773	{
				1774	if ((*eptr)->nwcs < runp->nwcs)
				1775	break;
				1776
				1777	if ((*eptr)->nwcs == runp->nwcs)
				1778	{
				1779	int c = wmemcmp ((wchar_t ) (eptr)->wcs,
				1780	(wchar_t *) runp->wcs, runp->nwcs);
				1781
				1782	if (c == 0)
				1783	{
				1784	/* This should not happen. It means that we have
				1785	two symbols with the same byte sequence. It is
				1786	of course an error. */
				1787	WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
				1788	(*eptr)->line,
				1789	_("\
				1790	symbol `%s' has the same encoding as"), (*eptr)->name);
				1791	error_at_line (0, 0, runp->file,
				1792	runp->line,
				1793	_("symbol `%s'"),
				1794	runp->name));
				1795	goto dont_insertwc;
				1796	}
				1797	else if (c < 0)
				1798	/* Insert it here. */
				1799	break;
				1800	}
				1801
				1802	/* To the next entry. */
				1803	lastp = *eptr;
				1804	eptr = &(*eptr)->wcnext;
				1805	}
				1806
				1807	/* Set the pointers. */
				1808	runp->wcnext = *eptr;
				1809	runp->wclast = lastp;
				1810	if (*eptr != NULL)
				1811	(*eptr)->wclast = runp;
				1812	*eptr = runp;
				1813	if (eptr == &e)
				1814	wchead_table_add (&collate->wcheads, runp->wcs[0], e);
				1815	dont_insertwc:
				1816	;
				1817	}
				1818
				1819	/* Up to the next entry. */
				1820	runp = runp->next;
				1821	}
				1822
				1823	/* Now determine whether the UNDEFINED entry is needed and if yes,
				1824	whether it was defined. */
				1825	collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
				1826	if (collate->undefined.file == NULL)
				1827	{
				1828	if (need_undefined)
				1829	{
				1830	/* This seems not to be enforced by recent standards. Don't
				1831	emit an error, simply append UNDEFINED at the end. */
				1832	if (0)
				1833	WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
				1834
				1835	/* Add UNDEFINED at the end. */
				1836	collate->undefined.mborder =
				1837	(int ) obstack_alloc (&collate->mempool, nrules sizeof (int));
				1838
				1839	for (i = 0; i < nrules; ++i)
				1840	collate->undefined.mborder[i] = mbact[i]++;
				1841	}
				1842
				1843	/* In any case we will need the definition for the wide character
				1844	case. But we will not complain that it is missing since the
				1845	specification strangely enough does not seem to account for
				1846	this. */
				1847	collate->undefined.wcorder = wcact++;
				1848	}
				1849
				1850	/* Finally, try to unify the rules for the sections. Whenever the rules
				1851	for a section are the same as those for another section give the
				1852	ruleset the same index. Since there are never many section we can
				1853	use an O(n^2) algorithm here. */
				1854	sect = collate->sections;
				1855	while (sect != NULL && sect->rules == NULL)
				1856	sect = sect->next;
				1857
				1858	/* Bail out if we have no sections because of earlier errors. */
				1859	if (sect == NULL)
				1860	{
				1861	WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
				1862	_("too many errors; giving up")));
				1863	return;
				1864	}
				1865
				1866	ruleidx = 0;
				1867	do
				1868	{
				1869	struct section_list *osect = collate->sections;
				1870
				1871	while (osect != sect)
				1872	if (osect->rules != NULL
				1873	&& memcmp (osect->rules, sect->rules,
				1874	nrules * sizeof (osect->rules[0])) == 0)
				1875	break;
				1876	else
				1877	osect = osect->next;
				1878
				1879	if (osect == sect)
				1880	sect->ruleidx = ruleidx++;
				1881	else
				1882	sect->ruleidx = osect->ruleidx;
				1883
				1884	/* Next section. */
				1885	do
				1886	sect = sect->next;
				1887	while (sect != NULL && sect->rules == NULL);
				1888	}
				1889	while (sect != NULL);
				1890	/* We are currently not prepared for more than 128 rulesets. But this
				1891	should never really be a problem. */
				1892	assert (ruleidx <= 128);
				1893	}
				1894
				1895
				1896	static int32_t
				1897	output_weight (struct obstack pool, struct locale_collate_t collate,
				1898	struct element_t *elem)
				1899	{
				1900	size_t cnt;
				1901	int32_t retval;
				1902
				1903	/* Optimize the use of UNDEFINED. */
				1904	if (elem == &collate->undefined)
				1905	/* The weights are already inserted. */
				1906	return 0;
				1907
				1908	/* This byte can start exactly one collation element and this is
				1909	a single byte. We can directly give the index to the weights. */
				1910	retval = obstack_object_size (pool);
				1911
				1912	/* Construct the weight. */
				1913	for (cnt = 0; cnt < nrules; ++cnt)
				1914	{
				1915	char buf[elem->weights[cnt].cnt * 7];
				1916	int len = 0;
				1917	int i;
				1918
				1919	for (i = 0; i < elem->weights[cnt].cnt; ++i)
				1920	/* Encode the weight value. We do nothing for IGNORE entries. */
				1921	if (elem->weights[cnt].w[i] != NULL)
				1922	len += utf8_encode (&buf[len],
				1923	elem->weights[cnt].w[i]->mborder[cnt]);
				1924
				1925	/* And add the buffer content. */
				1926	obstack_1grow (pool, len);
				1927	obstack_grow (pool, buf, len);
				1928	}
				1929
				1930	return retval \| ((elem->section->ruleidx & 0x7f) << 24);
				1931	}
				1932
				1933
				1934	static int32_t
				1935	output_weightwc (struct obstack pool, struct locale_collate_t collate,
				1936	struct element_t *elem)
				1937	{
				1938	size_t cnt;
				1939	int32_t retval;
				1940
				1941	/* Optimize the use of UNDEFINED. */
				1942	if (elem == &collate->undefined)
				1943	/* The weights are already inserted. */
				1944	return 0;
				1945
				1946	/* This byte can start exactly one collation element and this is
				1947	a single byte. We can directly give the index to the weights. */
				1948	retval = obstack_object_size (pool) / sizeof (int32_t);
				1949
				1950	/* Construct the weight. */
				1951	for (cnt = 0; cnt < nrules; ++cnt)
				1952	{
				1953	int32_t buf[elem->weights[cnt].cnt];
				1954	int i;
				1955	int32_t j;
				1956
				1957	for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
				1958	if (elem->weights[cnt].w[i] != NULL)
				1959	buf[j++] = elem->weights[cnt].w[i]->wcorder;
				1960
				1961	/* And add the buffer content. */
				1962	obstack_int32_grow (pool, j);
				1963
				1964	obstack_grow (pool, buf, j * sizeof (int32_t));
				1965	maybe_swap_uint32_obstack (pool, j);
				1966	}
				1967
				1968	return retval \| ((elem->section->ruleidx & 0x7f) << 24);
				1969	}
				1970
				1971	/* State shared with add_to_tablewc, which only receives a character and an element as arguments.  If localedef is ever threaded, this would need to be a __thread var. */
				1972	static struct
				1973	{
				1974	struct obstack *weightpool;	/* Passed to output_weightwc as the pool the weights are appended to.  */
				1975	struct obstack *extrapool;	/* add_to_tablewc appends 32-bit entries for sequences here; its negated size in words is stored in `tablewc'.  */
				1976	struct obstack *indpool;	/* Its negated size in words is written to `extrapool' for runs of consecutive entries.  */
				1977	struct locale_collate_t *collate;	/* Passed to output_weightwc.  */
				1978	struct collidx_table *tablewc;	/* Target of the collidx_table_add calls in add_to_tablewc.  */
				1979	} atwc;
				1980
				1981	static void add_to_tablewc (uint32_t ch, struct element_t *runp);
				1982
				1983	static void
				1984	add_to_tablewc (uint32_t ch, struct element_t *runp)
				1985	{
				1986	if (runp->wcnext == NULL && runp->nwcs == 1)
				1987	{
				1988	int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
				1989	runp);
				1990	collidx_table_add (atwc.tablewc, ch, weigthidx);
				1991	}
				1992	else
				1993	{
				1994	/* As for the singlebyte table, we recognize sequences and
				1995	compress them. */
				1996
				1997	collidx_table_add (atwc.tablewc, ch,
				1998	-(obstack_object_size (atwc.extrapool)
				1999	/ sizeof (uint32_t)));
				2000
				2001	do
				2002	{
				2003	/* Store the current index in the weight table. We know that
				2004	the current position in the `extrapool' is aligned on a
				2005	32-bit address. */
				2006	int32_t weightidx;
				2007	int added;
				2008
				2009	/* Find out wether this is a single entry or we have more than
				2010	one consecutive entry. */
				2011	if (runp->wcnext != NULL
				2012	&& runp->nwcs == runp->wcnext->nwcs
				2013	&& wmemcmp ((wchar_t *) runp->wcs,
				2014	(wchar_t *)runp->wcnext->wcs,
				2015	runp->nwcs - 1) == 0
				2016	&& (runp->wcs[runp->nwcs - 1]
				2017	== runp->wcnext->wcs[runp->nwcs - 1] + 1))
				2018	{
				2019	int i;
				2020	struct element_t *series_startp = runp;
				2021	struct element_t *curp;
				2022
				2023	/* Now add first the initial byte sequence. */
				2024	added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
				2025	if (sizeof (int32_t) == sizeof (int))
				2026	obstack_make_room (atwc.extrapool, added);
				2027
				2028	/* More than one consecutive entry. We mark this by having
				2029	a negative index into the indirect table. */
				2030	obstack_int32_grow_fast (atwc.extrapool,
				2031	-(obstack_object_size (atwc.indpool)
				2032	/ sizeof (int32_t)));
				2033	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
				2034
				2035	do
				2036	runp = runp->wcnext;
				2037	while (runp->wcnext != NULL
				2038	&& runp->nwcs == runp->wcnext->nwcs
				2039	&& wmemcmp ((wchar_t *) runp->wcs,
				2040	(wchar_t *)runp->wcnext->wcs,
				2041	runp->nwcs - 1) == 0
				2042	&& (runp->wcs[runp->nwcs - 1]
				2043	== runp->wcnext->wcs[runp->nwcs - 1] + 1));
				2044
				2045	/* Now walk backward from here to the beginning. */
				2046	curp = runp;
				2047
				2048	for (i = 1; i < runp->nwcs; ++i)
				2049	obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
				2050
				2051	/* Now find the end of the consecutive sequence and
				2052	add all the indeces in the indirect pool. */
				2053	do
				2054	{
				2055	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
				2056	curp);
				2057	obstack_int32_grow (atwc.indpool, weightidx);
				2058
				2059	curp = curp->wclast;
				2060	}
				2061	while (curp != series_startp);
				2062
				2063	/* Add the final weight. */
				2064	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
				2065	curp);
				2066	obstack_int32_grow (atwc.indpool, weightidx);
				2067
				2068	/* And add the end byte sequence. Without length this
				2069	time. */
				2070	for (i = 1; i < curp->nwcs; ++i)
				2071	obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
				2072	}
				2073	else
				2074	{
				2075	/* A single entry. Simply add the index and the length and
				2076	string (except for the first character which is already
				2077	tested for). */
				2078	int i;
				2079
				2080	/* Output the weight info. */
				2081	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
				2082	runp);
				2083
				2084	assert (runp->nwcs > 0);
				2085	added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
				2086	if (sizeof (int) == sizeof (int32_t))
				2087	obstack_make_room (atwc.extrapool, added);
				2088
				2089	obstack_int32_grow_fast (atwc.extrapool, weightidx);
				2090	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
				2091	for (i = 1; i < runp->nwcs; ++i)
				2092	obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
				2093	}
				2094
				2095	/* Next entry. */
				2096	runp = runp->wcnext;
				2097	}
				2098	while (runp != NULL);
				2099	}
				2100	}
				2101
				2102	void
				2103	collate_output (struct localedef_t locale, const struct charmap_t charmap,
				2104	const char *output_path)
				2105	{
				2106	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
				2107	const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
				2108	struct locale_file file;
				2109	size_t ch;
				2110	int32_t tablemb[256];
				2111	struct obstack weightpool;
				2112	struct obstack extrapool;
				2113	struct obstack indirectpool;
				2114	struct section_list *sect;
				2115	struct collidx_table tablewc;
				2116	uint32_t elem_size;
				2117	uint32_t *elem_table;
				2118	int i;
				2119	struct element_t *runp;
				2120
				2121	init_locale_data (&file, nelems);
				2122	add_locale_uint32 (&file, nrules);
				2123
				2124	/* If we have no LC_COLLATE data emit only the number of rules as zero. */
				2125	if (collate == NULL)
				2126	{
				2127	size_t idx;
				2128	for (idx = 1; idx < nelems; idx++)
				2129	{
				2130	/* The words have to be handled specially. */
				2131	if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
				2132	add_locale_uint32 (&file, 0);
				2133	else
				2134	add_locale_empty (&file);
				2135	}
				2136	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
				2137	return;
				2138	}
				2139
				2140	obstack_init (&weightpool);
				2141	obstack_init (&extrapool);
				2142	obstack_init (&indirectpool);
				2143
				2144	/* Since we are using the sign of an integer to mark indirection the
				2145	offsets in the arrays we are indirectly referring to must not be
				2146	zero since -0 == 0. Therefore we add a bit of dummy content. */
				2147	obstack_int32_grow (&extrapool, 0);
				2148	obstack_int32_grow (&indirectpool, 0);
				2149
				2150	/* Prepare the ruleset table. */
				2151	for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
				2152	if (sect->rules != NULL && sect->ruleidx == i)
				2153	{
				2154	int j;
				2155
				2156	obstack_make_room (&weightpool, nrules);
				2157
				2158	for (j = 0; j < nrules; ++j)
				2159	obstack_1grow_fast (&weightpool, sect->rules[j]);
				2160	++i;
				2161	}
				2162	/* And align the output. */
				2163	i = (nrules * i) % LOCFILE_ALIGN;
				2164	if (i > 0)
				2165	do
				2166	obstack_1grow (&weightpool, '\0');
				2167	while (++i < LOCFILE_ALIGN);
				2168
				2169	add_locale_raw_obstack (&file, &weightpool);
				2170
				2171	/* Generate the 8-bit table. Walk through the lists of sequences
				2172	starting with the same byte and add them one after the other to
				2173	the table. In case we have more than one sequence starting with
				2174	the same byte we have to use extra indirection.
				2175
				2176	First add a record for the NUL byte. This entry will never be used
				2177	so it does not matter. */
				2178	tablemb[0] = 0;
				2179
				2180	/* Now insert the `UNDEFINED' value if it is used. Since this value
				2181	will probably be used more than once it is good to store the
				2182	weights only once. */
				2183	if (collate->undefined.used_in_level != 0)
				2184	output_weight (&weightpool, collate, &collate->undefined);
				2185
				2186	for (ch = 1; ch < 256; ++ch)
				2187	if (collate->mbheads[ch]->mbnext == NULL
				2188	&& collate->mbheads[ch]->nmbs <= 1)
				2189	{
				2190	tablemb[ch] = output_weight (&weightpool, collate,
				2191	collate->mbheads[ch]);
				2192	}
				2193	else
				2194	{
				2195	/* The entries in the list are sorted by length and then
				2196	alphabetically. This is the order in which we will add the
				2197	elements to the collation table. This allows simply walking
				2198	the table in sequence and stopping at the first matching
				2199	entry. Since the longer sequences are coming first in the
				2200	list they have the possibility to match first, just as it
				2201	has to be. In the worst case we are walking to the end of
				2202	the list where we put, if no singlebyte sequence is defined
				2203	in the locale definition, the weights for UNDEFINED.
				2204
				2205	To reduce the length of the search list we compress them a bit.
				2206	This happens by collecting sequences of consecutive byte
				2207	sequences in one entry (having and begin and end byte sequence)
				2208	and add only one index into the weight table. We can find the
				2209	consecutive entries since they are also consecutive in the list. */
				2210	struct element_t *runp = collate->mbheads[ch];
				2211	struct element_t *lastp;
				2212
				2213	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
				2214
				2215	tablemb[ch] = -obstack_object_size (&extrapool);
				2216
				2217	do
				2218	{
				2219	/* Store the current index in the weight table. We know that
				2220	the current position in the `extrapool' is aligned on a
				2221	32-bit address. */
				2222	int32_t weightidx;
				2223	int added;
				2224
				2225	/* Find out wether this is a single entry or we have more than
				2226	one consecutive entry. */
				2227	if (runp->mbnext != NULL
				2228	&& runp->nmbs == runp->mbnext->nmbs
				2229	&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
				2230	&& (runp->mbs[runp->nmbs - 1]
				2231	== runp->mbnext->mbs[runp->nmbs - 1] + 1))
				2232	{
				2233	int i;
				2234	struct element_t *series_startp = runp;
				2235	struct element_t *curp;
				2236
				2237	/* Compute how much space we will need. */
				2238	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
				2239	+ 2 * (runp->nmbs - 1));
				2240	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
				2241	obstack_make_room (&extrapool, added);
				2242
				2243	/* More than one consecutive entry. We mark this by having
				2244	a negative index into the indirect table. */
				2245	obstack_int32_grow_fast (&extrapool,
				2246	-(obstack_object_size (&indirectpool)
				2247	/ sizeof (int32_t)));
				2248
				2249	/* Now search first the end of the series. */
				2250	do
				2251	runp = runp->mbnext;
				2252	while (runp->mbnext != NULL
				2253	&& runp->nmbs == runp->mbnext->nmbs
				2254	&& memcmp (runp->mbs, runp->mbnext->mbs,
				2255	runp->nmbs - 1) == 0
				2256	&& (runp->mbs[runp->nmbs - 1]
				2257	== runp->mbnext->mbs[runp->nmbs - 1] + 1));
				2258
				2259	/* Now walk backward from here to the beginning. */
				2260	curp = runp;
				2261
				2262	assert (runp->nmbs <= 256);
				2263	obstack_1grow_fast (&extrapool, curp->nmbs - 1);
				2264	for (i = 1; i < curp->nmbs; ++i)
				2265	obstack_1grow_fast (&extrapool, curp->mbs[i]);
				2266
				2267	/* Now find the end of the consecutive sequence and
				2268	add all the indeces in the indirect pool. */
				2269	do
				2270	{
				2271	weightidx = output_weight (&weightpool, collate, curp);
				2272	obstack_int32_grow (&indirectpool, weightidx);
				2273
				2274	curp = curp->mblast;
				2275	}
				2276	while (curp != series_startp);
				2277
				2278	/* Add the final weight. */
				2279	weightidx = output_weight (&weightpool, collate, curp);
				2280	obstack_int32_grow (&indirectpool, weightidx);
				2281
				2282	/* And add the end byte sequence. Without length this
				2283	time. */
				2284	for (i = 1; i < curp->nmbs; ++i)
				2285	obstack_1grow_fast (&extrapool, curp->mbs[i]);
				2286	}
				2287	else
				2288	{
				2289	/* A single entry. Simply add the index and the length and
				2290	string (except for the first character which is already
				2291	tested for). */
				2292	int i;
				2293
				2294	/* Output the weight info. */
				2295	weightidx = output_weight (&weightpool, collate, runp);
				2296
				2297	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
				2298	+ runp->nmbs - 1);
				2299	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
				2300	obstack_make_room (&extrapool, added);
				2301
				2302	obstack_int32_grow_fast (&extrapool, weightidx);
				2303	assert (runp->nmbs <= 256);
				2304	obstack_1grow_fast (&extrapool, runp->nmbs - 1);
				2305
				2306	for (i = 1; i < runp->nmbs; ++i)
				2307	obstack_1grow_fast (&extrapool, runp->mbs[i]);
				2308	}
				2309
				2310	/* Add alignment bytes if necessary. */
				2311	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
				2312	obstack_1grow_fast (&extrapool, '\0');
				2313
				2314	/* Next entry. */
				2315	lastp = runp;
				2316	runp = runp->mbnext;
				2317	}
				2318	while (runp != NULL);
				2319
				2320	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
				2321
				2322	/* If the final entry in the list is not a single character we
				2323	add an UNDEFINED entry here. */
				2324	if (lastp->nmbs != 1)
				2325	{
				2326	int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
				2327	obstack_make_room (&extrapool, added);
				2328
				2329	obstack_int32_grow_fast (&extrapool, 0);
				2330	/* XXX What rule? We just pick the first. */
				2331	obstack_1grow_fast (&extrapool, 0);
				2332	/* Length is zero. */
				2333	obstack_1grow_fast (&extrapool, 0);
				2334
				2335	/* Add alignment bytes if necessary. */
				2336	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
				2337	obstack_1grow_fast (&extrapool, '\0');
				2338	}
				2339	}
				2340
				2341	/* Add padding to the tables if necessary. */
				2342	while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
				2343	obstack_1grow (&weightpool, 0);
				2344
				2345	/* Now add the four tables. */
				2346	add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
				2347	add_locale_raw_obstack (&file, &weightpool);
				2348	add_locale_raw_obstack (&file, &extrapool);
				2349	add_locale_raw_obstack (&file, &indirectpool);
				2350
				2351	/* Now the same for the wide character table. We need to store some
				2352	more information here. */
				2353	add_locale_empty (&file);
				2354	add_locale_empty (&file);
				2355	add_locale_empty (&file);
				2356
				2357	/* Since we are using the sign of an integer to mark indirection the
				2358	offsets in the arrays we are indirectly referring to must not be
				2359	zero since -0 == 0. Therefore we add a bit of dummy content. */
				2360	obstack_int32_grow (&extrapool, 0);
				2361	obstack_int32_grow (&indirectpool, 0);
				2362
				2363	/* Now insert the `UNDEFINED' value if it is used. Since this value
				2364	will probably be used more than once it is good to store the
				2365	weights only once. */
				2366	if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
				2367	abort ();
				2368
				2369	/* Generate the table. Walk through the lists of sequences starting
				2370	with the same wide character and add them one after the other to
				2371	the table. In case we have more than one sequence starting with
				2372	the same byte we have to use extra indirection. */
				2373	tablewc.p = 6;
				2374	tablewc.q = 10;
				2375	collidx_table_init (&tablewc);
				2376
				2377	atwc.weightpool = &weightpool;
				2378	atwc.extrapool = &extrapool;
				2379	atwc.indpool = &indirectpool;
				2380	atwc.collate = collate;
				2381	atwc.tablewc = &tablewc;
				2382
				2383	wchead_table_iterate (&collate->wcheads, add_to_tablewc);
				2384
				2385	memset (&atwc, 0, sizeof (atwc));
				2386
				2387	/* Now add the four tables. */
				2388	add_locale_collidx_table (&file, &tablewc);
				2389	add_locale_raw_obstack (&file, &weightpool);
				2390	add_locale_raw_obstack (&file, &extrapool);
				2391	add_locale_raw_obstack (&file, &indirectpool);
				2392
				2393	/* Finally write the table with collation element names out. It is
				2394	a hash table with a simple function which gets the name of the
				2395	character as the input. One character might have many names. The
				2396	value associated with the name is an index into the weight table
				2397	where we are then interested in the first-level weight value.
				2398
				2399	To determine how large the table should be we are counting the
				2400	elements have to put in. Since we are using internal chaining
				2401	using a secondary hash function we have to make the table a bit
				2402	larger to avoid extremely long search times. We can achieve
				2403	good results with a 40% larger table than there are entries. */
				2404	elem_size = 0;
				2405	runp = collate->start;
				2406	while (runp != NULL)
				2407	{
				2408	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
				2409	/* Yep, the element really counts. */
				2410	++elem_size;
				2411
				2412	runp = runp->next;
				2413	}
				2414	/* Add 40% and find the next prime number. */
				2415	elem_size = next_prime (elem_size * 1.4);
				2416
				2417	/* Allocate the table. Each entry consists of two words: the hash
				2418	value and an index in a secondary table which provides the index
				2419	into the weight table and the string itself (so that a match can
				2420	be determined). */
				2421	elem_table = (uint32_t *) obstack_alloc (&extrapool,
				2422	elem_size * 2 * sizeof (uint32_t));
				2423	memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
				2424
				2425	/* Now add the elements. */
				2426	runp = collate->start;
				2427	while (runp != NULL)
				2428	{
				2429	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
				2430	{
				2431	/* Compute the hash value of the name. */
				2432	uint32_t namelen = strlen (runp->name);
				2433	uint32_t hash = elem_hash (runp->name, namelen);
				2434	size_t idx = hash % elem_size;
				2435	#ifndef NDEBUG
				2436	size_t start_idx = idx;
				2437	#endif
				2438
				2439	if (elem_table[idx * 2] != 0)
				2440	{
				2441	/* The spot is already taken. Try iterating using the value
				2442	from the secondary hashing function. */
				2443	size_t iter = hash % (elem_size - 2) + 1;
				2444
				2445	do
				2446	{
				2447	idx += iter;
				2448	if (idx >= elem_size)
				2449	idx -= elem_size;
				2450	assert (idx != start_idx);
				2451	}
				2452	while (elem_table[idx * 2] != 0);
				2453	}
				2454	/* This is the spot where we will insert the value. */
				2455	elem_table[idx * 2] = hash;
				2456	elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
				2457
				2458	/* The string itself including length. */
				2459	obstack_1grow (&extrapool, namelen);
				2460	obstack_grow (&extrapool, runp->name, namelen);
				2461
				2462	/* And the multibyte representation. */
				2463	obstack_1grow (&extrapool, runp->nmbs);
				2464	obstack_grow (&extrapool, runp->mbs, runp->nmbs);
				2465
				2466	/* And align again to 32 bits. */
				2467	if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
				2468	obstack_grow (&extrapool, "\0\0",
				2469	(sizeof (int32_t)
				2470	- ((1 + namelen + 1 + runp->nmbs)
				2471	% sizeof (int32_t))));
				2472
				2473	/* Now some 32-bit values: multibyte collation sequence,
				2474	wide char string (including length), and wide char
				2475	collation sequence. */
				2476	obstack_int32_grow (&extrapool, runp->mbseqorder);
				2477
				2478	obstack_int32_grow (&extrapool, runp->nwcs);
				2479	obstack_grow (&extrapool, runp->wcs,
				2480	runp->nwcs * sizeof (uint32_t));
				2481	maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
				2482
				2483	obstack_int32_grow (&extrapool, runp->wcseqorder);
				2484	}
				2485
				2486	runp = runp->next;
				2487	}
				2488
				2489	/* Prepare to write out this data. */
				2490	add_locale_uint32 (&file, elem_size);
				2491	add_locale_uint32_array (&file, elem_table, 2 * elem_size);
				2492	add_locale_raw_obstack (&file, &extrapool);
				2493	add_locale_raw_data (&file, collate->mbseqorder, 256);
				2494	add_locale_collseq_table (&file, &collate->wcseqorder);
				2495	add_locale_string (&file, charmap->code_set_name);
				2496	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
				2497
				2498	obstack_free (&weightpool, NULL);
				2499	obstack_free (&extrapool, NULL);
				2500	obstack_free (&indirectpool, NULL);
				2501	}
				2502
				2503
				2504	static enum token_t
				2505	skip_to (struct linereader ldfile, struct locale_collate_t collate,
				2506	const struct charmap_t *charmap, int to_endif)
				2507	{
				2508	while (1)
				2509	{
				2510	struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
				2511	enum token_t nowtok = now->tok;
				2512
				2513	if (nowtok == tok_eof \|\| nowtok == tok_end)
				2514	return nowtok;
				2515
				2516	if (nowtok == tok_ifdef \|\| nowtok == tok_ifndef)
				2517	{
				2518	lr_error (ldfile, _("%s: nested conditionals not supported"),
				2519	"LC_COLLATE");
				2520	nowtok = skip_to (ldfile, collate, charmap, tok_endif);
				2521	if (nowtok == tok_eof \|\| nowtok == tok_end)
				2522	return nowtok;
				2523	}
				2524	else if (nowtok == tok_endif \|\| (!to_endif && nowtok == tok_else))
				2525	{
				2526	lr_ignore_rest (ldfile, 1);
				2527	return nowtok;
				2528	}
				2529	else if (!to_endif && (nowtok == tok_elifdef \|\| nowtok == tok_elifndef))
				2530	{
				2531	/* Do not read the rest of the line. */
				2532	return nowtok;
				2533	}
				2534	else if (nowtok == tok_else)
				2535	{
				2536	lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
				2537	}
				2538
				2539	lr_ignore_rest (ldfile, 0);
				2540	}
				2541	}
				2542
				2543
				2544	void
				2545	collate_read (struct linereader ldfile, struct localedef_t result,
				2546	const struct charmap_t charmap, const char repertoire_name,
				2547	int ignore_content)
				2548	{
				2549	struct repertoire_t *repertoire = NULL;
				2550	struct locale_collate_t *collate;
				2551	struct token *now;
				2552	struct token *arg = NULL;
				2553	enum token_t nowtok;
				2554	enum token_t was_ellipsis = tok_none;
				2555	struct localedef_t *copy_locale = NULL;
				2556	/* Parsing state:
				2557	0 - start
				2558	1 - between `order-start' and `order-end'
				2559	2 - after `order-end'
				2560	3 - after `reorder-after', waiting for `reorder-end'
				2561	4 - after `reorder-end'
				2562	5 - after `reorder-sections-after', waiting for `reorder-sections-end'
				2563	6 - after `reorder-sections-end'
				2564	*/
				2565	int state = 0;
				2566
				2567	/* Get the repertoire we have to use. */
				2568	if (repertoire_name != NULL)
				2569	repertoire = repertoire_read (repertoire_name);
				2570
				2571	/* The rest of the line containing `LC_COLLATE' must be free. */
				2572	lr_ignore_rest (ldfile, 1);
				2573
				2574	while (1)
				2575	{
				2576	do
				2577	{
				2578	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2579	nowtok = now->tok;
				2580	}
				2581	while (nowtok == tok_eol);
				2582
				2583	if (nowtok != tok_define)
				2584	break;
				2585
				2586	if (ignore_content)
				2587	lr_ignore_rest (ldfile, 0);
				2588	else
				2589	{
				2590	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				2591	if (arg->tok != tok_ident)
				2592	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
				2593	else
				2594	{
				2595	/* Simply add the new symbol. */
				2596	struct name_list newsym = xmalloc (sizeof (newsym)
				2597	+ arg->val.str.lenmb + 1);
				2598	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
				2599	newsym->str[arg->val.str.lenmb] = '\0';
				2600	newsym->next = defined;
				2601	defined = newsym;
				2602
				2603	lr_ignore_rest (ldfile, 1);
				2604	}
				2605	}
				2606	}
				2607
				2608	if (nowtok == tok_copy)
				2609	{
				2610	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2611	if (now->tok != tok_string)
				2612	{
				2613	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
				2614
				2615	skip_category:
				2616	do
				2617	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2618	while (now->tok != tok_eof && now->tok != tok_end);
				2619
				2620	if (now->tok != tok_eof
				2621	\|\| (now = lr_token (ldfile, charmap, result, NULL, verbose),
				2622	now->tok == tok_eof))
				2623	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
				2624	else if (now->tok != tok_lc_collate)
				2625	{
				2626	lr_error (ldfile, _("\
				2627	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
				2628	lr_ignore_rest (ldfile, 0);
				2629	}
				2630	else
				2631	lr_ignore_rest (ldfile, 1);
				2632
				2633	return;
				2634	}
				2635
				2636	if (! ignore_content)
				2637	{
				2638	/* Get the locale definition. */
				2639	copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
				2640	repertoire_name, charmap, NULL);
				2641	if ((copy_locale->avail & COLLATE_LOCALE) == 0)
				2642	{
				2643	/* Not yet loaded. So do it now. */
				2644	if (locfile_read (copy_locale, charmap) != 0)
				2645	goto skip_category;
				2646	}
				2647
				2648	if (copy_locale->categories[LC_COLLATE].collate == NULL)
				2649	return;
				2650	}
				2651
				2652	lr_ignore_rest (ldfile, 1);
				2653
				2654	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2655	nowtok = now->tok;
				2656	}
				2657
				2658	/* Prepare the data structures. */
				2659	collate_startup (ldfile, result, copy_locale, ignore_content);
				2660	collate = result->categories[LC_COLLATE].collate;
				2661
				2662	while (1)
				2663	{
				2664	char ucs4buf[10];
				2665	char *symstr;
				2666	size_t symlen;
				2667
				2668	/* Of course we don't proceed beyond the end of file. */
				2669	if (nowtok == tok_eof)
				2670	break;
				2671
				2672	/* Ignore empty lines. */
				2673	if (nowtok == tok_eol)
				2674	{
				2675	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2676	nowtok = now->tok;
				2677	continue;
				2678	}
				2679
				2680	switch (nowtok)
				2681	{
				2682	case tok_copy:
				2683	/* Allow copying other locales. */
				2684	now = lr_token (ldfile, charmap, result, NULL, verbose);
				2685	if (now->tok != tok_string)
				2686	goto err_label;
				2687
				2688	if (! ignore_content)
				2689	load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
				2690	charmap, result);
				2691
				2692	lr_ignore_rest (ldfile, 1);
				2693	break;
				2694
				2695	case tok_coll_weight_max:
				2696	/* Ignore the rest of the line if we don't need the input of
				2697	this line. */
				2698	if (ignore_content)
				2699	{
				2700	lr_ignore_rest (ldfile, 0);
				2701	break;
				2702	}
				2703
				2704	if (state != 0)
				2705	goto err_label;
				2706
				2707	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				2708	if (arg->tok != tok_number)
				2709	goto err_label;
				2710	if (collate->col_weight_max != -1)
				2711	lr_error (ldfile, _("%s: duplicate definition of `%s'"),
				2712	"LC_COLLATE", "col_weight_max");
				2713	else
				2714	collate->col_weight_max = arg->val.num;
				2715	lr_ignore_rest (ldfile, 1);
				2716	break;
				2717
				2718	case tok_section_symbol:
				2719	/* Ignore the rest of the line if we don't need the input of
				2720	this line. */
				2721	if (ignore_content)
				2722	{
				2723	lr_ignore_rest (ldfile, 0);
				2724	break;
				2725	}
				2726
				2727	if (state != 0)
				2728	goto err_label;
				2729
				2730	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2731	if (arg->tok != tok_bsymbol)
				2732	goto err_label;
				2733	else if (!ignore_content)
				2734	{
				2735	/* Check whether this section is already known. */
				2736	struct section_list *known = collate->sections;
				2737	while (known != NULL)
				2738	{
				2739	if (strcmp (known->name, arg->val.str.startmb) == 0)
				2740	break;
				2741	known = known->next;
				2742	}
				2743
				2744	if (known != NULL)
				2745	{
				2746	lr_error (ldfile,
				2747	_("%s: duplicate declaration of section `%s'"),
				2748	"LC_COLLATE", arg->val.str.startmb);
				2749	free (arg->val.str.startmb);
				2750	}
				2751	else
				2752	collate->sections = make_seclist_elem (collate,
				2753	arg->val.str.startmb,
				2754	collate->sections);
				2755
				2756	lr_ignore_rest (ldfile, known == NULL);
				2757	}
				2758	else
				2759	{
				2760	free (arg->val.str.startmb);
				2761	lr_ignore_rest (ldfile, 0);
				2762	}
				2763	break;
				2764
				2765	case tok_collating_element:
				2766	/* Ignore the rest of the line if we don't need the input of
				2767	this line. */
				2768	if (ignore_content)
				2769	{
				2770	lr_ignore_rest (ldfile, 0);
				2771	break;
				2772	}
				2773
				2774	if (state != 0 && state != 2)
				2775	goto err_label;
				2776
				2777	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2778	if (arg->tok != tok_bsymbol)
				2779	goto err_label;
				2780	else
				2781	{
				2782	const char *symbol = arg->val.str.startmb;
				2783	size_t symbol_len = arg->val.str.lenmb;
				2784
				2785	/* Next the `from' keyword. */
				2786	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2787	if (arg->tok != tok_from)
				2788	{
				2789	free ((char *) symbol);
				2790	goto err_label;
				2791	}
				2792
				2793	ldfile->return_widestr = 1;
				2794	ldfile->translate_strings = 1;
				2795
				2796	/* Finally the string with the replacement. */
				2797	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2798
				2799	ldfile->return_widestr = 0;
				2800	ldfile->translate_strings = 0;
				2801
				2802	if (arg->tok != tok_string)
				2803	goto err_label;
				2804
				2805	if (!ignore_content && symbol != NULL)
				2806	{
				2807	/* The name is already defined. */
				2808	if (check_duplicate (ldfile, collate, charmap,
				2809	repertoire, symbol, symbol_len))
				2810	goto col_elem_free;
				2811
				2812	if (arg->val.str.startmb != NULL)
				2813	insert_entry (&collate->elem_table, symbol, symbol_len,
				2814	new_element (collate,
				2815	arg->val.str.startmb,
				2816	arg->val.str.lenmb - 1,
				2817	arg->val.str.startwc,
				2818	symbol, symbol_len, 0));
				2819	}
				2820	else
				2821	{
				2822	col_elem_free:
				2823	free ((char *) symbol);
				2824	free (arg->val.str.startmb);
				2825	free (arg->val.str.startwc);
				2826	}
				2827	lr_ignore_rest (ldfile, 1);
				2828	}
				2829	break;
				2830
				2831	case tok_collating_symbol:
				2832	/* Ignore the rest of the line if we don't need the input of
				2833	this line. */
				2834	if (ignore_content)
				2835	{
				2836	lr_ignore_rest (ldfile, 0);
				2837	break;
				2838	}
				2839
				2840	if (state != 0 && state != 2)
				2841	goto err_label;
				2842
				2843	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2844	if (arg->tok != tok_bsymbol)
				2845	goto err_label;
				2846	else
				2847	{
				2848	char *symbol = arg->val.str.startmb;
				2849	size_t symbol_len = arg->val.str.lenmb;
				2850	char *endsymbol = NULL;
				2851	size_t endsymbol_len = 0;
				2852	enum token_t ellipsis = tok_none;
				2853
				2854	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2855	if (arg->tok == tok_ellipsis2 \|\| arg->tok == tok_ellipsis4)
				2856	{
				2857	ellipsis = arg->tok;
				2858
				2859	arg = lr_token (ldfile, charmap, result, repertoire,
				2860	verbose);
				2861	if (arg->tok != tok_bsymbol)
				2862	{
				2863	free (symbol);
				2864	goto err_label;
				2865	}
				2866
				2867	endsymbol = arg->val.str.startmb;
				2868	endsymbol_len = arg->val.str.lenmb;
				2869
				2870	lr_ignore_rest (ldfile, 1);
				2871	}
				2872	else if (arg->tok != tok_eol)
				2873	{
				2874	free (symbol);
				2875	goto err_label;
				2876	}
				2877
				2878	if (!ignore_content)
				2879	{
				2880	if (symbol == NULL
				2881	\|\| (ellipsis != tok_none && endsymbol == NULL))
				2882	{
				2883	lr_error (ldfile, _("\
				2884	%s: unknown character in collating symbol name"),
				2885	"LC_COLLATE");
				2886	goto col_sym_free;
				2887	}
				2888	else if (ellipsis == tok_none)
				2889	{
				2890	/* A single symbol, no ellipsis. */
				2891	if (check_duplicate (ldfile, collate, charmap,
				2892	repertoire, symbol, symbol_len))
				2893	/* The name is already defined. */
				2894	goto col_sym_free;
				2895
				2896	insert_entry (&collate->sym_table, symbol, symbol_len,
				2897	new_symbol (collate, symbol, symbol_len));
				2898	}
				2899	else if (symbol_len != endsymbol_len)
				2900	{
				2901	col_sym_inv_range:
				2902	lr_error (ldfile,
				2903	_("invalid names for character range"));
				2904	goto col_sym_free;
				2905	}
				2906	else
				2907	{
				2908	/* Oh my, we have to handle an ellipsis. First, as
				2909	usual, determine the common prefix and then
				2910	convert the rest into a range. */
				2911	size_t prefixlen;
				2912	unsigned long int from;
				2913	unsigned long int to;
				2914	char *endp;
				2915
				2916	for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
				2917	if (symbol[prefixlen] != endsymbol[prefixlen])
				2918	break;
				2919
				2920	/* Convert the rest into numbers. */
				2921	symbol[symbol_len] = '\0';
				2922	from = strtoul (&symbol[prefixlen], &endp,
				2923	ellipsis == tok_ellipsis2 ? 16 : 10);
				2924	if (*endp != '\0')
				2925	goto col_sym_inv_range;
				2926
				2927	endsymbol[symbol_len] = '\0';
				2928	to = strtoul (&endsymbol[prefixlen], &endp,
				2929	ellipsis == tok_ellipsis2 ? 16 : 10);
				2930	if (*endp != '\0')
				2931	goto col_sym_inv_range;
				2932
				2933	if (from > to)
				2934	goto col_sym_inv_range;
				2935
				2936	/* Now loop over all entries. */
				2937	while (from <= to)
				2938	{
				2939	char *symbuf;
				2940
				2941	symbuf = (char *) obstack_alloc (&collate->mempool,
				2942	symbol_len + 1);
				2943
				2944	/* Create the name. */
				2945	sprintf (symbuf,
				2946	ellipsis == tok_ellipsis2
				2947	? "%.s%.lX" : "%.s%.lu",
				2948	(int) prefixlen, symbol,
				2949	(int) (symbol_len - prefixlen), from);
				2950
				2951	if (check_duplicate (ldfile, collate, charmap,
				2952	repertoire, symbuf, symbol_len))
				2953	/* The name is already defined. */
				2954	goto col_sym_free;
				2955
				2956	insert_entry (&collate->sym_table, symbuf,
				2957	symbol_len,
				2958	new_symbol (collate, symbuf,
				2959	symbol_len));
				2960
				2961	/* Increment the counter. */
				2962	++from;
				2963	}
				2964
				2965	goto col_sym_free;
				2966	}
				2967	}
				2968	else
				2969	{
				2970	col_sym_free:
				2971	free (symbol);
				2972	free (endsymbol);
				2973	}
				2974	}
				2975	break;
				2976
				2977	case tok_symbol_equivalence:
				2978	/* Ignore the rest of the line if we don't need the input of
				2979	this line. */
				2980	if (ignore_content)
				2981	{
				2982	lr_ignore_rest (ldfile, 0);
				2983	break;
				2984	}
				2985
				2986	if (state != 0)
				2987	goto err_label;
				2988
				2989	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				2990	if (arg->tok != tok_bsymbol)
				2991	goto err_label;
				2992	else
				2993	{
				2994	const char *newname = arg->val.str.startmb;
				2995	size_t newname_len = arg->val.str.lenmb;
				2996	const char *symname;
				2997	size_t symname_len;
				2998	void symval; / Actually struct symbol_t* */
				2999
				3000	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3001	if (arg->tok != tok_bsymbol)
				3002	{
				3003	free ((char *) newname);
				3004	goto err_label;
				3005	}
				3006
				3007	symname = arg->val.str.startmb;
				3008	symname_len = arg->val.str.lenmb;
				3009
				3010	if (newname == NULL)
				3011	{
				3012	lr_error (ldfile, _("\
				3013	%s: unknown character in equivalent definition name"),
				3014	"LC_COLLATE");
				3015
				3016	sym_equiv_free:
				3017	free ((char *) newname);
				3018	free ((char *) symname);
				3019	break;
				3020	}
				3021	if (symname == NULL)
				3022	{
				3023	lr_error (ldfile, _("\
				3024	%s: unknown character in equivalent definition value"),
				3025	"LC_COLLATE");
				3026	goto sym_equiv_free;
				3027	}
				3028
				3029	/* See whether the symbol name is already defined. */
				3030	if (find_entry (&collate->sym_table, symname, symname_len,
				3031	&symval) != 0)
				3032	{
				3033	lr_error (ldfile, _("\
				3034	%s: unknown symbol `%s' in equivalent definition"),
				3035	"LC_COLLATE", symname);
				3036	goto sym_equiv_free;
				3037	}
				3038
				3039	if (insert_entry (&collate->sym_table,
				3040	newname, newname_len, symval) < 0)
				3041	{
				3042	lr_error (ldfile, _("\
				3043	error while adding equivalent collating symbol"));
				3044	goto sym_equiv_free;
				3045	}
				3046
				3047	free ((char *) symname);
				3048	}
				3049	lr_ignore_rest (ldfile, 1);
				3050	break;
				3051
				3052	case tok_script:
				3053	/* Ignore the rest of the line if we don't need the input of
				3054	this line. */
				3055	if (ignore_content)
				3056	{
				3057	lr_ignore_rest (ldfile, 0);
				3058	break;
				3059	}
				3060
				3061	/* We get told about the scripts we know. */
				3062	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3063	if (arg->tok != tok_bsymbol)
				3064	goto err_label;
				3065	else
				3066	{
				3067	struct section_list *runp = collate->known_sections;
				3068	char *name;
				3069
				3070	while (runp != NULL)
				3071	if (strncmp (runp->name, arg->val.str.startmb,
				3072	arg->val.str.lenmb) == 0
				3073	&& runp->name[arg->val.str.lenmb] == '\0')
				3074	break;
				3075	else
				3076	runp = runp->def_next;
				3077
				3078	if (runp != NULL)
				3079	{
				3080	lr_error (ldfile, _("duplicate definition of script `%s'"),
				3081	runp->name);
				3082	lr_ignore_rest (ldfile, 0);
				3083	break;
				3084	}
				3085
				3086	runp = (struct section_list ) xcalloc (1, sizeof (runp));
				3087	name = (char *) xmalloc (arg->val.str.lenmb + 1);
				3088	memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
				3089	name[arg->val.str.lenmb] = '\0';
				3090	runp->name = name;
				3091
				3092	runp->def_next = collate->known_sections;
				3093	collate->known_sections = runp;
				3094	}
				3095	lr_ignore_rest (ldfile, 1);
				3096	break;
				3097
				3098	case tok_order_start:
				3099	/* Ignore the rest of the line if we don't need the input of
				3100	this line. */
				3101	if (ignore_content)
				3102	{
				3103	lr_ignore_rest (ldfile, 0);
				3104	break;
				3105	}
				3106
				3107	if (state != 0 && state != 1 && state != 2)
				3108	goto err_label;
				3109	state = 1;
				3110
				3111	/* The 14652 draft does not specify whether all `order_start' lines
				3112	must contain the same number of sort-rules, but 14651 does. So
				3113	we require this here as well. */
				3114	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3115	if (arg->tok == tok_bsymbol)
				3116	{
				3117	/* This better should be a section name. */
				3118	struct section_list *sp = collate->known_sections;
				3119	while (sp != NULL
				3120	&& (sp->name == NULL
				3121	\|\| strncmp (sp->name, arg->val.str.startmb,
				3122	arg->val.str.lenmb) != 0
				3123	\|\| sp->name[arg->val.str.lenmb] != '\0'))
				3124	sp = sp->def_next;
				3125
				3126	if (sp == NULL)
				3127	{
				3128	lr_error (ldfile, _("\
				3129	%s: unknown section name `%.*s'"),
				3130	"LC_COLLATE", (int) arg->val.str.lenmb,
				3131	arg->val.str.startmb);
				3132	/* We use the error section. */
				3133	collate->current_section = &collate->error_section;
				3134
				3135	if (collate->error_section.first == NULL)
				3136	{
				3137	/* Insert &collate->error_section at the end of
				3138	the collate->sections list. */
				3139	if (collate->sections == NULL)
				3140	collate->sections = &collate->error_section;
				3141	else
				3142	{
				3143	sp = collate->sections;
				3144	while (sp->next != NULL)
				3145	sp = sp->next;
				3146
				3147	sp->next = &collate->error_section;
				3148	}
				3149	collate->error_section.next = NULL;
				3150	}
				3151	}
				3152	else
				3153	{
				3154	/* One should not be allowed to open the same
				3155	section twice. */
				3156	if (sp->first != NULL)
				3157	lr_error (ldfile, _("\
				3158	%s: multiple order definitions for section `%s'"),
				3159	"LC_COLLATE", sp->name);
				3160	else
				3161	{
				3162	/* Insert sp in the collate->sections list,
				3163	right after collate->current_section. */
				3164	if (collate->current_section != NULL)
				3165	{
				3166	sp->next = collate->current_section->next;
				3167	collate->current_section->next = sp;
				3168	}
				3169	else if (collate->sections == NULL)
				3170	/* This is the first section to be defined. */
				3171	collate->sections = sp;
				3172
				3173	collate->current_section = sp;
				3174	}
				3175
				3176	/* Next should come the end of the line or a semicolon. */
				3177	arg = lr_token (ldfile, charmap, result, repertoire,
				3178	verbose);
				3179	if (arg->tok == tok_eol)
				3180	{
				3181	uint32_t cnt;
				3182
				3183	/* This means we have exactly one rule: `forward'. */
				3184	if (nrules > 1)
				3185	lr_error (ldfile, _("\
				3186	%s: invalid number of sorting rules"),
				3187	"LC_COLLATE");
				3188	else
				3189	nrules = 1;
				3190	sp->rules = obstack_alloc (&collate->mempool,
				3191	(sizeof (enum coll_sort_rule)
				3192	* nrules));
				3193	for (cnt = 0; cnt < nrules; ++cnt)
				3194	sp->rules[cnt] = sort_forward;
				3195
				3196	/* Next line. */
				3197	break;
				3198	}
				3199
				3200	/* Get the next token. */
				3201	arg = lr_token (ldfile, charmap, result, repertoire,
				3202	verbose);
				3203	}
				3204	}
				3205	else
				3206	{
				3207	/* There is no section symbol. Therefore we use the unnamed
				3208	section. */
				3209	collate->current_section = &collate->unnamed_section;
				3210
				3211	if (collate->unnamed_section_defined)
				3212	lr_error (ldfile, _("\
				3213	%s: multiple order definitions for unnamed section"),
				3214	"LC_COLLATE");
				3215	else
				3216	{
				3217	/* Insert &collate->unnamed_section at the beginning of
				3218	the collate->sections list. */
				3219	collate->unnamed_section.next = collate->sections;
				3220	collate->sections = &collate->unnamed_section;
				3221	collate->unnamed_section_defined = true;
				3222	}
				3223	}
				3224
				3225	/* Now read the direction names. */
				3226	read_directions (ldfile, arg, charmap, repertoire, result);
				3227
				3228	/* From now we need the strings untranslated. */
				3229	ldfile->translate_strings = 0;
				3230	break;
				3231
				3232	case tok_order_end:
				3233	/* Ignore the rest of the line if we don't need the input of
				3234	this line. */
				3235	if (ignore_content)
				3236	{
				3237	lr_ignore_rest (ldfile, 0);
				3238	break;
				3239	}
				3240
				3241	if (state != 1)
				3242	goto err_label;
				3243
				3244	/* Handle ellipsis at end of list. */
				3245	if (was_ellipsis != tok_none)
				3246	{
				3247	handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
				3248	repertoire, result);
				3249	was_ellipsis = tok_none;
				3250	}
				3251
				3252	state = 2;
				3253	lr_ignore_rest (ldfile, 1);
				3254	break;
				3255
				3256	case tok_reorder_after:
				3257	/* Ignore the rest of the line if we don't need the input of
				3258	this line. */
				3259	if (ignore_content)
				3260	{
				3261	lr_ignore_rest (ldfile, 0);
				3262	break;
				3263	}
				3264
				3265	if (state == 1)
				3266	{
				3267	lr_error (ldfile, _("%s: missing `order_end' keyword"),
				3268	"LC_COLLATE");
				3269	state = 2;
				3270
				3271	/* Handle ellipsis at end of list. */
				3272	if (was_ellipsis != tok_none)
				3273	{
				3274	handle_ellipsis (ldfile, arg->val.str.startmb,
				3275	arg->val.str.lenmb, was_ellipsis, charmap,
				3276	repertoire, result);
				3277	was_ellipsis = tok_none;
				3278	}
				3279	}
				3280	else if (state == 0 && copy_locale == NULL)
				3281	goto err_label;
				3282	else if (state != 0 && state != 2 && state != 3)
				3283	goto err_label;
				3284	state = 3;
				3285
				3286	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3287	if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
				3288	{
				3289	/* Find this symbol in the sequence table. */
				3290	char ucsbuf[10];
				3291	char *startmb;
				3292	size_t lenmb;
				3293	struct element_t *insp;
				3294	int no_error = 1;
				3295	void *ptr;
				3296
				3297	if (arg->tok == tok_bsymbol)
				3298	{
				3299	startmb = arg->val.str.startmb;
				3300	lenmb = arg->val.str.lenmb;
				3301	}
				3302	else
				3303	{
				3304	sprintf (ucsbuf, "U%08X", arg->val.ucs4);
				3305	startmb = ucsbuf;
				3306	lenmb = 9;
				3307	}
				3308
				3309	if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
				3310	/* Yes, the symbol exists. Simply point the cursor
				3311	to it. */
				3312	collate->cursor = (struct element_t *) ptr;
				3313	else
				3314	{
				3315	struct symbol_t *symbp;
				3316	void *ptr;
				3317
				3318	if (find_entry (&collate->sym_table, startmb, lenmb,
				3319	&ptr) == 0)
				3320	{
				3321	symbp = ptr;
				3322
				3323	if (symbp->order->last != NULL
				3324	\|\| symbp->order->next != NULL)
				3325	collate->cursor = symbp->order;
				3326	else
				3327	{
				3328	/* This is a collating symbol but its position
				3329	is not yet defined. */
				3330	lr_error (ldfile, _("\
				3331	%s: order for collating symbol %.*s not yet defined"),
				3332	"LC_COLLATE", (int) lenmb, startmb);
				3333	collate->cursor = NULL;
				3334	no_error = 0;
				3335	}
				3336	}
				3337	else if (find_entry (&collate->elem_table, startmb, lenmb,
				3338	&ptr) == 0)
				3339	{
				3340	insp = (struct element_t *) ptr;
				3341
				3342	if (insp->last != NULL \|\| insp->next != NULL)
				3343	collate->cursor = insp;
				3344	else
				3345	{
				3346	/* This is a collating element but its position
				3347	is not yet defined. */
				3348	lr_error (ldfile, _("\
				3349	%s: order for collating element %.*s not yet defined"),
				3350	"LC_COLLATE", (int) lenmb, startmb);
				3351	collate->cursor = NULL;
				3352	no_error = 0;
				3353	}
				3354	}
				3355	else
				3356	{
				3357	/* This is bad. The symbol after which we have to
				3358	insert does not exist. */
				3359	lr_error (ldfile, _("\
				3360	%s: cannot reorder after %.*s: symbol not known"),
				3361	"LC_COLLATE", (int) lenmb, startmb);
				3362	collate->cursor = NULL;
				3363	no_error = 0;
				3364	}
				3365	}
				3366
				3367	lr_ignore_rest (ldfile, no_error);
				3368	}
				3369	else
				3370	/* This must not happen. */
				3371	goto err_label;
				3372	break;
				3373
				3374	case tok_reorder_end:
				3375	/* Ignore the rest of the line if we don't need the input of
				3376	this line. */
				3377	if (ignore_content)
				3378	break;
				3379
				3380	if (state != 3)
				3381	goto err_label;
				3382	state = 4;
				3383	lr_ignore_rest (ldfile, 1);
				3384	break;
				3385
				3386	case tok_reorder_sections_after:
				3387	/* Ignore the rest of the line if we don't need the input of
				3388	this line. */
				3389	if (ignore_content)
				3390	{
				3391	lr_ignore_rest (ldfile, 0);
				3392	break;
				3393	}
				3394
				3395	if (state == 1)
				3396	{
				3397	lr_error (ldfile, _("%s: missing `order_end' keyword"),
				3398	"LC_COLLATE");
				3399	state = 2;
				3400
				3401	/* Handle ellipsis at end of list. */
				3402	if (was_ellipsis != tok_none)
				3403	{
				3404	handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
				3405	repertoire, result);
				3406	was_ellipsis = tok_none;
				3407	}
				3408	}
				3409	else if (state == 3)
				3410	{
				3411	WITH_CUR_LOCALE (error (0, 0, _("\
				3412	%s: missing `reorder-end' keyword"), "LC_COLLATE"));
				3413	state = 4;
				3414	}
				3415	else if (state != 2 && state != 4)
				3416	goto err_label;
				3417	state = 5;
				3418
				3419	/* Get the name of the sections we are adding after. */
				3420	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
				3421	if (arg->tok == tok_bsymbol)
				3422	{
				3423	/* Now find a section with this name. */
				3424	struct section_list *runp = collate->sections;
				3425
				3426	while (runp != NULL)
				3427	{
				3428	if (runp->name != NULL
				3429	&& strlen (runp->name) == arg->val.str.lenmb
				3430	&& memcmp (runp->name, arg->val.str.startmb,
				3431	arg->val.str.lenmb) == 0)
				3432	break;
				3433
				3434	runp = runp->next;
				3435	}
				3436
				3437	if (runp != NULL)
				3438	collate->current_section = runp;
				3439	else
				3440	{
				3441	/* This is bad. The section after which we have to
				3442	reorder does not exist. Therefore we cannot
				3443	process the whole rest of this reorder
				3444	specification. */
				3445	lr_error (ldfile, _("%s: section `%.*s' not known"),
				3446	"LC_COLLATE", (int) arg->val.str.lenmb,
				3447	arg->val.str.startmb);
				3448
				3449	do
				3450	{
				3451	lr_ignore_rest (ldfile, 0);
				3452
				3453	now = lr_token (ldfile, charmap, result, NULL, verbose);
				3454	}
				3455	while (now->tok == tok_reorder_sections_after
				3456	\|\| now->tok == tok_reorder_sections_end
				3457	\|\| now->tok == tok_end);
				3458
				3459	/* Process the token we just saw. */
				3460	nowtok = now->tok;
				3461	continue;
				3462	}
				3463	}
				3464	else
				3465	/* This must not happen. */
				3466	goto err_label;
				3467	break;
				3468
				3469	case tok_reorder_sections_end:
				3470	/* Ignore the rest of the line if we don't need the input of
				3471	this line. */
				3472	if (ignore_content)
				3473	break;
				3474
				3475	if (state != 5)
				3476	goto err_label;
				3477	state = 6;
				3478	lr_ignore_rest (ldfile, 1);
				3479	break;
				3480
				3481	case tok_bsymbol:
				3482	case tok_ucs4:
				3483	/* Ignore the rest of the line if we don't need the input of
				3484	this line. */
				3485	if (ignore_content)
				3486	{
				3487	lr_ignore_rest (ldfile, 0);
				3488	break;
				3489	}
				3490
				3491	if (state != 0 && state != 1 && state != 3 && state != 5)
				3492	goto err_label;
				3493
				3494	if ((state == 0 \|\| state == 5) && nowtok == tok_ucs4)
				3495	goto err_label;
				3496
				3497	if (nowtok == tok_ucs4)
				3498	{
				3499	snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
				3500	symstr = ucs4buf;
				3501	symlen = 9;
				3502	}
				3503	else if (arg != NULL)
				3504	{
				3505	symstr = arg->val.str.startmb;
				3506	symlen = arg->val.str.lenmb;
				3507	}
				3508	else
				3509	{
				3510	lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
				3511	(int) ldfile->token.val.str.lenmb,
				3512	ldfile->token.val.str.startmb);
				3513	break;
				3514	}
				3515
				3516	struct element_t *seqp;
				3517	if (state == 0)
				3518	{
				3519	/* We are outside an `order_start' region. This means
				3520	we must only accept definitions of values for
				3521	collation symbols since these are purely abstract
				3522	values and don't need directions associated. */
				3523	void *ptr;
				3524
				3525	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
				3526	{
				3527	seqp = ptr;
				3528
				3529	/* It's already defined. First check whether this
				3530	is really a collating symbol. */
				3531	if (seqp->is_character)
				3532	goto err_label;
				3533
				3534	goto move_entry;
				3535	}
				3536	else
				3537	{
				3538	void *result;
				3539
				3540	if (find_entry (&collate->sym_table, symstr, symlen,
				3541	&result) != 0)
				3542	/* No collating symbol, it's an error. */
				3543	goto err_label;
				3544
				3545	/* Maybe this is the first time we define a symbol
				3546	value and it is before the first actual section. */
				3547	if (collate->sections == NULL)
				3548	collate->sections = collate->current_section =
				3549	&collate->symbol_section;
				3550	}
				3551
				3552	if (was_ellipsis != tok_none)
				3553	{
				3554	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
				3555	charmap, repertoire, result);
				3556
				3557	/* Remember that we processed the ellipsis. */
				3558	was_ellipsis = tok_none;
				3559
				3560	/* And don't add the value a second time. */
				3561	break;
				3562	}
				3563	}
				3564	else if (state == 3)
				3565	{
				3566	/* It is possible that we already have this collation sequence.
				3567	In this case we move the entry. */
				3568	void *sym;
				3569	void *ptr;
				3570
				3571	/* If the symbol after which we have to insert was not found
				3572	ignore all entries. */
				3573	if (collate->cursor == NULL)
				3574	{
				3575	lr_ignore_rest (ldfile, 0);
				3576	break;
				3577	}
				3578
				3579	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
				3580	{
				3581	seqp = (struct element_t *) ptr;
				3582	goto move_entry;
				3583	}
				3584
				3585	if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
				3586	&& (seqp = ((struct symbol_t *) sym)->order) != NULL)
				3587	goto move_entry;
				3588
				3589	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
				3590	&& (seqp = (struct element_t *) ptr,
				3591	seqp->last != NULL \|\| seqp->next != NULL
				3592	\|\| (collate->start != NULL && seqp == collate->start)))
				3593	{
				3594	move_entry:
				3595	/* Remove the entry from the old position. */
				3596	if (seqp->last == NULL)
				3597	collate->start = seqp->next;
				3598	else
				3599	seqp->last->next = seqp->next;
				3600	if (seqp->next != NULL)
				3601	seqp->next->last = seqp->last;
				3602
				3603	/* We also have to check whether this entry is the
				3604	first or last of a section. */
				3605	if (seqp->section->first == seqp)
				3606	{
				3607	if (seqp->section->first == seqp->section->last)
				3608	/* This section has no content anymore. */
				3609	seqp->section->first = seqp->section->last = NULL;
				3610	else
				3611	seqp->section->first = seqp->next;
				3612	}
				3613	else if (seqp->section->last == seqp)
				3614	seqp->section->last = seqp->last;
				3615
				3616	/* Now insert it in the new place. */
				3617	insert_weights (ldfile, seqp, charmap, repertoire, result,
				3618	tok_none);
				3619	break;
				3620	}
				3621
				3622	/* Otherwise we just add a new entry. */
				3623	}
				3624	else if (state == 5)
				3625	{
				3626	/* We are reordering sections. Find the named section. */
				3627	struct section_list *runp = collate->sections;
				3628	struct section_list *prevp = NULL;
				3629
				3630	while (runp != NULL)
				3631	{
				3632	if (runp->name != NULL
				3633	&& strlen (runp->name) == symlen
				3634	&& memcmp (runp->name, symstr, symlen) == 0)
				3635	break;
				3636
				3637	prevp = runp;
				3638	runp = runp->next;
				3639	}
				3640
				3641	if (runp == NULL)
				3642	{
				3643	lr_error (ldfile, _("%s: section `%.*s' not known"),
				3644	"LC_COLLATE", (int) symlen, symstr);
				3645	lr_ignore_rest (ldfile, 0);
				3646	}
				3647	else
				3648	{
				3649	if (runp != collate->current_section)
				3650	{
				3651	/* Remove the named section from the old place and
				3652	insert it in the new one. */
				3653	prevp->next = runp->next;
				3654
				3655	runp->next = collate->current_section->next;
				3656	collate->current_section->next = runp;
				3657	collate->current_section = runp;
				3658	}
				3659
				3660	/* Process the rest of the line which might change
				3661	the collation rules. */
				3662	arg = lr_token (ldfile, charmap, result, repertoire,
				3663	verbose);
				3664	if (arg->tok != tok_eof && arg->tok != tok_eol)
				3665	read_directions (ldfile, arg, charmap, repertoire,
				3666	result);
				3667	}
				3668	break;
				3669	}
				3670	else if (was_ellipsis != tok_none)
				3671	{
				3672	/* Using the information in the `ellipsis_weight'
				3673	element and this and the last value we have to handle
				3674	the ellipsis now. */
				3675	assert (state == 1);
				3676
				3677	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
				3678	repertoire, result);
				3679
				3680	/* Remember that we processed the ellipsis. */
				3681	was_ellipsis = tok_none;
				3682
				3683	/* And don't add the value a second time. */
				3684	break;
				3685	}
				3686
				3687	/* Now insert in the new place. */
				3688	insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
				3689	break;
				3690
				3691	case tok_undefined:
				3692	/* Ignore the rest of the line if we don't need the input of
				3693	this line. */
				3694	if (ignore_content)
				3695	{
				3696	lr_ignore_rest (ldfile, 0);
				3697	break;
				3698	}
				3699
				3700	if (state != 1)
				3701	goto err_label;
				3702
				3703	if (was_ellipsis != tok_none)
				3704	{
				3705	lr_error (ldfile,
				3706	_("%s: cannot have `%s' as end of ellipsis range"),
				3707	"LC_COLLATE", "UNDEFINED");
				3708
				3709	unlink_element (collate);
				3710	was_ellipsis = tok_none;
				3711	}
				3712
				3713	/* See whether UNDEFINED already appeared somewhere. */
				3714	if (collate->undefined.next != NULL
				3715	\|\| &collate->undefined == collate->cursor)
				3716	{
				3717	lr_error (ldfile,
				3718	_("%s: order for `%.*s' already defined at %s:%Zu"),
				3719	"LC_COLLATE", 9, "UNDEFINED",
				3720	collate->undefined.file,
				3721	collate->undefined.line);
				3722	lr_ignore_rest (ldfile, 0);
				3723	}
				3724	else
				3725	/* Parse the weights. */
				3726	insert_weights (ldfile, &collate->undefined, charmap,
				3727	repertoire, result, tok_none);
				3728	break;
				3729
				3730	case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
				3731	case tok_ellipsis3: /* absolute ellipsis */
				3732	case tok_ellipsis4: /* symbolic decimal ellipsis */
				3733	/* This is the symbolic (decimal or hexadecimal) or absolute
				3734	ellipsis. */
				3735	if (was_ellipsis != tok_none)
				3736	goto err_label;
				3737
				3738	if (state != 0 && state != 1 && state != 3)
				3739	goto err_label;
				3740
				3741	was_ellipsis = nowtok;
				3742
				3743	insert_weights (ldfile, &collate->ellipsis_weight, charmap,
				3744	repertoire, result, nowtok);
				3745	break;
				3746
				3747	case tok_end:
				3748	seen_end:
				3749	/* Next we assume `LC_COLLATE'. */
				3750	if (!ignore_content)
				3751	{
				3752	if (state == 0 && copy_locale == NULL)
				3753	/* We must either see a copy statement or have
				3754	ordering values. */
				3755	lr_error (ldfile,
				3756	_("%s: empty category description not allowed"),
				3757	"LC_COLLATE");
				3758	else if (state == 1)
				3759	{
				3760	lr_error (ldfile, _("%s: missing `order_end' keyword"),
				3761	"LC_COLLATE");
				3762
				3763	/* Handle ellipsis at end of list. */
				3764	if (was_ellipsis != tok_none)
				3765	{
				3766	handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
				3767	repertoire, result);
				3768	was_ellipsis = tok_none;
				3769	}
				3770	}
				3771	else if (state == 3)
				3772	WITH_CUR_LOCALE (error (0, 0, _("\
				3773	%s: missing `reorder-end' keyword"), "LC_COLLATE"));
				3774	else if (state == 5)
				3775	WITH_CUR_LOCALE (error (0, 0, _("\
				3776	%s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
				3777	}
				3778	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				3779	if (arg->tok == tok_eof)
				3780	break;
				3781	if (arg->tok == tok_eol)
				3782	lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
				3783	else if (arg->tok != tok_lc_collate)
				3784	lr_error (ldfile, _("\
				3785	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
				3786	lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
				3787	return;
				3788
				3789	case tok_define:
				3790	if (ignore_content)
				3791	{
				3792	lr_ignore_rest (ldfile, 0);
				3793	break;
				3794	}
				3795
				3796	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				3797	if (arg->tok != tok_ident)
				3798	goto err_label;
				3799
				3800	/* Simply add the new symbol. */
				3801	struct name_list newsym = xmalloc (sizeof (newsym)
				3802	+ arg->val.str.lenmb + 1);
				3803	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
				3804	newsym->str[arg->val.str.lenmb] = '\0';
				3805	newsym->next = defined;
				3806	defined = newsym;
				3807
				3808	lr_ignore_rest (ldfile, 1);
				3809	break;
				3810
				3811	case tok_undef:
				3812	if (ignore_content)
				3813	{
				3814	lr_ignore_rest (ldfile, 0);
				3815	break;
				3816	}
				3817
				3818	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				3819	if (arg->tok != tok_ident)
				3820	goto err_label;
				3821
				3822	/* Remove _all_ occurrences of the symbol from the list. */
				3823	struct name_list *prevdef = NULL;
				3824	struct name_list *curdef = defined;
				3825	while (curdef != NULL)
				3826	if (strncmp (arg->val.str.startmb, curdef->str,
				3827	arg->val.str.lenmb) == 0
				3828	&& curdef->str[arg->val.str.lenmb] == '\0')
				3829	{
				3830	if (prevdef == NULL)
				3831	defined = curdef->next;
				3832	else
				3833	prevdef->next = curdef->next;
				3834
				3835	struct name_list *olddef = curdef;
				3836	curdef = curdef->next;
				3837
				3838	free (olddef);
				3839	}
				3840	else
				3841	{
				3842	prevdef = curdef;
				3843	curdef = curdef->next;
				3844	}
				3845
				3846	lr_ignore_rest (ldfile, 1);
				3847	break;
				3848
				3849	case tok_ifdef:
				3850	case tok_ifndef:
				3851	if (ignore_content)
				3852	{
				3853	lr_ignore_rest (ldfile, 0);
				3854	break;
				3855	}
				3856
				3857	found_ifdef:
				3858	arg = lr_token (ldfile, charmap, result, NULL, verbose);
				3859	if (arg->tok != tok_ident)
				3860	goto err_label;
				3861	lr_ignore_rest (ldfile, 1);
				3862
				3863	if (collate->else_action == else_none)
				3864	{
				3865	curdef = defined;
				3866	while (curdef != NULL)
				3867	if (strncmp (arg->val.str.startmb, curdef->str,
				3868	arg->val.str.lenmb) == 0
				3869	&& curdef->str[arg->val.str.lenmb] == '\0')
				3870	break;
				3871	else
				3872	curdef = curdef->next;
				3873
				3874	if ((nowtok == tok_ifdef && curdef != NULL)
				3875	\|\| (nowtok == tok_ifndef && curdef == NULL))
				3876	{
				3877	/* We have to use the if-branch. */
				3878	collate->else_action = else_ignore;
				3879	}
				3880	else
				3881	{
				3882	/* We have to use the else-branch, if there is one. */
				3883	nowtok = skip_to (ldfile, collate, charmap, 0);
				3884	if (nowtok == tok_else)
				3885	collate->else_action = else_seen;
				3886	else if (nowtok == tok_elifdef)
				3887	{
				3888	nowtok = tok_ifdef;
				3889	goto found_ifdef;
				3890	}
				3891	else if (nowtok == tok_elifndef)
				3892	{
				3893	nowtok = tok_ifndef;
				3894	goto found_ifdef;
				3895	}
				3896	else if (nowtok == tok_eof)
				3897	goto seen_eof;
				3898	else if (nowtok == tok_end)
				3899	goto seen_end;
				3900	}
				3901	}
				3902	else
				3903	{
				3904	/* XXX Should it really become necessary to support nested
				3905	preprocessor handling we will push the state here. */
				3906	lr_error (ldfile, _("%s: nested conditionals not supported"),
				3907	"LC_COLLATE");
				3908	nowtok = skip_to (ldfile, collate, charmap, 1);
				3909	if (nowtok == tok_eof)
				3910	goto seen_eof;
				3911	else if (nowtok == tok_end)
				3912	goto seen_end;
				3913	}
				3914	break;
				3915
				3916	case tok_elifdef:
				3917	case tok_elifndef:
				3918	case tok_else:
				3919	if (ignore_content)
				3920	{
				3921	lr_ignore_rest (ldfile, 0);
				3922	break;
				3923	}
				3924
				3925	lr_ignore_rest (ldfile, 1);
				3926
				3927	if (collate->else_action == else_ignore)
				3928	{
				3929	/* Ignore everything until the endif. */
				3930	nowtok = skip_to (ldfile, collate, charmap, 1);
				3931	if (nowtok == tok_eof)
				3932	goto seen_eof;
				3933	else if (nowtok == tok_end)
				3934	goto seen_end;
				3935	}
				3936	else
				3937	{
				3938	assert (collate->else_action == else_none);
				3939	lr_error (ldfile, _("\
				3940	%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
				3941	nowtok == tok_else ? "else"
				3942	: nowtok == tok_elifdef ? "elifdef" : "elifndef");
				3943	}
				3944	break;
				3945
				3946	case tok_endif:
				3947	if (ignore_content)
				3948	{
				3949	lr_ignore_rest (ldfile, 0);
				3950	break;
				3951	}
				3952
				3953	lr_ignore_rest (ldfile, 1);
				3954
				3955	if (collate->else_action != else_ignore
				3956	&& collate->else_action != else_seen)
				3957	lr_error (ldfile, _("\
				3958	%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
				3959
				3960	/* XXX If we support nested preprocessor directives we pop
				3961	the state here. */
				3962	collate->else_action = else_none;
				3963	break;
				3964
				3965	default:
				3966	err_label:
				3967	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
				3968	}
				3969
				3970	/* Prepare for the next round. */
				3971	now = lr_token (ldfile, charmap, result, NULL, verbose);
				3972	nowtok = now->tok;
				3973	}
				3974
				3975	seen_eof:
				3976	/* When we come here we reached the end of the file. */
				3977	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
				3978	}