ap/build/uClibc/extra/locale/gen_collate.c - T106_DC - Gitiles

 /*
  * Usage:
  * gen_collate <INPUTDIR> [-o OUTPUTFILE] LOCALE ...
  *
  * Generate collation data from locales LOCALE.
  * Reads all LOCALE from INPUTDIR and writes collation data to OUTPUTFILE.
  *
  * The output file defaults to "locale_collate.h".
  */
 /* TODO:
  *
  * add UNDEFINED at end if not specified
  * convert POSITION -> FORWARD,POSITION
  *
  *
  * deal with lowercase in <Uhhhh>
  *
  * what about reorders that keep the same rule?
  *
  * remove "unused" collation elements? (probably doesn't save much)
  *
  * add_rule function ... returns index into rule table after possibly adding custom-indexed rule
  * but don't forget about multichar weights... replace with strings of indexes
  *
  */

 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
 #include <stdarg.h>
 #include <limits.h>
 #include <ctype.h>
 #include <assert.h>
 #include <errno.h>
 #include <search.h>

 /*
  * Descriptor for one base locale.  Instances are stored in
  * base_locale_array[] (capacity BASE_LOCALE_LEN).
  *
  * NOTE(review): the *_offset members presumably index into the shared
  * uint16_t output buffers declared below (wcs2colidt_buffer,
  * index2weight_buffer, ...) -- confirm against the code that fills
  * this structure in; it is not part of this section of the file.
  */
 typedef struct {
 	char *name;					/*  */

 	int num_weights;			/*  */

 	int ii_shift;				/*  */
 	int ti_shift;				/*  */
 	int ii_len;					/*  */
 	int ti_len;					/*  */
 	int max_weight;				/*  */
 	int num_col_base;			/*  */
 	int max_col_index;			/*  */
 	int undefined_idx;			/*  */
 	int range_low;				/*  */
 	int range_count;			/* high - low */
 	int range_base_weight;		/*  */
 	int num_starters;			/*  */

 	int range_rule_offset;		/*  */
 	int wcs2colidt_offset;		/*  */
 	int index2weight_offset;	/*  */
 	int index2ruleidx_offset;	/*  */
 	int multistart_offset;		/*  */

 } base_locale_t;

 #define BASE_LOCALE_LEN 20
 static base_locale_t base_locale_array[BASE_LOCALE_LEN];
 static size_t base_locale_len;

 typedef struct {
 	char *name;					/*  */

 	int base_idx;				/*  */

 	int undefined_idx;			/*  */

 	int overrides_offset;		/*  */
 	int multistart_offset;		/*  */
 } der_locale_t;

 #define DER_LOCALE_LEN 300
 static der_locale_t der_locale_array[DER_LOCALE_LEN];
 static size_t der_locale_len;


 #define OVERRIDE_LEN  50000
 static uint16_t override_buffer[OVERRIDE_LEN];
 static size_t override_len;

 #define MULTISTART_LEN 10000
 static uint16_t multistart_buffer[MULTISTART_LEN];
 static size_t multistart_len;

 #define WCS2COLIDT_LEN 200000
 static uint16_t wcs2colidt_buffer[WCS2COLIDT_LEN];
 static size_t wcs2colidt_len;

 #define INDEX2WEIGHT_LEN 200000
 static uint16_t index2weight_buffer[INDEX2WEIGHT_LEN];
 static size_t index2weight_len;

 static uint16_t index2ruleidx_buffer[INDEX2WEIGHT_LEN];
 static size_t index2ruleidx_len;

 #define WEIGHTSTR_LEN 10000
 static uint16_t weightstr_buffer[WEIGHTSTR_LEN];
 static size_t weightstr_len;

 #define RULETABLE_LEN (1L<<16)
 static uint16_t ruletable_buffer[RULETABLE_LEN];
 static size_t ruletable_len;


 #define RANGE (0x10000UL)

 typedef uint16_t tbl_item;

 static uint16_t u16_buf[10000];
 static int u16_buf_len;
 static int u16_starter;

 typedef struct {
 	uint16_t ii_len;
 	uint16_t ti_len;
 	uint16_t ut_len;

 	unsigned char ii_shift;
 	unsigned char ti_shift;

 	tbl_item *ii;
 	tbl_item *ti;
 	tbl_item *ut;
 } table_data;


 static size_t newopt(tbl_item *ut, size_t usize, int shift, table_data *tbl);


 #define MAX_COLLATION_WEIGHTS 4

 #define MAX_FNO 1
 #define MAX_FILES  (MAX_FNO + 1)

 static FILE *fstack[MAX_FILES];
 static char *fname[MAX_FILES];
 static int lineno[MAX_FILES];
 static int fno = -1;

 static tbl_item wcs2index[RANGE];

 static char linebuf[1024];
 static char *pos;
 static char *pos_e = NULL;
 static char end_of_token = 0;		/* slot to save */

 #define IN_ORDER			0x01
 #define IN_REORDER			0x02
 #define IN_REORDER_SECTIONS	0x04
 static int order_state;
 static int cur_num_weights;		/* number of weights in current use */
 static char cur_rule[MAX_COLLATION_WEIGHTS];

 static int anonsection = 0;

 typedef struct ll_item_struct ll_item_t;

 /*
  * Generic doubly-linked list node used for every list in this file.
  *
  * The next/prev pointers are the first two members because nodes are
  * linked with insque(), which requires that layout.  'data' points at
  * the payload and 'data_type' records which DT_* kind of payload it is
  * (both are supplied to new_ll_item()).
  */
 struct ll_item_struct {
 	ll_item_t *next;
 	ll_item_t *prev;
 	void *data;
 	int data_type;
 	int idx;
 };

 static ll_item_t *reorder_section_ptr = NULL;
 static int superset;
 static int superset_order_start_cnt; /* only support one order for now */
 static int superset_in_sync;
 static ll_item_t *comm_cur_ptr;
 static ll_item_t *comm_prev_ptr;

 /*
  * Collation rule flags.  One flag byte is kept per weight level (see
  * cur_rule[] and section_t.rules[]); flags may be OR-ed together, e.g.
  * R_FORWARD | R_POSITION.
  */
 enum {
 	R_FORWARD =		0x01,
 	R_POSITION =	0x02,
 	R_BACKWARD =	0x04		/* must be largest in value */
 };

 /*
  * One complete weight specification for a collating item.
  *
  * Only the first num_weights entries of rule[] and colitem[] are filled
  * in by the parsers (do_range / add_weight); rule[i] holds R_* flags and
  * colitem[i] the symbol naming the weight at level i.
  */
 typedef struct {
 	size_t num_weights;
 	char rule[MAX_COLLATION_WEIGHTS];
 	const char *colitem[MAX_COLLATION_WEIGHTS];
 } weight_t;

 static void *root_weight = NULL;
 size_t unique_weights = 0;

 typedef struct {
 	const char *symbol;
 	weight_t *weight;
 } weighted_item_t;

 typedef struct {
 	const char *symbol1;
 	const char *symbol2;
 	int length;
 	weight_t *weight;
 } range_item_t;

 /*
  * A named group of weighted items (created by new_section()).
  *
  * itm_list is a circular list built with insque(); num_items is
  * incremented each time an item is appended.  rules[] holds R_* flag
  * bytes, num_rules of which are in use.
  */
 typedef struct {
 	const char *name;
 	ll_item_t *itm_list;		/* weighted_item_t list .. circular!!! */
 	size_t num_items;
 	size_t num_rules;
 	char rules[MAX_COLLATION_WEIGHTS];
 } section_t;

 static section_t *cur_section = NULL;

 typedef struct {
 	const char *symbol;
 	ll_item_t *node;
 } wi_index_t;

 typedef struct col_locale_struct col_locale_t;

 struct  col_locale_struct {
 	char *name;
 	void *root_colitem;			/* all base and derived, or just derived */
 	void *root_element;
 	void *root_scripts;
 	void *root_wi_index;
 	void *root_wi_index_reordered;
 	ll_item_t *section_list;
 	col_locale_t *base_locale;	/* null if this is a base */
 	void *root_derived_wi;
 	ll_item_t *derived_list;
 	void *root_starter_char;
 	void *root_starter_all;
 	ll_item_t *undefined_idx;
 };

 typedef struct {
 	const char *symbol;
 	int idx;
 } col_index_t;

 static void *root_col_locale = NULL;

 typedef struct {
     const char *keyword;
     void (*handler)(void);
 } keyword_table_t;

 typedef struct {
     const char *string;
     const char *element;	/* NULL if collating symbol */
 } colitem_t;

 static col_locale_t *cur_base = NULL;
 static col_locale_t *cur_derived = NULL;
 static col_locale_t *cur_col = NULL;

 static void *root_sym = NULL;
 static size_t num_sym = 0;
 static size_t mem_sym = 0;

 static const char *inputdir;
 static size_t inputdir_len;
 static unsigned verbose = 0;
 enum {
 	VINFO = (1<<0),
 	VDETAIL = (1<<1),
 };

 static void error_msg(const char *fmt, ...) __attribute__ ((noreturn, format (printf, 1, 2)));
 static void *xmalloc(size_t n);
 static char *xsymdup(const char *s); /* only allocate once... store in a tree */
 static void pushfile(char *filename);
 static void popfile(void);
 static void processfile(void);
 static int iscommentchar(int);
 static void eatwhitespace(void);
 static int next_line(void);
 static char *next_token(void);
 static void do_unrecognized(void);
 static col_locale_t *new_col_locale(char *name);
 static ll_item_t *new_ll_item(int data_type, void *data);
 static weight_t *register_weight(weight_t *w);
 static size_t ll_len(ll_item_t *l);
 static size_t ll_count(ll_item_t *l, int mask);
 static void add_wi_index(ll_item_t *l);
 static size_t tnumnodes(const void *root);
 static ll_item_t *find_wi_index(const char *sym, col_locale_t *cl);
 static void mark_reordered(const char *sym);
 static ll_item_t *find_wi_index_reordered(const char *sym);
 static ll_item_t *next_comm_ptr(void);
 static ll_item_t *init_comm_ptr(void);
 static ll_item_t *find_ll_last(ll_item_t *p);
 static void dump_weights(const char *name);
 static void finalize_base(void);
 static int is_ucode(const char *s);
 static int sym_cmp(const void *n1, const void *n2);
 static void do_starter_lists(col_locale_t *cl);
 static void dump_base_locale(int n);
 static void dump_der_locale(int n);
 static void dump_collate(FILE *fp);

 /*
  * Payload kinds stored in ll_item_t.data_type.  The values are distinct
  * bits so that callers can test them with a mask (do_weight() checks
  * "data_type & DT_SECTION", and ll_count() takes a mask argument).
  */
 enum {
 	DT_SECTION = 0x01,
 	DT_WEIGHTED = 0x02,
 	DT_REORDER = 0x04,		  /* a section to support reorder_after */
 	DT_COL_LOCALE = 0x08,
 	DT_RANGE = 0x10,
 };

 /*
  * printf-style diagnostic to stderr, emitted only when one of the bits
  * in 'lvl' is enabled in the global 'verbose' mask.
  *
  * Returns the vfprintf() result, or 0 when the message is suppressed.
  */
 static int verbose_msg(const unsigned lvl, const char *fmt, ...)
 {
 	va_list ap;
 	int written;

 	if (!(verbose & lvl)) {
 		return 0;
 	}

 	va_start(ap, fmt);
 	written = vfprintf(stderr, fmt, ap);
 	va_end(ap);

 	return written;
 }
 /*
  * Allocate and initialise a section.
  *
  * name == NULL        -> anonymous section, named "anonNNNNN".
  * name not '<'-led    -> reorder section, named "<locale> NNNNN" after the
  *                        current locale (the passed text is not used).
  * otherwise           -> the name is used as given.
  * NNNNN is the running 'anonsection' counter, bumped for each generated name.
  *
  * A section whose final name starts with 'a' gets the default 4-level rule
  * set (forward x4, last level also position) and that rule set also becomes
  * the current one (cur_num_weights / cur_rule).
  *
  * NOTE(review): generated reorder names start with cur_col->name, so a
  * locale whose name begins with 'a' also takes the default-rule branch --
  * confirm that this is intended.
  */
 static section_t *new_section(const char *name)
 {
 	char generated[128];
 	section_t *sec;
 	int lvl;

 	sec = xmalloc(sizeof(section_t));

 	if (!name || (*name != '<')) {
 		if (!name) {			/* anonymous section */
 			snprintf(generated, sizeof(generated), "anon%05d", anonsection);
 		} else {				/* reorder */
 			snprintf(generated, sizeof(generated), "%s %05d", cur_col->name, anonsection);
 		}
 		name = generated;
 		++anonsection;
 	}
 #warning devel code
 	sec->name = xsymdup(name);
 	sec->itm_list = NULL;
 	sec->num_items = 0;
 	sec->num_rules = 0;
 	memset(sec->rules, 0, MAX_COLLATION_WEIGHTS);

 #warning devel code
 	if (sec->name[0] == 'a') {
 		sec->num_rules = 4;
 		cur_num_weights = 4;
 		for (lvl = 0 ; lvl < 4 ; ++lvl) {
 			sec->rules[lvl] = R_FORWARD;
 			cur_rule[lvl] = R_FORWARD;
 		}
 		sec->rules[3] |= R_POSITION;
 		cur_rule[3] |= R_POSITION;
 	}

 	return sec;
 }


 static void do_order_start(void);
 static void do_order_end(void);
 static void do_reorder_after(void);
 static void do_reorder_end(void);
 static void do_reorder_sections_after(void);
 static void do_reorder_sections_end(void);
 static void do_copy(void);
 static void do_colsym(void);
 static void do_colele(void);
 static void do_script(void);
 static void do_range(void);

 static col_locale_t *new_col_locale(char *name);
 static int colitem_cmp(const void *n1, const void *n2);
 static int colelement_cmp(const void *n1, const void *n2);
 static void del_colitem(colitem_t *p);
 static colitem_t *new_colitem(char *item, char *def);
 static void add_colitem(char *item, char *def);
 static void add_script(const char *s);
 static unsigned int add_rule(weighted_item_t *wi);
 static unsigned int add_range_rule(range_item_t *ri);

 static const keyword_table_t keyword_table[] = {
     { "collating-symbol", do_colsym },
     { "collating-element", do_colele },
 	{ "script", do_script },
     { "copy", do_copy },
     { "order_start", do_order_start },
     { "order_end", do_order_end },
     { "order-end", do_order_end },
     { "reorder-after", do_reorder_after },
     { "reorder-end", do_reorder_end },
     { "reorder-sections-after", do_reorder_sections_after },
     { "reorder-sections-end", do_reorder_sections_end },
 	{ "UCLIBC_RANGE", do_range },
     { NULL, do_unrecognized }
 };


 /*
  * Fallback keyword handler: the token at 'pos' matched nothing in
  * keyword_table.  Despite the "warning" wording this goes through
  * error_msg(), which is declared noreturn.
  */
 static void do_unrecognized(void)
 {
 	error_msg("warning: unrecognized: %s", pos);
 }

 /* typedef struct { */
 /* 	const char *symbol1; */
 /* 	const char *symbol2; */
 /* 	int length; */
 /* 	weight_t *weight; */
 /* } range_item_t; */

 /*
  * Handler for the UCLIBC_RANGE keyword: "UCLIBC_RANGE <start> <end> weights".
  *
  * Both endpoints must satisfy is_ucode().  A range_item_t is built for
  * them, the weights that follow on the line are parsed into a weight_t,
  * and the resulting DT_RANGE node is appended to the current section's
  * circular item list.
  *
  * Only legal outside superset mode and while order_state == IN_ORDER
  * (both asserted).  All failures go through error_msg().
  */
 static void do_range(void)
 {
 	range_item_t *ri;
 	weight_t w;
 	int i;
 	char *s;
 	char *s1;
 	char *s2;
 	const char **ci;
 	ll_item_t *lli;

 	assert(!superset);
 	assert(order_state == IN_ORDER);

 	s1 = next_token();
 	if (!s1) {
 		error_msg("missing start of range");
 	}
 	if (!is_ucode(s1)) {
 		error_msg("start of range is not a ucode: %s", s1);
 	}
 	s1 = xsymdup(s1);

 	s2 = next_token();
 	if (!s2) {
 		error_msg("missing end of range");
 	}
 	if (!is_ucode(s2)) {
 		error_msg("end of range is not a ucode: %s", s2);
 	}
 	s2 = xsymdup(s2);

 	ri = (range_item_t *) xmalloc(sizeof(range_item_t));
 	ri->symbol1 = s1;
 	ri->symbol2 = s2;
 	/* Hex value of each symbol, skipping its first two characters
 	 * (presumably the "<U" prefix -- confirm against is_ucode()). */
 	ri->length = strtoul(s2+2, NULL, 16) - strtoul(s1+2, NULL, 16);
 	if (ri->length <= 0) {
 		error_msg("illegal range length %d", ri->length);
 	}

 	s = next_token();
 	w.num_weights = cur_num_weights;

 	for (i=0 ; i < cur_num_weights ; i++) {
 		w.rule[i] = cur_rule[i];
 	}
 	/* ci points at the LAST used slot; since i is decremented before each
 	 * store, ci[-i] addresses colitem[cur_num_weights-1-i], i.e. the slots
 	 * are filled in increasing order 0, 1, 2, ... */
 	ci = w.colitem + (i-1);
 	/* now i == cur_num_weights */

 /* Placeholder stored for a weight level that was left empty.
  * Defined here but also used by add_weight() further down. */
 #define STR_DITTO "."

 	while (s && *s && i) {
 		--i;
 		if (*s == ';') {
 			/* separator with nothing before it: this level is empty */
 			ci[-i] = xsymdup(STR_DITTO);
 			if (*++s) {
 				/* more text follows the ';' in the same token */
 				continue;
 			}
 		}
 		if (*s) {
 			ci[-i] = xsymdup(s);
 		}
 		s = next_token();
 		if (s) {
 			if (*s == ';') {
 				++s;
 			} else if (i) {
 				error_msg("missing seperator");
 			}
 		}
 	}
 	if (s) {
 		error_msg("too many weights: %d %d |%s| %d", cur_num_weights, i, s, (int)*s);
 	}

 	while (i) {					/* missing weights are not an error */
 		--i;
 		ci[-i] = xsymdup(STR_DITTO);
 	}

 	ri->weight = register_weight(&w);

 /* 	if ((i = is_ucode(t)) != 0) { */
 /* 		assert(!t[i]); */
 /* 		add_colitem(t, NULL); */
 /* 	} */

 	/* Append to the tail of the section's circular list (insert after the
 	 * head's prev); the first item forms a one-element ring. */
 	lli = new_ll_item(DT_RANGE, ri);
 	if (!cur_section->itm_list) {
 /* 		printf("creating new item list: %s\n", wi->symbol); */
 		cur_section->itm_list = lli;
 		lli->prev = lli->next = lli;
 		++cur_section->num_items;
 	} else {
 		insque(lli, cur_section->itm_list->prev);
 /* 		printf("adding item to list: %d - %s\n", ll_len(cur_section->itm_list), wi->symbol); */
 		++cur_section->num_items;
 	}
 /* 	add_wi_index(lli); */


 }

 /*
  * Parse the weights that follow symbol 't' on the current line and
  * return a newly allocated weighted_item_t for it.
  *
  * The symbol is interned with xsymdup(); the weight_t is registered via
  * register_weight().  A weight identical to the symbol itself, and any
  * empty or missing level, is stored as STR_DITTO.  When is_ucode(t)
  * returns non-zero, 't' is also added as a collating item.
  *
  * Errors (missing separator, too many weights) go through error_msg().
  */
 static weighted_item_t *add_weight(char *t)
 {
 	weighted_item_t *wi;
 	weight_t w;
 	int i;
 	char *s;
 	const char **ci;

 	t = xsymdup(t);

 	s = next_token();
 	w.num_weights = cur_num_weights;

 	for (i=0 ; i < cur_num_weights ; i++) {
 		w.rule[i] = cur_rule[i];
 	}
 	/* ci points at the LAST used slot; i is decremented before each store,
 	 * so ci[-i] walks colitem[0], colitem[1], ... in order. */
 	ci = w.colitem + (i-1);
 	/* now i == cur_num_weights */

 	while (s && *s && i) {
 		--i;
 		if (*s == ';') {
 			/* separator with nothing before it: this level is empty */
 			ci[-i] = xsymdup(STR_DITTO);
 			if (*++s) {
 				/* more text follows the ';' in the same token */
 				continue;
 			}
 		}
 		if (*s) {
 			if (!strcmp(s,t)) {
 				/* item weighted by itself is recorded as "ditto" */
 				s = STR_DITTO;
 			}
 			ci[-i] = xsymdup(s);
 		}
 		s = next_token();
 		if (s) {
 			if (*s == ';') {
 				++s;
 			} else if (i) {
 				error_msg("missing seperator");
 			}
 		}
 	}
 	if (s) {
 		error_msg("too many weights: %d %d |%s| %d", cur_num_weights, i, s, (int)*s);
 	}

 	while (i) {					/* missing weights are not an error */
 		--i;
 		ci[-i] = xsymdup(STR_DITTO);
 	}

 	wi = xmalloc(sizeof(weighted_item_t));
 	wi->symbol = t;
 	wi->weight = register_weight(&w);

 	/* is_ucode() result is used as an index: the symbol must end there */
 	if ((i = is_ucode(t)) != 0) {
 		assert(!t[i]);
 		add_colitem(t, NULL);
 	}

 	return wi;
 }

 static void add_superset_weight(char *t)
 {
 	ll_item_t *lli;
 	weighted_item_t *wi;

 	if (!comm_cur_ptr
 		|| (strcmp(t, ((weighted_item_t *)(comm_cur_ptr->data))->symbol) != 0)
 		) {						/* now out of sync */
 		if (superset_in_sync) {	/* need a new section */
 			superset_in_sync = 0;

 			cur_section = new_section("R");
 			cur_num_weights = cur_section->num_rules
 				= ((section_t *)(cur_base->section_list->data))->num_rules;
 			memcpy(cur_rule,
 				   ((section_t *)(cur_base->section_list->data))->rules,
 				   MAX_COLLATION_WEIGHTS);
 			memcpy(cur_section->rules,
 				   ((section_t *)(cur_base->section_list->data))->rules,
 				   MAX_COLLATION_WEIGHTS);

 			insque(new_ll_item(DT_REORDER, cur_section), find_ll_last(cur_col->section_list));
 			assert(comm_prev_ptr);
 			lli = new_ll_item(DT_REORDER, cur_section);
 			lli->prev = lli->next = lli;
 			insque(lli, comm_prev_ptr);
 /* 			verbose_msg(VDETAIL, "  subsection -----------------------\n"); */
 		}

 /* 		verbose_msg(VDETAIL, "     %s   %s\n", t, ((weighted_item_t *)(comm_cur_ptr->data))->symbol); */
 		wi = add_weight(t);
 		lli = new_ll_item(DT_WEIGHTED, wi);
 		mark_reordered(wi->symbol);
 		/* 			printf("reorder: %s\n", t); */
 		if (!cur_section->itm_list) {
 			cur_section->itm_list = lli;
 			lli->prev = lli->next = lli;
 			++cur_section->num_items;
 		} else {
 			insque(lli, cur_section->itm_list->prev);
 			++cur_section->num_items;
 		}
 		add_wi_index(lli);

 	} else {					/* in sync */
 		superset_in_sync = 1;
 		next_comm_ptr();
 	}
 }

 static void do_weight(char *t)
 {
 	weighted_item_t *wi;
 	ll_item_t *lli;

 	if (superset) {
 		add_superset_weight(t);
 		return;
 	}

 	switch(order_state) {
 		case 0:
 /* 			fprintf(stdout, "no-order weight: %s\n", t); */
 /* 			break; */
 		case IN_ORDER:
 			/* in a section */
 /* 			fprintf(stdout, "weight: %s\n", t); */
 			wi = add_weight(t);
 			lli = new_ll_item(DT_WEIGHTED, wi);
 			if (!cur_section->itm_list) {
 /* 				fprintf(stdout, "creating new item list: %s  %s  %p\n", wi->symbol, cur_section->name, lli); */
 				cur_section->itm_list = lli;
 				lli->prev = lli->next = lli;
 				++cur_section->num_items;
 			} else {
 				insque(lli, cur_section->itm_list->prev);
 /* 				fprintf(stdout, "adding item to list: %d - %s  %p\n", ll_len(cur_section->itm_list), wi->symbol, lli); */
 				++cur_section->num_items;
 			}
 			add_wi_index(lli);
 			break;
 		case IN_REORDER:
 			/* std rule - but in a block with an insert-after pt */
 			wi = add_weight(t);
 			lli = new_ll_item(DT_WEIGHTED, wi);
 			mark_reordered(wi->symbol);
 /* 			fprintf(stdout, "reorder: %s  %s  %p\n", t, cur_section->name, lli); */
 			if (!cur_section->itm_list) {
 				cur_section->itm_list = lli;
 				lli->prev = lli->next = lli;
 				++cur_section->num_items;
 			} else {
 				insque(lli, cur_section->itm_list->prev);
 				++cur_section->num_items;
 			}
 			add_wi_index(lli);
 			break;
 		case IN_REORDER_SECTIONS:
 			t = xsymdup(t);
 			if (next_token() != NULL) {
 				error_msg("trailing text in reorder section item: %s", pos);
 			}
 			lli = cur_col->section_list;
 			do {
 				if (lli->data_type & DT_SECTION) {
 					if (!strcmp(((section_t *)(lli->data))->name, t)) {
 						lli->data_type = DT_REORDER;
 						lli = new_ll_item(DT_REORDER, (section_t *)(lli->data));
 						insque(lli, reorder_section_ptr);
 						reorder_section_ptr = lli;
 						return;
 					}
 				}
 				lli = lli->next;
 			} while (lli);
 			error_msg("reorder_sections_after for non-base item currently not supported: %s", t);
 /* 			fprintf(stdout, "reorder_secitons: %s\n", t); */
 			break;
 		default:
 			error_msg("invalid order_state %d", order_state);
 	}
 }

 static int col_locale_cmp(const void *n1, const void *n2)
 {
     return strcmp(((const col_locale_t *) n1)->name, ((const col_locale_t *) n2)->name);
 }

 /*
  * Load one synthetic source line into linebuf, reset the tokenizer state
  * and consume its first token (the keyword or symbol).
  *
  * Returns that first token, which is asserted to sit at 'pos'.
  */
 static char *processfile_inject_line(const char *text)
 {
 	char *tok;

 	assert(strlen(text) < sizeof(linebuf));
 	strcpy(linebuf, text);
 	pos_e = NULL;
 	pos = linebuf;
 	tok = next_token();
 	assert(tok);
 	assert(tok == pos);
 	return tok;
 }

 /*
  * Read the locale file most recently pushed with pushfile() into cur_col.
  *
  * - For a derived locale (cur_col != cur_base) the locale is linked to its
  *   base and inherits the base's undefined_idx.
  * - The locale is entered in the root_col_locale tree; reading the same
  *   locale twice is an error.
  * - Each line is dispatched on its first token: symbols ('<...>') and
  *   UNDEFINED go to do_weight(), everything else through keyword_table.
  * - A derived locale that ends without an UNDEFINED entry gets a synthetic
  *   one if its base is "iso14651_t1"; for any other base this is an error.
  * - Finally a one-line summary is emitted at VDETAIL verbosity.
  */
 static void processfile(void)
 {
 	char *t;
 	const keyword_table_t *k;

 	order_state = 0;
 #warning devel code
 /* 	cur_num_weights = 0; */
 /* 	cur_num_weights = 4; */
 /* 	memset(cur_rule, R_FORWARD, 4); */

 	if (cur_col != cur_base) {
 		/* A derived locale needs a base; fail cleanly instead of
 		 * dereferencing a NULL cur_base below. */
 		if (!cur_base) {
 			error_msg("no base locale selected for derived locale: %s", cur_col->name);
 		}
 		cur_col->base_locale = cur_base;
 		cur_col->undefined_idx = cur_base->undefined_idx;
 		if (!cur_base->derived_list) {
 			cur_base->derived_list = new_ll_item(DT_COL_LOCALE, cur_col);
 		} else {
 			insque(new_ll_item(DT_COL_LOCALE, cur_col), find_ll_last(cur_base->derived_list));
 		}
 	}

 	if (tfind(cur_col, &root_col_locale, col_locale_cmp)) {
 		error_msg("attempt to read locale: %s", cur_col->name);
 	}
 	if (!tsearch(cur_col, &root_col_locale, col_locale_cmp)) {
 		error_msg("OUT OF MEMORY!");
 	}

 	if (superset) {
 		superset_order_start_cnt = 0;
 		superset_in_sync = 0;
 		init_comm_ptr();
 	}

 	while (next_line()) {
 		t = next_token();
 		assert(t);
 		assert(t == pos);
 		if ((*t == '<') || (!strcmp(t, "UNDEFINED"))) {
 			do_weight(t);
 		} else {
 			/* Falls off the end onto the NULL sentinel, whose handler
 			 * reports the unrecognized keyword. */
 			for (k = keyword_table ; k->keyword ; k++) {
 				if (!strcmp(k->keyword, t)) {
 					break;
 				}
 			}
 			k->handler();
 		}
 	}

 	if (cur_base == cur_col) {
 		verbose_msg(VDETAIL, "Base: %15s", cur_col->name);
 	} else {
 		if (!cur_col->undefined_idx) {
 			/* This is an awful hack to get around the problem of unspecified UNDEFINED
 			 * definitions in the supported locales derived from iso14651_t1. */
 			if (!strcmp(cur_base->name, "iso14651_t1")) {
 				fprintf(stderr, "Warning: adding UNDEFINED entry for %s\n", cur_col->name);

 				processfile_inject_line("script <UNDEFINED_SECTION>\n");
 				do_script();

 				processfile_inject_line("order_start <UNDEFINED_SECTION>;forward;backward;forward;forward,position\n");
 				do_order_start();

 				t = processfile_inject_line("UNDEFINED IGNORE;IGNORE;IGNORE\n");
 				do_weight(t);

 				processfile_inject_line("order_end\n");
 				do_order_end();
 			} else {
 				error_msg("no definition of UNDEFINED for %s", cur_col->name);
 			}
 		}

 		verbose_msg(VDETAIL, " Der: %15s", cur_col->name);
 	}

 	/* tnumnodes()/ll_count() return size_t; cast so the arguments really
 	 * match the %u conversions (they differ in width on LP64 hosts). */
 	verbose_msg(VDETAIL, "%6u weights", (unsigned) tnumnodes(cur_col->root_wi_index));
 	if (cur_base) {
 		verbose_msg(VDETAIL, "  %6u der %6u reor %6u starter - %u new stubs",
 				(unsigned) tnumnodes(cur_base->root_derived_wi),
 				(unsigned) tnumnodes(cur_base->root_wi_index_reordered),
 				(unsigned) tnumnodes(cur_base->root_starter_char),
 				(unsigned) ll_count(cur_col->section_list, DT_REORDER));
 	}
 	verbose_msg(VDETAIL, "\n");
 }

 /*
  * twalk() action: print one collating item.  Each node is printed exactly
  * once, on its postorder or leaf visit.  'level' is not used.
  */
 static void print_colnode(const void *ptr, VISIT order, int level)
 {
     const colitem_t *item = *(const colitem_t **) ptr;

     (void) level;

     if ((order != postorder) && (order != leaf)) {
         return;
     }

     printf("collating item = \"%s\"", item->string);
     if (item->element) {
         printf(" is %s", item->element);
     }
     printf("\n");
 }

 /*
  * twalk() action: print one registered weight_t -- its level count, the
  * rule flags of every level (F = forward, B = backward, P = position) and
  * the collating symbol used at every level.  Each node is printed exactly
  * once, on its postorder or leaf visit.  'level' is not used.
  */
 static void print_weight_node(const void *ptr, VISIT order, int level)
 {
     const weight_t *p = *(const weight_t **) ptr;
 	size_t i;					/* same type as p->num_weights */

 	(void) level;

     if (order == postorder || order == leaf)  {
 		/* num_weights is a size_t; convert explicitly for %d */
         printf("weight: (%d)  ", (int) p->num_weights);
 		for (i = 0 ; i < p->num_weights ; i++) {
 			if (p->rule[i] & R_FORWARD) {
 				printf("F");
 			}
 			if (p->rule[i] & R_BACKWARD) {
 				printf("B");
 			}
 			if (p->rule[i] & R_POSITION) {
 				printf("P");
 			}
 			printf(",");
 		}
 		for (i = 0 ; i < p->num_weights ; i++) {
 			printf("   %s", p->colitem[i]);
 		}
         printf("\n");
     }
 }


 /* One row of the deps[] table: a locale name and the BASE_* index of the
  * base collation locale it is built on. */
 typedef struct {
 	const char *der_name;
 	int base_locale;
 } deps_t;

 /* Indices of the base locales.  Used to index base_name[], locale_list[]
  * and der_count[]; BASE_MAX is the number of bases. */
 enum {
 	BASE_iso14651_t1,
 	BASE_comm,
 	BASE_cs_CZ,
 	BASE_ar_SA,
 	BASE_th_TH,
 	BASE_ja_JP,
 	BASE_ko_KR,
 	BASE_MAX
 };

 /* File names of the base locales.  Must list exactly BASE_MAX entries in
  * the same order as the BASE_* enum above, since it is indexed by it. */
 static const char *base_name[] = {
 	"iso14651_t1",
 	"comm",
 	"cs_CZ",
 	"ar_SA",
 	"th_TH",
 	"ja_JP",
 	"ko_KR"
 };


 /* Per-base circular list: the head node carries the base's own name
  * (see init_locale_list()), main() links requested locales after it. */
 static ll_item_t *locale_list[BASE_MAX];

 static void init_locale_list(void)
 {
 	int i;

 	for (i=0 ; i < BASE_MAX ; i++) {
 		locale_list[i] = (ll_item_t *) xmalloc(sizeof(ll_item_t));
 		locale_list[i]->prev = locale_list[i]->next = locale_list[i];
 		locale_list[i]->data = (void *) base_name[i];
 	}
 }


 /*
  * Maps every supported locale to the base collation locale it derives from.
  *
  * main() looks names up with bsearch() using dep_cmp() (a plain strcmp()
  * on der_name), so this table MUST stay sorted by der_name in strcmp()
  * order -- insert new locales at the right place, never at the end.
  */
 deps_t deps[] = {
 	{ "af_ZA", BASE_iso14651_t1 },
 	{ "am_ET", BASE_iso14651_t1 },
 	{ "ar_AE", BASE_iso14651_t1 },
 	{ "ar_BH", BASE_iso14651_t1 },
 	{ "ar_DZ", BASE_iso14651_t1 },
 	{ "ar_EG", BASE_iso14651_t1 },
 	{ "ar_IN", BASE_iso14651_t1 },
 	{ "ar_IQ", BASE_iso14651_t1 },
 	{ "ar_JO", BASE_iso14651_t1 },
 	{ "ar_KW", BASE_iso14651_t1 },
 	{ "ar_LB", BASE_iso14651_t1 },
 	{ "ar_LY", BASE_iso14651_t1 },
 	{ "ar_MA", BASE_iso14651_t1 },
 	{ "ar_OM", BASE_iso14651_t1 },
 	{ "ar_QA", BASE_iso14651_t1 },
 	{ "ar_SA", BASE_ar_SA },
 	{ "ar_SD", BASE_iso14651_t1 },
 	{ "ar_SY", BASE_iso14651_t1 },
 	{ "ar_TN", BASE_iso14651_t1 },
 	{ "ar_YE", BASE_iso14651_t1 },
 	{ "az_AZ", BASE_iso14651_t1 },
 	{ "be_BY", BASE_iso14651_t1 },
 	{ "bg_BG", BASE_iso14651_t1 },
 	{ "bn_BD", BASE_iso14651_t1 },
 	{ "bn_IN", BASE_iso14651_t1 },
 	{ "br_FR", BASE_iso14651_t1 },
 	{ "bs_BA", BASE_iso14651_t1 },
 	{ "ca_ES", BASE_comm },
 	{ "cs_CZ", BASE_cs_CZ },
 	{ "cy_GB", BASE_iso14651_t1 },
 	{ "da_DK", BASE_comm },
 	{ "de_AT", BASE_iso14651_t1 },
 	{ "de_BE", BASE_iso14651_t1 },
 	{ "de_CH", BASE_iso14651_t1 },
 	{ "de_DE", BASE_iso14651_t1 },
 	{ "de_LU", BASE_iso14651_t1 },
 	{ "el_GR", BASE_iso14651_t1 },
 	{ "en_AU", BASE_iso14651_t1 },
 	{ "en_BW", BASE_iso14651_t1 },
 	{ "en_CA", BASE_comm },
 	{ "en_DK", BASE_iso14651_t1 },
 	{ "en_GB", BASE_iso14651_t1 },
 	{ "en_HK", BASE_iso14651_t1 },
 	{ "en_IE", BASE_iso14651_t1 },
 	{ "en_IN", BASE_iso14651_t1 },
 	{ "en_NZ", BASE_iso14651_t1 },
 	{ "en_PH", BASE_iso14651_t1 },
 	{ "en_SG", BASE_iso14651_t1 },
 	{ "en_US", BASE_iso14651_t1 },
 	{ "en_ZA", BASE_iso14651_t1 },
 	{ "en_ZW", BASE_iso14651_t1 },
 	{ "eo_EO", BASE_iso14651_t1 },
 	{ "es_AR", BASE_comm },
 	{ "es_BO", BASE_comm },
 	{ "es_CL", BASE_comm },
 	{ "es_CO", BASE_comm },
 	{ "es_CR", BASE_comm },
 	{ "es_DO", BASE_comm },
 	{ "es_EC", BASE_comm },
 	{ "es_ES", BASE_comm },
 	{ "es_GT", BASE_comm },
 	{ "es_HN", BASE_comm },
 	{ "es_MX", BASE_comm },
 	{ "es_NI", BASE_comm },
 	{ "es_PA", BASE_comm },
 	{ "es_PE", BASE_comm },
 	{ "es_PR", BASE_comm },
 	{ "es_PY", BASE_comm },
 	{ "es_SV", BASE_comm },
 	{ "es_US", BASE_comm },
 	{ "es_UY", BASE_comm },
 	{ "es_VE", BASE_comm },
 	{ "et_EE", BASE_comm },
 	{ "eu_ES", BASE_iso14651_t1 },
 	{ "fa_IR", BASE_iso14651_t1 },
 	{ "fi_FI", BASE_comm },
 	{ "fo_FO", BASE_comm },
 	{ "fr_BE", BASE_iso14651_t1 },
 	{ "fr_CA", BASE_comm },
 	{ "fr_CH", BASE_iso14651_t1 },
 	{ "fr_FR", BASE_iso14651_t1 },
 	{ "fr_LU", BASE_iso14651_t1 },
 	{ "ga_IE", BASE_iso14651_t1 },
 	{ "gd_GB", BASE_iso14651_t1 },
 	{ "gl_ES", BASE_comm },
 	{ "gv_GB", BASE_iso14651_t1 },
 	{ "he_IL", BASE_iso14651_t1 },
 	{ "hi_IN", BASE_iso14651_t1 },
 	{ "hr_HR", BASE_comm },
 	{ "hu_HU", BASE_iso14651_t1 },
 	{ "hy_AM", BASE_iso14651_t1 },
 	{ "id_ID", BASE_iso14651_t1 },
 	{ "is_IS", BASE_comm },
 	{ "it_CH", BASE_iso14651_t1 },
 	{ "it_IT", BASE_iso14651_t1 },
 	{ "iw_IL", BASE_iso14651_t1 },
 	{ "ja_JP", BASE_ja_JP },
 	{ "ka_GE", BASE_iso14651_t1 },
 	{ "kl_GL", BASE_comm },
 	{ "ko_KR", BASE_ko_KR },
 	{ "kw_GB", BASE_iso14651_t1 },
 	{ "lt_LT", BASE_comm },
 	{ "lv_LV", BASE_comm },
 	{ "mi_NZ", BASE_iso14651_t1 },
 	{ "mk_MK", BASE_iso14651_t1 },
 	{ "mr_IN", BASE_iso14651_t1 },
 	{ "ms_MY", BASE_iso14651_t1 },
 	{ "mt_MT", BASE_iso14651_t1 },
 	{ "nl_BE", BASE_iso14651_t1 },
 	{ "nl_NL", BASE_iso14651_t1 },
 	{ "nn_NO", BASE_iso14651_t1 },
 	{ "no_NO", BASE_comm },
 	{ "oc_FR", BASE_iso14651_t1 },
 	{ "pl_PL", BASE_comm },
 	{ "pt_BR", BASE_iso14651_t1 },
 	{ "pt_PT", BASE_iso14651_t1 },
 	{ "ro_RO", BASE_iso14651_t1 },
 	{ "ru_RU", BASE_iso14651_t1 },
 	{ "ru_UA", BASE_iso14651_t1 },
 	{ "se_NO", BASE_iso14651_t1 },
 	{ "sk_SK", BASE_cs_CZ },
 	{ "sl_SI", BASE_comm },
 	{ "sq_AL", BASE_iso14651_t1 },
 	{ "sr_YU", BASE_iso14651_t1 },
 	{ "sv_FI", BASE_comm },
 	{ "sv_SE", BASE_iso14651_t1 },
 	{ "ta_IN", BASE_iso14651_t1 },
 	{ "te_IN", BASE_iso14651_t1 },
 	{ "tg_TJ", BASE_iso14651_t1 },
 	{ "th_TH", BASE_th_TH },
 	{ "ti_ER", BASE_iso14651_t1 },
 	{ "ti_ET", BASE_iso14651_t1 },
 	{ "tl_PH", BASE_iso14651_t1 },
 	{ "tr_TR", BASE_comm },
 	{ "tt_RU", BASE_iso14651_t1 },
 	{ "uk_UA", BASE_iso14651_t1 },
 	{ "ur_PK", BASE_iso14651_t1 },
 	{ "uz_UZ", BASE_iso14651_t1 },
 	{ "vi_VN", BASE_iso14651_t1 },
 	{ "wa_BE", BASE_iso14651_t1 },
 	{ "yi_US", BASE_iso14651_t1 },
 	{ "zh_CN", BASE_iso14651_t1 },
 	{ "zh_HK", BASE_iso14651_t1 },
 	{ "zh_SG", BASE_iso14651_t1 },
 	{ "zh_TW", BASE_iso14651_t1 },
 };


 static int der_count[BASE_MAX];
 static const char *new_args[500];
 static int new_arg_count;

 static int dep_cmp(const void *s1, const void *s2)
 {
 	return strcmp( (const char *) s1, ((const deps_t *) s2)->der_name);
 }

 static int old_main(int argc, char **argv);

 /* Append one argument to new_args[], failing cleanly instead of writing
  * past the end of the fixed-size array. */
 static void main_push_arg(const char *arg)
 {
 	const size_t capacity = sizeof(new_args) / sizeof(new_args[0]);

 	if ((size_t) new_arg_count >= capacity) {
 		error_msg("too many arguments (limit %u)", (unsigned) capacity);
 	}
 	new_args[new_arg_count++] = arg;
 }

 /*
  * Front end:  gen_collate <INPUTDIR> [-o OUTPUTFILE] [-v] LOCALE ...
  *
  * Looks every LOCALE up in deps[] to find its base locale, groups the
  * requested locales per base, and rewrites the command line into the
  * form old_main() understands:
  *
  *     -b|-c <base> <derived>... -f   (once per base in use; -c for "comm")
  *     -v ...                         (one per -v given)
  *     -o <output file>
  *
  * The locale "C" is ignored; any other unknown locale is an error.
  * Returns EXIT_FAILURE on a usage error, otherwise old_main()'s result.
  */
 int main(int argc, char **argv)
 {
 	const deps_t *p;
 	ll_item_t *lli;
 	int i;
 	unsigned v;
 	char *output_file = "locale_collate.h";
 	unsigned verbosity = 0;

 	if (argc < 3) {
 		fprintf(stderr, "Usage: gen_collate <INPUTDIR> [-o OUTPUTFILE] [-v] LOCALE ...\n");
 		return EXIT_FAILURE;
 	}
 	--argc;
 	inputdir = strdup(*++argv);
 	if (!inputdir) {
 		error_msg("OUT OF MEMORY!");
 	}
 	inputdir_len = strlen(inputdir);
 	init_locale_list();

 	while (--argc) {
 		++argv;
 		if (!strcmp(*argv, "-o")) {
 			--argc;
 			if (*++argv == NULL) {
 				printf("-o <outfile> requires an argument\n");
 				return EXIT_FAILURE;
 			}
 			output_file = strdup(*argv);
 			if (!output_file) {
 				error_msg("OUT OF MEMORY!");
 			}
 			continue;
 		} else if (!strcmp(*argv, "-v")) {
 			verbosity++;
 			continue;
 		}
 		p = (const deps_t *) bsearch(*argv, deps, sizeof(deps)/sizeof(deps[0]), sizeof(deps[0]), dep_cmp);
 		if (!p) {
 			if (!strcmp("C", *argv)) {
 				printf("ignoring %s locale\n", *argv);
 				continue;
 			} else {
 				printf("%s not found\n", *argv);
 				return EXIT_FAILURE;
 			}
 		}

 		i = p->base_locale;
 		++der_count[i];

 		if (!strcmp(base_name[i], *argv)) {
 			/* same name as base, so skip after count incremented */
 			continue;
 		}

 		/* add it to the list.  the main body will catch duplicates */
 		lli = (ll_item_t *) xmalloc(sizeof(ll_item_t));
 		lli->prev = lli->next = NULL;
 		lli->data = (void *) *argv;
 		insque(lli, locale_list[i]);	/* linked right after the head */
 	}

 	main_push_arg("dummyprogramname");
 	for (i=0 ; i < BASE_MAX ; i++) {
 		if (!der_count[i]) {
 			continue;			/* no locale of this base was requested */
 		}
 		main_push_arg((i == BASE_comm) ? "-c" : "-b");
 		/* head node first (the base itself), then the derived locales */
 		lli = locale_list[i];
 		do {
 			main_push_arg((const char *) (lli->data));
 			lli = lli->next;
 		} while (lli != locale_list[i]);
 		main_push_arg("-f");
 	}
 	for (v=0; v < verbosity; v++) {
 		main_push_arg("-v");
 	}

 	main_push_arg("-o");
 	main_push_arg(output_file);

 	return old_main(new_arg_count, (char **) new_args);
 }


 /* usage...  prog -b basefile derived {derived} -s single {single} */

 /*
  * Back end driven by the argument vector that main() builds.
  *
  * Options:
  *   -d<name>  call dump_weights(<name>)
  *   -f        call finalize_base()
  *   -R        dump all weight rules
  *   -c        the next file is a new "common subset" base; the files after
  *             it are then read in superset mode
  *   -b        the next file is a new base locale
  *   -s        every following file is read as its own base
  *   -o FILE   output file (required)
  *   -v        increase verbosity
  * Every other argument is a locale file, read with processfile().
  *
  * After all arguments are handled the size statistics are reported (at
  * VINFO verbosity) and the tables are written with dump_collate().
  * Returns EXIT_SUCCESS; every failure goes through error_msg().
  */
 static int old_main(int argc, char **argv)
 {
 	int next_is_base = 0;
 	int next_is_subset = 0;
 	char *output_file = NULL;
 	size_t total_items;

 	superset = 0;

 	while (--argc) {
 		++argv;
 		if (**argv == '-') {
 			if ((*argv)[1] == 'd') {
 				dump_weights((*argv) + 2);
 			} else if ((*argv)[1] == 'f') {	/* finalize the current base */
 				finalize_base();
 			} else if ((*argv)[1] == 'R') {	/* dump all weight rules */
 				twalk(root_weight, print_weight_node);
 			} else if (((*argv)[1] == 'c') && !(*argv)[2]) { /* new common subset */
 				cur_base = cur_derived = NULL;
 				next_is_subset = 1;
 				next_is_base = 1;
 				superset = 0;
 			} else if (((*argv)[1] == 'b') && !(*argv)[2]) { /* new base locale */
 				cur_base = cur_derived = NULL;
 				next_is_subset = 0;
 				next_is_base = 1;
 				superset = 0;
 			} else if (((*argv)[1] == 's') && !(*argv)[2]) { /* single locales follow */
 				cur_base = cur_derived = NULL;
 				next_is_subset = 0;
 				next_is_base = 2;
 				superset = 0;
 			} else if (((*argv)[1] == 'o') && !(*argv)[2]) { /* output file */
 				/* argc counts the arguments still left, this one included.
 				 * Without this check a trailing -o would drive argc to 0
 				 * here and the loop's --argc would then run past the end. */
 				if (argc < 2) {
 					error_msg("option -o requires an argument");
 				}
 				--argc;
 				output_file = *++argv;
 			} else if (((*argv)[1] == 'v') && !(*argv)[2]) { /* verbose */
 				++verbose;
 			} else {
 				error_msg("unrecognized option %s", *argv);
 			}
 			continue;
 		}
 		/* new file */
 		new_col_locale(*argv);	/* automaticly sets cur_col */
 		if (next_is_base) {
 			cur_base = cur_col;
 		} else {
 			cur_derived = cur_col;
 		}
 		pushfile(*argv);
 		processfile();			/* this does a popfile */

 		if (next_is_base == 1) {
 			next_is_base = 0;	/* only the first file after -b/-c is the base */
 		}
 		if (next_is_subset) {
 			next_is_subset = 0;
 			superset = 1;
 		}
 	}

 	/* The counters below are size_t; they are converted explicitly so the
 	 * arguments really match the %u / %d conversions (verbose_msg() has no
 	 * format attribute, so the compiler cannot catch a mismatch). */
 	verbose_msg(VINFO, "success!\n");
 	verbose_msg(VINFO,
 			"num_sym=%u mem_sym=%u  unique_weights=%u\n",
 			(unsigned) num_sym, (unsigned) mem_sym, (unsigned) unique_weights);

 	verbose_msg(VINFO, "num base locales = %d    num derived locales = %d\n",
 			(int) base_locale_len, (int) der_locale_len);

 	/* all tables hold uint16_t items: 2 bytes each, 512 items per kB */
 	total_items = override_len + multistart_len + weightstr_len
 		+ wcs2colidt_len + index2weight_len + index2ruleidx_len + ruletable_len;

 #warning mult by 2 for rule indecies
 	verbose_msg(VINFO,
 			"override_len = %d      multistart_len = %d    weightstr_len = %d\n"
 			"wcs2colidt_len = %d    index2weight_len = %d  index2ruleidx_len = %d\n"
 			"ruletable_len = %d\n"
 			"total size is %d bytes or %d kB\n",
 			(int) override_len, (int) multistart_len, (int) weightstr_len,
 			(int) wcs2colidt_len, (int) index2weight_len, (int) index2ruleidx_len,
 			(int) ruletable_len,
 			(int) (total_items * 2),
 			(int) ((total_items + 511) / 512));

 	if (!output_file) {
 		/* fopen(NULL, ...) would be undefined behaviour */
 		error_msg("no output file specified (use -o)");
 	}

 	{
 		FILE *fp = fopen(output_file, "w");

 		if (!fp) {
 			error_msg("cannot open output file '%s'!", output_file);
 		}
 		dump_collate(fp);
 		if (ferror(fp) || fclose(fp)) {
 			/* error_msg() appends the newline itself */
 			error_msg("write error or close error for output file!");
 		}
 	}

     return EXIT_SUCCESS;
 }

 /*
  * Print "Error: ", the current input file and line number (when a file is
  * on the stack, i.e. fno >= 0), the printf-style message and a newline to
  * stderr, then terminate the program with EXIT_FAILURE.  Never returns.
  */
 static void error_msg(const char *fmt, ...)
 {
 	va_list ap;

 	fputs("Error: ", stderr);
 	if (fno >= 0) {
 		fprintf(stderr, "file %s (%d): ", fname[fno], lineno[fno]);
 	}

 	va_start(ap, fmt);
 	vfprintf(stderr, fmt, ap);
 	va_end(ap);

 	fputc('\n', stderr);
 	exit(EXIT_FAILURE);
 }

 /*
  * Open "<inputdir>/<filename>" for reading and push it on the input file
  * stack (fstack/fname/lineno, indexed by fno).
  *
  * Exits through error_msg() if the stack is full or the file cannot be
  * opened.
  */
 static void pushfile(char *filename)
 {
 	char *inputfile;
 	size_t inputfile_len;

 	if (fno >= MAX_FNO) {
 		error_msg("file stack size exceeded");
 	}

 	/* Room for inputdir, '/', filename and the terminating NUL. */
 	inputfile_len = inputdir_len + strlen(filename) + 2;
 	inputfile = xmalloc(inputfile_len);
 	/* Bounded write; also always NUL-terminates the buffer. */
 	snprintf(inputfile, inputfile_len, "%s/%s", inputdir, filename);
 	if (!(fstack[++fno] = fopen(inputfile, "r"))) {
 		--fno;					/* oops */
 		error_msg("cannot open file %s: %s", inputfile, strerror(errno));
 	}

 	/* xsymdup() returns its own interned copy, so the scratch buffer is
 	 * no longer needed once the name has been recorded. */
 	fname[fno] = xsymdup(inputfile);
 	lineno[fno] = 0;
 	free(inputfile);
 }

 /*
  * Close the file on top of the input stack and pop it.
  * Exits through error_msg() when the stack is already empty.
  */
 static void popfile(void)
 {
 	if (fno < 0) {
 		error_msg("pop on empty file stack");
 	}

 	/* fname[fno] came from xsymdup() and is intentionally not freed. */
 	fclose(fstack[fno--]);
 }

 /* Advance the global cursor `pos` past any leading whitespace. */
 static void eatwhitespace(void)
 {
 	/* <ctype.h> functions require a value representable as unsigned char
 	 * (or EOF); a plain char may be negative for bytes >= 0x80. */
 	while (isspace((unsigned char) *pos)) {
 		++pos;
 	}
 }

 /* Return 1 if c starts a comment ('#' or '%'), otherwise 0. */
 static int iscommentchar(int c)
 {
 	switch (c) {
 	case '#':
 	case '%':
 		return 1;
 	default:
 		return 0;
 	}
 }

 /*
  * Read the next significant line from the input file stack into linebuf.
  *
  * Blank lines and lines whose first non-blank character is a comment
  * character are skipped.  At end of file the current file is popped and
  * reading continues with the file below it on the stack.
  *
  * On success `pos` points at the first non-blank character and 1 is
  * returned.  Returns 0 once the file stack is empty.  Exits through
  * error_msg() if a line does not fit in linebuf.
  */
 static int next_line(void)
 {
 	size_t n;

 	assert(fno >= 0);

 	pos_e = NULL;				/* no token is pending on a fresh line */
 	do {
 		if (fgets(linebuf, sizeof(linebuf), fstack[fno]) != NULL) {
 			++lineno[fno];
 			n = strlen(linebuf);
 			if ((n == sizeof(linebuf) - 1) && (linebuf[n-1] != '\n')) {
 				/* Either line is too long or last line is very long with
 				 * no trailing newline.  But we'll always treat it as an
 				 * error. */
 				error_msg("line too long?");
 			}

 			/* Be careful... last line doesn't need a newline, and a line
 			 * starting with a NUL byte has n == 0, so never index n-1
 			 * without checking. */
 			if ((n > 0) && (linebuf[n-1] == '\n')) {
 				linebuf[n-1] = 0;	/* trim trailing newline */
 			}

 			pos = linebuf;
 			eatwhitespace();
 			if (*pos && !iscommentchar(*pos)) { /* not empty or comment line */
 				return 1;		/* got a line */
 			}
 		} else {				/* eof */
 			popfile();
 		}
 	} while (fno >= 0);

 	return 0;
 }

 /*
  * Return the next token of the current line, or NULL at end of line or at
  * the start of a comment.
  *
  * Three token shapes are recognised:
  *   <...>   up to the matching '>' ('/' escapes the following character)
  *   "..."   up to the closing quote (quotes are part of the token)
  *   other   up to the next whitespace or ';'
  *
  * The token is NUL-terminated in place inside the line buffer.  The
  * overwritten character is remembered in end_of_token / pos_e and put back
  * on the next call.  Exits through error_msg() for an unterminated <...>
  * or "..." token.
  */
 static char *next_token(void)
 {
 	char *p;

 	/* A previous call already reported end of line and cleared both
 	 * cursors; scanning again would dereference NULL. */
 	if ((pos == NULL) && (pos_e == NULL)) {
 		return NULL;
 	}

 	/* Undo the terminator written by the previous call. */
 	if (pos_e != NULL) {
 		pos = pos_e;
 		*pos = end_of_token;
 		end_of_token = 0;
 	}
 	eatwhitespace();
 	p = pos;

 	if (!*p || iscommentchar(*p)) {	/* end of line or start of comment */
 		pos = pos_e = NULL;
 		*p = 0;					/* treat comment as end of line */
 		return NULL;
 	}

 	if (*p == '<') {			/* collating symbol, element, or value */
 		while (*++p) {
 			if ((*p == '/') && p[1]) {	/* skip the escaped character */
 				++p;
 				continue;
 			}
 			if (*p == '>') {
 				pos_e = ++p;
 				end_of_token = *p;
 				*p = 0;
 				return pos;
 			}
 		}
 	} else if (*p == '"') {		/* collating element value? */
 		while (*++p) {
 			if (*p == '"') {	/* found the end of the quoted string */
 				pos_e = ++p;
 				end_of_token = *p;
 				*p = 0;
 				return pos;
 			}
 		}
 	} else {					/* some kind of keyword */
 		while (*++p) {
 			if (isspace((unsigned char) *p) || (*p == ';')) {
 				break;
 			}
 		}
 		pos_e = p;
 		end_of_token = *p;
 		*p = 0;
 		return pos;
 	}

 	/* Only reached for an unterminated <...> or "..." token. */
 	error_msg("illegal token |%s|", pos);
 	return NULL;				/* not reached; error_msg() exits */
 }

 /*
  * malloc() wrapper: returns n bytes of uninitialised memory, or exits
  * through error_msg() when malloc() returns NULL.
  */
 static void *xmalloc(size_t n)
 {
 	void *mem = malloc(n);

 	if (mem == NULL) {
 		error_msg("OUT OF MEMORY");
 	}
 	return mem;
 }

 /*
  * Handle a copy directive: the argument must be a single quoted name with
  * nothing after it.  The named file is pushed on the input stack, unless
  * it is the current base locale, in which case the copy is skipped.
  *
  * Exits through error_msg() on a missing or malformed argument.
  */
 static void do_copy(void)
 {
 	char *s;
 	char *e;

 	if ((s = next_token()) != NULL) {
 		e = strchr(s + 1, '"');
 		/* Token must be exactly "name": opening quote, closing quote last. */
 		if ((*s == '"') && e && !e[1]) {
 			if (next_token() != NULL) {
 				error_msg("illegal trailing text: %s", pos);
 			}
 			*e = 0;				/* strip the closing quote ... */
 			++s;				/* ... and the opening one */
 			if (cur_base && !strcmp(cur_base->name,s)) {
 /* 				verbose_msg(VDETAIL, "skipping copy of base file %s\n", s); */
#warning need to update last in order and position or check
 				return;
 			}
 /* 			verbose_msg(VDETAIL, "full copy of %s\n", s); */
 			pushfile(s);
 			return;
 		}
 	}
 	/* s is NULL when the argument is missing; never hand NULL to %s. */
 	error_msg("illegal or missing arg for copy: %s", s ? s : "(null)");
 }

 /*
  * Handle a collating-symbol directive: the argument must be a single
  * <...> token with nothing after it.  The symbol is registered with
  * add_colitem() without a definition.
  *
  * Exits through error_msg() on a missing or malformed argument.
  */
 static void do_colsym(void)
 {
 	char *s;
 	char *e;

 	if ((s = next_token()) != NULL) {
 		e = strrchr(s,'>');
 		/* Token must start with '<' and end with '>'. */
 		if ((*s == '<') && e && !e[1]) {
 			if (next_token() != NULL) {
 				error_msg("illegal trailing text: %s", pos);
 			}
 			e[1] = 0; /* cleanup in case next_token stored something */
 			add_colitem(s,NULL);
 			return;
 		}
 	}
 	/* s is NULL when the argument is missing; never hand NULL to %s. */
 	error_msg("illegal or missing arg for collating-symbol: %s", s ? s : "(null)");
 }

 /*
  * Handle a collating-element directive of the form
  *     <name> from "<Uxxxx><Uxxxx>..."
  *
  * The element and its quoted definition are registered with add_colitem(),
  * and the first <Uxxxx> code of the definition is recorded in the base
  * locale's root_starter_char tree.
  *
  * Exits through error_msg() on any syntax error.
  */
 static void do_colele(void)
 {
 	char *s;
 	char *e;
 	char *s1;
 	char *e1;
 	int n;

 	if ((s = next_token()) != NULL) {
 		e = strrchr(s,'>');
 		if ((*s == '<') && e && !e[1]) {
 			if (((s1 = next_token()) == NULL)
 				|| (strcmp(s1,"from") != 0)
 				|| ((s1 = next_token()) == NULL)
 				|| (*s1 != '\"')
 				) {
 				error_msg("illegal format for collating-element spec");
 			}
 			/* The definition must end with its closing quote. */
 			e1 = strchr(s1 + 1, '"');
 			if (!e1 || (e1[1] != 0)) {
 				error_msg("illegal definition for collating-element: %s", s1);
 			}
 			if (next_token() != NULL) {
 				error_msg("illegal trailing text: %s", pos);
 			}
 			/* Later next_token() calls restored the bytes after both
 			 * tokens, so terminate them again before use. */
 			e[1] = 0; /* cleanup in case next_token stored something */
 			e1[1] = 0;
 			add_colitem(s,s1);
 			++s1;				/* step over the opening quote */
 			if (!(n = is_ucode(s1))) {
 				error_msg("starting char must be a <U####> code: %s", s1);
 			}
 			assert(s1[n] == '<');	/* a second code must follow the first */
 			s1[n] = 0;			/* keep only the starting code */
 			s = xsymdup(s1);
 			if (!(tsearch(s, &cur_base->root_starter_char, sym_cmp))) {
 				error_msg("OUT OF MEMORY");
 			}

 			return;
 		}
 	}
 	/* s is NULL when the argument is missing; never hand NULL to %s. */
 	error_msg("illegal or missing arg for collating-element: %s", s ? s : "(null)");
 }

 /*
  * Search loc's section list for the section called `name`.
  * Returns the matching list item, or NULL when loc is NULL or no section
  * has that name.
  */
 static ll_item_t *find_section_list_item(const char *name, col_locale_t *loc)
 {
 	ll_item_t *item;

 	if (loc == NULL) {
 		return NULL;
 	}

#warning devel code
 	for (item = loc->section_list; item != NULL; item = item->next) {
 		/* Every entry on a section list carries a section_t. */
 		assert((item->data_type == DT_SECTION) || (item->data_type == DT_REORDER));
 		if (strcmp(name, ((section_t *)(item->data))->name) == 0) {
 			return item;
 		}
 	}
 	return NULL;
 }

 /* Return the last item of the NULL-terminated list starting at p (non-NULL). */
 static ll_item_t *find_ll_last(ll_item_t *p)
 {
 	ll_item_t *tail;

 	assert(p);

 	for (tail = p; tail->next != NULL; tail = tail->next) {
 		/* just walking */
 	}
 	return tail;
 }

 /*
  * Handle a script directive: the argument must be a single <...> token
  * with nothing after it.  The script is added as a new section with
  * add_script().
  *
  * Exits through error_msg() on a missing or malformed argument.
  */
 static void do_script(void)
 {
 	char *s;
 	char *e;

 	if ((s = next_token()) != NULL) {
 		e = strrchr(s,'>');
 		/* Token must start with '<' and end with '>'. */
 		if ((*s == '<') && e && !e[1]) {
 			if (next_token() != NULL) {
 				error_msg("illegal trailing text: %s", pos);
 			}
 			e[1] = 0; /* cleanup in case next_token stored something */
 			add_script(s);
 			return;
 		}
 	}
 	/* s is NULL when the argument is missing; never hand NULL to %s. */
 	error_msg("illegal or missing arg for script: %s", s ? s : "(null)");
 }

 static col_locale_t *new_col_locale(char *name)
 {
 	ll_item_t *lli;
 	ll_item_t *lli2;

 	cur_col = (col_locale_t *) xmalloc(sizeof(col_locale_t));
 	cur_col->name = name;
 	cur_col->root_colitem = NULL;
 	cur_col->root_element = NULL;
 	cur_col->root_scripts = NULL;
 	cur_col->base_locale = NULL;
 	if (!superset) {
 		/* start with an anonymous section */
 		cur_section = new_section(NULL);
 		cur_col->section_list = new_ll_item(DT_SECTION, cur_section);
 	} else {
 		/* start with a reorder section */
 		cur_section = new_section("R");
 		cur_num_weights = cur_section->num_rules
 			= ((section_t *)(cur_base->section_list->data))->num_rules;
 		memcpy(cur_rule,
 			   ((section_t *)(cur_base->section_list->data))->rules,
 			   MAX_COLLATION_WEIGHTS);
 		memcpy(cur_section->rules,
 			   ((section_t *)(cur_base->section_list->data))->rules,
 			   MAX_COLLATION_WEIGHTS);
 		cur_col->section_list = new_ll_item(DT_REORDER, cur_section);
 		assert(cur_base->section_list->next == NULL); /* currently only one section allowed */
 		lli = ((section_t *)(cur_base->section_list->data))->itm_list;
 		assert(lli);
 		lli2 = new_ll_item(DT_REORDER, cur_section);
 		lli2->prev = lli2->next = lli2;
 		insque(lli2, lli->prev);
 		((section_t *)(cur_base->section_list->data))->itm_list = lli2;
 	}
 /* 	cur_col->section_list = NULL; */
 /* 	add_script(((section_t *)(cur_col->section_list->data))->name); */
 	cur_col->root_wi_index = NULL;
 	cur_col->root_wi_index_reordered = NULL;
 	cur_col->root_derived_wi = NULL;
 	cur_col->derived_list = NULL;
 	cur_col->root_starter_char = NULL;
 	cur_col->root_starter_all = NULL;
 	cur_col->undefined_idx = NULL;
 	return cur_col;
 }

 static int colitem_cmp(const void *n1, const void *n2)
 {
     return strcmp(((colitem_t *)n1)->string, ((colitem_t *)n2)->string);
 }

 static int colelement_cmp(const void *n1, const void *n2)
 {
     int r;

     r = strcmp(((colitem_t *)n1)->string, ((colitem_t *)n2)->string);
     if (!r) {
 		if (((colitem_t *)n1)->element && ((colitem_t *)n2)->element) {
 			r = strcmp(((colitem_t *)n1)->element, ((colitem_t *)n2)->element);
 		} else if (((colitem_t *)n1)->element == ((colitem_t *)n2)->element) {
 			r = 0;				/* both null */
 		} else {
 			r = (((colitem_t *)n1)->element == NULL) ? -1 : 1;
 		}
     }
     return r;
 }

 /*
  * Release a colitem_t record.  Only the record itself is freed; its
  * `string` and `element` members are left alone (new_colitem() obtains
  * them from xsymdup()).
  */
 static void del_colitem(colitem_t *p)
 {
 	free(p);
 }

 /*
  * Allocate a colitem_t for `item`.  `def` is the optional definition and
  * may be NULL; both strings are stored as xsymdup() copies.
  */
 static colitem_t *new_colitem(char *item, char *def)
 {
 	colitem_t *ci = xmalloc(sizeof(colitem_t));

 	ci->string = xsymdup(item);
 	ci->element = (def != NULL) ? xsymdup(def) : NULL;

 	return ci;
 }

 /*
  * Register a collating symbol (def == NULL) or collating element
  * (def != NULL) called `item` with the current locale.
  *
  * - In superset mode an item already known to the base locale is dropped.
  * - When the current locale is the derived one, the item is also entered
  *   in the base locale's tree; an existing base entry with the same name
  *   but a different definition is a fatal error.
  * - The item is then entered in the current locale's tree.  An exact
  *   duplicate is reported on stderr and discarded; a same-name entry with
  *   a different definition is a fatal error.
  */
 static void add_colitem(char *item, char *def)
 {
 	colitem_t *p;

 #if 0
 	printf("adding collation item %s", item);
 	if (def) {
 		printf(" with definition %s", def);
 	}
 	printf("\n");
 #endif

 	p = new_colitem(item, def);

#warning devel code
 	if (superset) {
 		if (tfind(p, &cur_base->root_colitem, colitem_cmp)) {
 /* 			verbose_msg(VDETAIL, "skipping superset duplicate collating item \"%s\"\n", p->string); */
 			del_colitem(p);
 			return;
 /* 		} else { */
 /* 			verbose_msg(VDETAIL, "superset: new collating item \"%s\" = %s\n", p->string, p->element); */
 		}
 	}

 	if (cur_col == cur_derived) {
 		if (!tfind(p, &cur_base->root_colitem, colitem_cmp)) {
 			/* not in current but could be in base */
 			if (!tsearch(p, &cur_base->root_colitem, colitem_cmp)) {
 				error_msg("OUT OF MEMORY!");
 			}
 		} else if (!tfind(p,  &cur_base->root_colitem, colelement_cmp)) {
 			/* def is NULL for a collating symbol; never hand NULL to %s. */
 			error_msg("collating element/symbol mismatch: item=%s def=%s",
 					  item, def ? def : "(null)");
 		}
 	}


 	if (!tfind(p, &cur_col->root_colitem, colitem_cmp)) {
 		/* not in current but could be in base */
 		if (!tsearch(p, &cur_col->root_colitem, colitem_cmp)) {
 			error_msg("OUT OF MEMORY!");
 		}
 	} else if (!tfind(p,  &cur_col->root_colitem, colelement_cmp)) {
 		error_msg("collating element/symbol mismatch");
 	} else {					/* already there */
 		fprintf(stderr, "duplicate collating item \"%s\"\n", p->string);
 		del_colitem(p);
 	}
 }

 /*
  * Add a script (section) named `s` to the end of the current locale's
  * section list.  It is a fatal error if the current or the base locale
  * already has a section with that name.
  */
 static void add_script(const char *s)
 {
 	ll_item_t *tail;
 	ll_item_t *fresh;

 	/* make sure it isn't in base if working with derived */
 	if ((cur_base != cur_col) && find_section_list_item(s, cur_base)) {
 		error_msg("attempt to add script %s for derived when already in base", s);
 	}

 	if (find_section_list_item(s, cur_col)) {
 		error_msg("attempt to readd script %s", s);
 	}

 	tail = find_ll_last(cur_col->section_list);
 	fresh = new_ll_item(DT_SECTION, new_section(s));
 	insque(fresh, tail);
 }

 /* Keywords accepted in an order_start rule list (see do_order_start()). */
 static const char str_forward[] =  "forward";
 static const char str_backward[] = "backward";
 static const char str_position[] = "position";

 /*
  * Handle an order_start directive.
  *
  * Accepted forms:
  *     order_start
  *     order_start rule{;rule}
  *     order_start <section>{;rule}
  * where each rule is a comma-separated combination of "forward",
  * "backward" and "position".  With no rules a single "forward" is used.
  *
  * Without a <section> argument the rules go to an anonymous section: the
  * current one if it is anonymous and still empty, otherwise a new one
  * appended to the locale's section list.  On return cur_section,
  * cur_num_weights and cur_rule describe the selected section.
  *
  * In superset mode only the first order_start is accepted, and it is
  * otherwise ignored.  Exits through error_msg() on any error.
  */
 static void do_order_start(void)
 {
 	const char *s;
 	char *e;
 	ll_item_t *l;
 	section_t *sect;
 	int rule;
 	int line_done = 0;		/* next_token() has already reported end of line */

 	if (order_state & ~IN_ORDER) {
 		error_msg("order_start following reorder{_sections}_after");
 	}
 	order_state |= IN_ORDER;

 	if (superset) {
 		if (++superset_order_start_cnt > 1) {
 			error_msg("currently only a common order_start is supported in superset");
 		}
 		return;
 	}

 	if (!(s = next_token())) {
 		s = str_forward;		/* if no args */
 		line_done = 1;
 	}

 	if (*s == '<') {		/* section (script) */
 		e = strrchr(s,'>');
 		if (e && !e[1]) {
 			e[1] = 0; /* cleanup in case next_token stored something */

 			if (!(l = find_section_list_item(s, cur_col))) {
 				error_msg("ref of undefined sections: %s", s);
 			}
 			sect = (section_t *)(l->data);
 			if (sect->num_rules) {
 				error_msg("sections already defined: %s", s);
 			}
 		} else {
 			error_msg("illegal section ref: %s", s);
 		}

 		if (!(s = next_token())) {
 			s = str_forward;		/* if no args */
 			line_done = 1;
 		} else if (*s != ';') {
 			error_msg("missing seperator!");
 		}
 	} else {					/* need an anonymous section */
 		if ((*cur_section->name != '<') && (cur_section->num_items == 0)) { /* already in an empty anonymous section */
 			sect = cur_section;
 		} else {
 			sect = new_section(NULL);
 			l = find_ll_last(cur_col->section_list);
 			insque(new_ll_item(DT_SECTION, sect), l);
 		}
 		sect->num_rules = 0;	/* setting this below so nix default */
 	}
 	cur_section = sect;

#warning need to add section to weight list?

 	/* now do rules: one token per rule, later tokens start with ';' */
 	do {
 		rule = 0;
 		if (*s == ';') {
 			++s;
 		}
 		while (*s) {
 			if (!strncmp(str_forward, s, 7)) {
 				rule |= R_FORWARD;
 				s += 7;
 			} else if (!strncmp(str_backward, s, 8)) {
 				rule |= R_BACKWARD;
 				s += 8;
 			} else if (!strncmp(str_position, s, 8)) {
 				rule |= R_POSITION;
 				s += 8;
 			}

 			if (*s == ',') {
 				++s;
 				continue;
 			}

 			if (!*s || (*s == ';')) {
 				if (sect->num_rules >= MAX_COLLATION_WEIGHTS) {
 					error_msg("more than %d weight rules!", MAX_COLLATION_WEIGHTS);
 				}
 				if (!rule) {
 					error_msg("missing weight rule!");
 				}
 				if ((rule & (R_FORWARD|R_BACKWARD|R_POSITION)) > R_BACKWARD) {
 					error_msg("backward paired with  forward and/or position!");
 				}

 				sect->rules[sect->num_rules++] = rule;
 				rule = 0;
 				continue;
 			}

 			error_msg("illegal weight rule: %s", s);
 		}
 		/* Once next_token() has returned NULL the line cursor is NULL as
 		 * well, so it must not be called again when the default rule was
 		 * substituted above. */
 	} while (!line_done && ((s = next_token()) != NULL));

 	cur_section = sect;

 	cur_num_weights = sect->num_rules;
 	memcpy(cur_rule, sect->rules, MAX_COLLATION_WEIGHTS);
 }

 /*
  * Handle an order_end directive: leave the IN_ORDER state and switch
  * cur_section to a fresh anonymous section.  It is a fatal error when no
  * order_start is open.
  */
 static void do_order_end(void)
 {
 	if ((order_state & IN_ORDER) == 0) {
 		error_msg("order_end with no matching order_start");
 	}

 	order_state &= ~IN_ORDER;
 	cur_section = new_section(NULL);
 }

 /*
  * Handle a reorder_after directive.
  *
  * The single argument names a weighted item that must exist in the base
  * locale.  A new reorder section "R" becomes cur_section and is linked in
  * twice through separate DT_REORDER list items: once directly after the
  * named item in the base locale's item list, and once at the end of the
  * current locale's section list.  The new section takes over the rule set
  * that was current on entry.
  *
  * Fatal errors: used while another ordering directive is open, in superset
  * mode, in a base locale, with a missing or extra argument, or for an item
  * that is not in the base locale.
  */
 static void do_reorder_after(void)
 {
 	char *t;
 	ll_item_t *lli;
 	const weight_t *w;
 	int save_cur_num_weights;
 	char save_cur_rule[MAX_COLLATION_WEIGHTS];


 	if (order_state & ~IN_REORDER) {
 		error_msg("reorder_after following order_start or reorder_sections_after");
 	}
 	order_state |= IN_REORDER;

 	if (superset) {
 		error_msg("currently reorder_after is not supported in supersets");
 	}

#warning have to use rule for current section!!!

 	if (!(t = next_token())) {
 		error_msg("missing arg for reorder_after");
 	}

 	/* Take a copy before the next next_token() call touches the buffer. */
 	t = xsymdup(t);

 	if (next_token() != NULL) {
 		error_msg("trailing text reorder_after: %s", pos);
 	}

 	if (cur_col == cur_base) {
 		error_msg("sorry.. reorder_after in base locale is not currently supported");
 	}

 	if (!(lli = find_wi_index(t, cur_base))) {
 		error_msg("reorder_after for non-base item currently not supported: %s", t);
 	}

 	/* w is only referenced by the commented-out code at the end. */
 	w = ((weighted_item_t *)(lli->data))->weight;


 	/* NOTE(review): saved around new_section(), presumably because it may
 	 * reset the current rule state - confirm against new_section(). */
 	save_cur_num_weights = cur_num_weights;
 	memcpy(save_cur_rule, cur_rule, MAX_COLLATION_WEIGHTS);

 	cur_section = new_section("R");
 	/* Link a marker for the new section right after the target item. */
 	insque(new_ll_item(DT_REORDER, cur_section), lli);

 #if 0

 	{
 		ll_item_t *l1;
 		ll_item_t *l2;
 		ll_item_t *l3;
 		l1 = new_ll_item(DT_REORDER, cur_section);
 		l2 = find_ll_last(cur_col->section_list);
 		insque(l1, l2);
 		l3 = find_ll_last(cur_col->section_list);

 		verbose_msg(VDETAIL, "reorder_after %p %p %p %s\n", l1, l2, l3, cur_section->name);
 	}
 #else
 	/* A second marker goes at the end of this locale's section list. */
 	insque(new_ll_item(DT_REORDER, cur_section), find_ll_last(cur_col->section_list));
 #endif

 	/* The reorder section takes over the rules in force on entry. */
 	cur_num_weights = cur_section->num_rules = save_cur_num_weights;
 	memcpy(cur_rule, save_cur_rule, MAX_COLLATION_WEIGHTS);
 	memcpy(cur_section->rules, save_cur_rule, MAX_COLLATION_WEIGHTS);


#warning devel code
 /* 	verbose_msg(VDETAIL, "reorder -- %s %d\n", ((weighted_item_t *)(lli->data))->symbol, w->num_weights); */

#warning hack to get around hu_HU reorder-after problem
 /* 	if (!w->num_weights) { */

 /* 	} else { */
 /* 		cur_num_weights = w->num_weights; */
 /* 		memcpy(cur_rule, w->rule, MAX_COLLATION_WEIGHTS); */
 /* 	}	 */

 /* 	verbose_msg(VDETAIL, "reorder_after succeeded for %s\n", t); */
 }

 /*
  * Handle a reorder_end directive: leave the IN_REORDER state.  It is a
  * fatal error when no reorder_after is open.
  */
 static void do_reorder_end(void)
 {
 	if ((order_state & IN_REORDER) == 0) {
 		error_msg("reorder_end with no matching reorder_after");
 	}

 	order_state &= ~IN_REORDER;
 }

 /*
  * Handle a reorder_sections_after directive.
  *
  * The single argument names a section of the base locale; the matching
  * entry of the base section list is stored in reorder_section_ptr.
  *
  * Fatal errors: used while another ordering directive is open, in superset
  * mode, in a base locale, with a missing or extra argument, or when the
  * base locale has no section of that name.
  */
 static void do_reorder_sections_after(void)
 {
 	const char *target;
 	ll_item_t *sec;

 	if (order_state & ~IN_REORDER_SECTIONS) {
 		error_msg("reorder_sections_after following order_start or reorder_after");
 	}
 	order_state |= IN_REORDER_SECTIONS;

 	if (superset) {
 		error_msg("currently reorder_sections_after is not supported in supersets");
 	}

 	target = next_token();
 	if (target == NULL) {
 		error_msg("missing arg for reorder_sections_after");
 	}
 	/* Take a copy before the next next_token() call touches the buffer. */
 	target = xsymdup(target);

 	if (next_token() != NULL) {
 		error_msg("trailing text reorder_sections_after: %s", pos);
 	}

 	if (cur_col == cur_base) {
 		error_msg("sorry.. reorder_sections_after in base locale is not currently supported");
 	}

 	sec = cur_base->section_list;
 	do {
 		if ((sec->data_type & DT_SECTION)
 			&& (strcmp(((section_t *)(sec->data))->name, target) == 0)) {
 			reorder_section_ptr = sec;
 			return;
 		}
 		sec = sec->next;
 	} while (sec != NULL);

 	error_msg("reorder_sections_after for non-base item currently not supported: %s", target);
 }

 /*
  * Handle a reorder_sections_end directive: leave the IN_REORDER_SECTIONS
  * state and clear reorder_section_ptr.  It is a fatal error when no
  * reorder_sections_after is open.
  */
 static void do_reorder_sections_end(void)
 {
 	if ((order_state & IN_REORDER_SECTIONS) == 0) {
 		error_msg("reorder_sections_end with no matching reorder_sections_after");
 	}

 	reorder_section_ptr = NULL;
 	order_state &= ~IN_REORDER_SECTIONS;
 }

 /*
  * Allocate an unlinked list item carrying `data` tagged with `data_type`.
  * Both links start as NULL and idx starts as INT_MIN.
  */
 static ll_item_t *new_ll_item(int data_type, void *data)
 {
 	ll_item_t *node = xmalloc(sizeof(ll_item_t));

 	node->data_type = data_type;
 	node->data = data;
 	node->idx = INT_MIN;
 	node->prev = NULL;
 	node->next = NULL;

 	return node;
 }

 /* Tree comparator for plain C strings: strcmp() on the two keys. */
 static int sym_cmp(const void *n1, const void *n2)
 {
 	const char *a = (const char *) n1;
 	const char *b = (const char *) n2;

 	return strcmp(a, b);
 }

 /*
  * Intern a string: return the stored copy of `s` from the root_sym tree,
  * adding a strdup() copy first if the string is not yet known.  num_sym
  * and mem_sym are updated for each new symbol.  Exits through error_msg()
  * when out of memory.
  */
 static char *xsymdup(const char *s)
 {
 	void *node = tfind(s, &root_sym, sym_cmp);

 	if (node == NULL) {			/* not a currently known symbol */
 		s = strdup(s);
 		if (s != NULL) {
 			node = tsearch(s, &root_sym, sym_cmp);
 		}
 		if (node == NULL) {
 			error_msg("OUT OF MEMORY!");
 		}
 		++num_sym;
 		mem_sym += strlen(s) + 1;
 	}

 	/* A tree node starts with the pointer to its key. */
 	return *(char **) node;
 }

 /*
  * Tree comparator for weight_t records: order by number of weights, then
  * level by level by rule value and by collation item string.
  */
 static int weight_cmp(const void *n1, const void *n2)
 {
 	const weight_t *a = (const weight_t *) n1;
 	const weight_t *b = (const weight_t *) n2;
 	int level;
 	int diff;

 	if (a->num_weights != b->num_weights) {
 		return a->num_weights - b->num_weights;
 	}

 	for (level = 0; level < a->num_weights; ++level) {
 		if (a->rule[level] != b->rule[level]) {
 			return a->rule[level] - b->rule[level];
 		}
 		diff = strcmp(a->colitem[level], b->colitem[level]);
 		if (diff != 0) {
 			return diff;
 		}
 	}
 	return 0;
 }

 /*
  * Return the canonical stored copy of weight `w` from the root_weight
  * tree.  An unknown weight is copied to the heap, inserted and counted in
  * unique_weights.  Exits through error_msg() when out of memory.
  */
 static weight_t *register_weight(weight_t *w)
 {
 	void *node = tfind(w, &root_weight, weight_cmp);

 	if (node == NULL) {			/* new weight */
 		weight_t *copy = xmalloc(sizeof(weight_t));

 		memcpy(copy, w, sizeof(weight_t));
 		node = tsearch(copy, &root_weight, weight_cmp);
 		if (node == NULL) {
 			error_msg("OUT OF MEMORY!");
 		}
 		++unique_weights;
 	}

 	return *(weight_t **) node;
 }

 /*
  * Count the items of the list starting at `l`.  Works for NULL-terminated
  * and for circular lists; an empty list (NULL) has length 0.
  */
 static size_t ll_len(ll_item_t *l)
 {
 	size_t count = 0;
 	ll_item_t *cur = l;

 	if (cur == NULL) {
 		return 0;
 	}

 	do {
 		++count;
 		cur = cur->next;
 	} while ((cur != NULL) && (cur != l));	/* stop at the end or after one lap */

 	return count;
 }

 /*
  * Count the items of the list starting at `l` whose data_type has at
  * least one bit of `mask` set.  Works for NULL-terminated and for circular
  * lists; an empty list (NULL) gives 0.
  */
 static size_t ll_count(ll_item_t *l, int mask)
 {
 	size_t count = 0;
 	ll_item_t *cur = l;

 	if (cur == NULL) {
 		return 0;
 	}

 	do {
 		if (cur->data_type & mask) {
 			++count;
 		}
 		cur = cur->next;
 	} while ((cur != NULL) && (cur != l));	/* stop at the end or after one lap */

 	return count;
 }


 static int wi_index_cmp(const void *n1, const void *n2)
 {
 	const char *s1 = ((weighted_item_t *)(((ll_item_t *) n1)->data))->symbol;
 	const char *s2 = ((weighted_item_t *)(((ll_item_t *) n2)->data))->symbol;

     return strcmp(s1, s2);
 }

 /*
  * Index the weighted list item `l` by symbol in the current locale.
  *
  * An item whose symbol is "UNDEFINED" is also remembered as the locale's
  * undefined_idx.  When the current locale is not the base locale and the
  * symbol is unknown to the base locale's index, the item is additionally
  * entered in the base locale's root_derived_wi tree.
  */
 static void add_wi_index(ll_item_t *l)
 {
 	const char *symbol;

 	assert(l->data_type == DT_WEIGHTED);

 	symbol = ((weighted_item_t *)(l->data))->symbol;
 	if (strcmp(symbol, "UNDEFINED") == 0) {
 		cur_col->undefined_idx = l;
 	}

 	/* new wi_index */
 	if (!tfind(l, &cur_col->root_wi_index, wi_index_cmp)
 		&& !tsearch(l, &cur_col->root_wi_index, wi_index_cmp)) {
 		error_msg("OUT OF MEMORY!");
 	}

 	if (cur_base == cur_col) {
 		return;
 	}

 	/* not a base val and not yet recorded as derived */
 	if (!tfind(l, &cur_base->root_wi_index, wi_index_cmp)
 		&& !tfind(l, &cur_base->root_derived_wi, wi_index_cmp)
 		&& !tsearch(l, &cur_base->root_derived_wi, wi_index_cmp)) {
 		error_msg("OUT OF MEMORY!");
 	}
 }

 /* Next index handed out by add_final_col_index(); incremented on each new entry. */
 static int final_index;


 /*
  * Check whether `s` starts with a <Uhhhh> code (exactly four hex digits).
  * Returns the length of the code (7) on a match and 0 otherwise.  Text
  * following the code is not examined.
  */
 static int is_ucode(const char *s)
 {
 	/* The && chain stops at the first mismatch, so the terminating NUL of
 	 * a short string is never read past.  The casts keep the <ctype.h>
 	 * arguments in the unsigned char range even for bytes >= 0x80. */
 	if ((s[0] == '<')
 		&& (s[1] == 'U')
 		&& isxdigit((unsigned char) s[2])
 		&& isxdigit((unsigned char) s[3])
 		&& isxdigit((unsigned char) s[4])
 		&& isxdigit((unsigned char) s[5])
 		&& (s[6] == '>')
 		) {
 		return 7;
 	} else {
 		return 0;
 	}
 }

 /*
  * Assign the next final collation index to symbol `s` unless it already
  * has one.  The mapping is kept in the global hsearch() table with the
  * index stored in the entry's data pointer; final_index is incremented
  * for each new symbol.  The key pointer is stored as-is, not copied.
  */
 static void add_final_col_index(const char *s)
 {
 	ENTRY e;

 	e.key = (char *) s;
 	/* Go through intptr_t so the int <-> pointer round trip is well
 	 * defined where pointers are wider than int. */
 	e.data = (void *)(intptr_t)(final_index);
 	if (!hsearch(e, FIND)) {	/* not in the table */
 		if (!hsearch(e, ENTER)) {
 			error_msg("OUT OF MEMORY! (hsearch)");
 		}
 #if 0
 		{
 			int n;
 			void *v;
 			colitem_t ci;
 			colitem_t *p;
 			const char *t;

 			if (!strcmp(s, "UNDEFINED")) {
 				printf("%6d: %s\n", final_index, s);
 			} else {
 				assert(*s == '<');
 				if ((n = is_ucode(s)) != 0) {
 					assert(!s[n]);
 					printf("%6d: %s\n", final_index, s);
 				} else {
 					ci.string = (char *) s;
 					ci.element = NULL; /* don't care */
 					v = tfind(&ci, &cur_base->root_colitem, colitem_cmp);
 					if (!v) {
 						verbose_msg(VDETAIL, "%s  NOT DEFINED!!!\n", s);
 					} else {
 						p = *((colitem_t **) v);
 						if (p->element != NULL) {
 							t = p->element;
 							assert(*t == '"');
 							++t;
 							n = is_ucode(t);
 							assert(n);
 							printf("%6d: %.*s | ", final_index, n, t);
 							do {
 								t += n;
 								assert(*t);
 								if (*t == '"') {
 									assert(!t[1]);
 									break;
 								}
 								n = is_ucode(t);
 								assert(n);
 								printf("%.*s", n, t);
 							} while (1);
 							printf("   collating-element %s\n", s);
 						} else {
 							printf("%6d: %s  (collating-symbol)\n", final_index, s);
 						}
 					}
 				}
 			}
 		}
 #endif
 		++final_index;
 	}

 }

 /*
  * Look up the final collation index stored for symbol `s` in the global
  * hsearch() table.  Returns 0 when the symbol has no entry.
  */
 static int final_index_val0(const char *s)
 {
 	ENTRY *p;
 	ENTRY e;

 	e.key = (char *) s;
 	e.data = NULL;				/* unused by FIND, but don't pass garbage */

 	if (!(p = hsearch(e, FIND))) {	/* not in the table */
 		return 0;
 	}

 	/* The index is stored in the data pointer; convert via intptr_t. */
 	return (int)(intptr_t)(p->data);
 }

 /*
  * Look up the final collation index stored for symbol `s` in the global
  * hsearch() table.  Exits through error_msg() when the symbol has no
  * entry.
  */
 static int final_index_val(const char *s)
 {
 	ENTRY *p;
 	ENTRY e;

 	e.key = (char *) s;
 	e.data = NULL;				/* unused by FIND, but don't pass garbage */

 	if (!(p = hsearch(e, FIND))) {	/* not in the table */
 		error_msg("can't find final index: %s", s);
 	}

 	/* The index is stored in the data pointer; convert via intptr_t. */
 	return (int)(intptr_t)(p->data);
 }

 /* Running total used by count_nodes() during a tnumnodes() walk. */
 static size_t num_tree_nodes;

 /*
  * twalk() callback: count each node exactly once, i.e. on its postorder
  * visit if it is an internal node or on its single visit if it is a leaf.
  */
 static void count_nodes(const void *ptr, VISIT order, int level)
 {
 	switch (order) {
 	case postorder:
 	case leaf:
 		++num_tree_nodes;
 		break;
 	default:
 		break;
 	}
 }

 /* Return the number of nodes in the tsearch() tree rooted at `root`. */
 static size_t tnumnodes(const void *root)
 {
 	num_tree_nodes = 0;
 	twalk(root, count_nodes);

 	return num_tree_nodes;
 }

 /*
  * Look up the weighted list item for symbol `sym` in locale cl's
  * root_wi_index tree.  Returns the stored list item, or NULL if the
  * symbol is not indexed.
  */
 static ll_item_t *find_wi_index(const char *sym, col_locale_t *cl)
 {
 	weighted_item_t key_item;
 	ll_item_t key;
 	void *hit;

 	/* Temporary key: the comparator only reads data->symbol. */
 	key_item.symbol = sym;
 	key.data_type = DT_WEIGHTED;
 	key.data = &key_item;

 	hit = tfind(&key, &cl->root_wi_index, wi_index_cmp);

 	return (hit != NULL) ? *(ll_item_t **) hit : NULL;
 }

 /*
  * If symbol `sym` is a weighted item of the base locale, record it in the
  * base locale's root_wi_index_reordered tree.  Unknown symbols are
  * ignored.  Exits through error_msg() when out of memory.
  */
 static void mark_reordered(const char *sym)
 {
 	ll_item_t *item = find_wi_index(sym, cur_base);

 	if ((item != NULL)
 		&& !tsearch(item, &cur_base->root_wi_index_reordered, wi_index_cmp)) {
 		error_msg("OUT OF MEMORY!");
 	}
 }

 /*
  * Look up symbol `sym` in the base locale's root_wi_index_reordered tree.
  * Returns the stored list item, or NULL if the symbol was not marked as
  * reordered.
  */
 static ll_item_t *find_wi_index_reordered(const char *sym)
 {
 	weighted_item_t key_item;
 	ll_item_t key;
 	void *hit;

 	/* Temporary key: the comparator only reads data->symbol. */
 	key_item.symbol = sym;
 	key.data_type = DT_WEIGHTED;
 	key.data = &key_item;

 	hit = tfind(&key, &cur_base->root_wi_index_reordered, wi_index_cmp);

 	return (hit != NULL) ? *(ll_item_t **) hit : NULL;
 }

 static ll_item_t *init_comm_ptr(void)
 {
 	assert(cur_base);
 	assert(cur_base->section_list);
 	/* at the moment, only support one section in comm */
 	assert(cur_base->section_list->next == NULL);

 	comm_cur_ptr = ((section_t *)(cur_base->section_list->data))->itm_list;

 	while (comm_cur_ptr && (comm_cur_ptr->data_type & DT_REORDER)) {
 		comm_cur_ptr = comm_cur_ptr->next;
 	}

 #warning devel code
 /* 	{ */
 /* 		ll_item_t *p = comm_cur_ptr; */
 /* 		verbose_msg(VDETAIL, "init_comm_ptr\n"); */

 /* 		while (p != comm_cur_ptr) { */
 /* 			if (p->data_type & DT_WEIGHTED) { */
 /* 				verbose_msg(VDETAIL, "%s", ((weighted_item_t *)p)->symbol); */
 /* 			} */
 /* 			p = p->next; */
 /* 		} */
 /* 	} */

 	assert(comm_cur_ptr);

 /* 	verbose_msg(VDETAIL, "init_comm_ptr -- %s %p %p %p %d\n", */
 /* 			((weighted_item_t *)(comm_cur_ptr->data))->symbol, */
 /* 			comm_cur_ptr, comm_cur_ptr->prev, comm_cur_ptr->next, */
 /* 			ll_len(comm_cur_ptr)); */

 	comm_prev_ptr = NULL;
 	return comm_cur_ptr;
 }

 static ll_item_t *next_comm_ptr(void)
 {
 	/* at the moment, only support one section in comm */
 	assert(cur_base->section_list->next == NULL);

 	comm_prev_ptr = comm_cur_ptr;

     while (comm_cur_ptr && ((comm_cur_ptr = comm_cur_ptr->next) != NULL)) {
 		if (!(comm_cur_ptr->data_type & DT_REORDER)) {
 			break;
 		}
 	}

 	return comm_cur_ptr;
 }

 /* Number of weighted items seen so far; reset by dump_weights(). */
 static int dump_count;

 /*
  * Two variants of dump_section() follow.  The disabled one prints every
  * weighted item in full; the active one prints only the addresses of the
  * weighted items found inside reorder sections.
  */
 #if 0
 static void dump_section(section_t *s, int mask, col_locale_t *der)
 {
 	ll_item_t *lli;
 	ll_item_t *lli0;
 	weighted_item_t *w;
 	weight_t *p;
 	int i;

 	lli0 = lli = s->itm_list;

 	if (!lli0) {
 		return;
 	}

 	do {
 		if (!(lli->data_type & mask)) {
 			lli = lli->next;
 			continue;
 		}
 		if (lli->data_type & DT_WEIGHTED) {
 			++dump_count;
 			w = (weighted_item_t *)(lli->data);
 			p = w->weight;
 			printf("%6d: %s (%d) ", dump_count, w->symbol, p->num_weights);
 			for (i = 0 ; i < p->num_weights ; i++) {
 				if (p->rule[i] & R_FORWARD) {
 					printf("F");
 				}
 				if (p->rule[i] & R_BACKWARD) {
 					printf("B");
 				}
 				if (p->rule[i] & R_POSITION) {
 					printf("P");
 				}
 				printf(",");
 			}
 			for (i = 0 ; i < p->num_weights ; i++) {
 				printf("   %s", p->colitem[i]);
 			}
 			printf("\n");
 		} else if (lli->data_type & (DT_SECTION|DT_REORDER)) {

 			if (lli->data_type == DT_REORDER) {
 				assert(der);
 				if (strncmp(((section_t *)(lli->data))->name, der->name, strlen(der->name))) {
 					lli = lli->next;
 					continue;
 				}
 			}

 			if (lli->data_type & DT_SECTION) {
 				printf("SECTION -----------------\n");
 			} else {
 				printf("REORDER -----------------\n");
 			}

 			dump_section((section_t *)(lli->data), mask, der);
 			printf("DONE --------------------\n");
 		}
 		lli = lli->next;
 	} while (lli != lli0);
 }
 #else
 /* Non-zero while dump_section() is recursing into a reorder section. */
 static int in_reorder_section = 0;

 /*
  * Walk the circular item list of section `s` and dump the items whose
  * data_type matches `mask`.
  *
  * Weighted items are counted in dump_count; their address is printed only
  * while inside a reorder section.  Reorder sections are entered
  * recursively when their name starts with der->name (der must then be
  * non-NULL), and a newline is printed after each one.  A nested
  * DT_SECTION item is not expected and trips an assert.
  */
 static void dump_section(section_t *s, int mask, col_locale_t *der)
 {
 	ll_item_t *lli;
 	ll_item_t *lli0;
 	weighted_item_t *w;
 	weight_t *p;
 	int i;

 	lli0 = lli = s->itm_list;

 	if (!lli0) {
 		return;
 	}

 	/* The list is circular: stop when we are back at the first item. */
 	do {
 		if (!(lli->data_type & mask)) {
 			lli = lli->next;
 			continue;
 		}
 		if (lli->data_type & DT_WEIGHTED) {
 			++dump_count;
 			w = (weighted_item_t *)(lli->data);
 			p = w->weight;
 #if 1
 			if (in_reorder_section) {
 				printf(" %p", w);
 			}
 #else
 			printf("%6d: %s (%d) ", dump_count, w->symbol, p->num_weights);
 			for (i = 0 ; i < p->num_weights ; i++) {
 				if (p->rule[i] & R_FORWARD) {
 					printf("F");
 				}
 				if (p->rule[i] & R_BACKWARD) {
 					printf("B");
 				}
 				if (p->rule[i] & R_POSITION) {
 					printf("P");
 				}
 				printf(",");
 			}
 			for (i = 0 ; i < p->num_weights ; i++) {
 				printf("   %s", p->colitem[i]);
 			}
 			printf("\n");
 #endif
 		} else if (lli->data_type & (DT_SECTION|DT_REORDER)) {

 			if (lli->data_type == DT_REORDER) {
 				assert(der);
 				/* Skip reorder sections whose name does not start with
 				 * the derived locale's name. */
 				if (strncmp(((section_t *)(lli->data))->name, der->name, strlen(der->name))) {
 					lli = lli->next;
 					continue;
 				}
 			}

 			if (lli->data_type & DT_SECTION) {
 /* 				printf("SECTION -----------------\n"); */
 				assert(0);
 			} else {
 /* 				printf("REORDER -----------------\n"); */
 				in_reorder_section = 1;
 			}

 			dump_section((section_t *)(lli->data), mask, der);
 /* 			printf("DONE --------------------\n"); */
 			printf("\n");
 			in_reorder_section = 0;
 		}
 		lli = lli->next;
 	} while (lli != lli0);
 }
 #endif

 /*
  * Dump the weights of a locale with dump_section().
  *
  * An empty `name` selects the current base/derived pair.  Otherwise the
  * locale is looked up by name in root_col_locale; if it turns out to be a
  * derived locale, its base locale is dumped first.  All sections of the
  * base are dumped unfiltered, then only the weighted items of the derived
  * locale's DT_SECTION sections.
  *
  * Exits through error_msg() for an unknown locale name.
  */
 static void dump_weights(const char *name)
 {
 	ll_item_t *sec;
 	col_locale_t *base;
 	col_locale_t *der;
 	col_locale_t key;
 	void *hit;

 	assert(name);

 	if (*name) {
 		key.name = (char *) name;
 		hit = tfind(&key, &root_col_locale, col_locale_cmp);
 		if (hit == NULL) {
 			error_msg("unknown locale: %s", name);
 		}
 		base = *((col_locale_t **) hit);
 		der = NULL;
 		if (base->base_locale) { /* oops... really derived */
 			der = base;
 			base = der->base_locale;
 		}
 	} else {					/* use last */
 		base = cur_base;
 		der = cur_derived;
 	}

 	dump_count = 0;

 	if (base) {
 		sec = base->section_list;
 		while (sec) {
 			dump_section((section_t *)(sec->data), ~0, der);
 			sec = sec->next;
 		}
 	}

 	assert(der != base);

 	if (der) {
 		for (sec = der->section_list ; sec ; sec = sec->next) {
 			if (sec->data_type != DT_SECTION) {
 				continue;
 			}
 			dump_section((section_t *)(sec->data), DT_WEIGHTED, der);
 		}
 	}
 }

 /*
  * twalk() callback: print the string a tree node points to on stderr.
  * Each node is printed once (on its postorder or leaf visit).
  */
 static void print_starter_node(const void *ptr, VISIT order, int level)
 {
 	const char *sym;

 	if (order != postorder && order != leaf) {
 		return;
 	}

 	sym = *(const char **) ptr;
 	fprintf(stderr, "   %s\n", sym);
 }

 /*
  * Finish processing of the current base locale (cur_base) and record the
  * results in base_locale_array[base_locale_len] and, for the derived locales,
  * in der_locale_array[].
  *
  * Steps, in order:
  *   1. Assign lli->idx for the base locale in two passes: first the items
  *      that are not reordered, then the items that
  *      find_wi_index_reordered() reports as reordered.
  *   2. Assign lli->idx for weighted items of non-reorder sections of every
  *      derived locale and compute max_weight.
  *   3. Fill wcs2index[] for <U0001>..<UFFFF> from the hash table and the
  *      starter tree, then compress it into wcs2colidt_buffer with newopt().
  *   4. Emit rule indexes (add_rule) into index2ruleidx_buffer, again
  *      invariant items first and reordered items second.
  *   5. Build the starter lists, the range data and the override tables
  *      (one synthetic derived locale for ll_CC base names plus every real
  *      derived locale).
  *
  * The hash table created with hcreate() is destroyed before returning and
  * base_locale_len is incremented.
  *
  * NOTE: "continue" inside the do/while loops below jumps to the loop
  * condition, i.e. it advances to the next list item.
  */
 static void finalize_base(void)
 {
 	ll_item_t *s;
 	ll_item_t *h;
 	ll_item_t *lli;
 	ll_item_t *h2;
 	ll_item_t *l2;
 	ll_item_t *cli;
 	ll_item_t *rli = NULL;		/* the (single) range item, if any */
 	weighted_item_t *w;
 	weight_t *p;
 	int i, n, mr, r, mi;
 	col_locale_t *cl;
 	void *mm;

 	int num_invariant = 0;
 	int num_varying = 0;
 	int max_weight;
 	int index2weight_len_inc = 1;

 	assert(cur_base);
 	assert(base_locale_len+1 < BASE_LOCALE_LEN);

 	base_locale_array[base_locale_len].name = cur_base->name;
 	base_locale_array[base_locale_len].num_weights = 1;
 	base_locale_array[base_locale_len].index2weight_offset = index2weight_len;
 	base_locale_array[base_locale_len].index2ruleidx_offset = index2ruleidx_len;
 	if (!strcmp(cur_base->name,"ja_JP") || !strcmp(cur_base->name,"ko_KR")) {
 #warning fix the index2weight check!!
 		/* for these two locales no index2weight / index2ruleidx entries are appended */
 		index2weight_len_inc = 0;
 	}
 /* 	printf("%s -- index2weight_len = %d\n", cur_base->name, index2weight_len); */

 	/* hash table queried with hsearch() further down;
 	 * presumably filled by add_final_col_index() -- TODO confirm */
 	if (!hcreate(30000)) {
 		error_msg("OUT OF MEMORY!");
 	}

 	/* first pass ... set the fixed indexes */
 	final_index = i = 1;
 	mr = 0;
 	for (s = cur_base->section_list ; s ; s = s->next) {
 #if 1
 		if (s->data_type & DT_REORDER) { /* a reordered section */
 			verbose_msg(VDETAIL, "pass1: reordered section %s - xxx\n", ((section_t *)(s->data))->name);
 			lli = ((section_t *)(s->data))->itm_list;
 			r = 0;
 			if (lli) {
 /* 				r = ll_len( ((section_t *)(lli->data))->itm_list ); */
 				r = ll_len(lli) + 1;
 			}
 			/* mr tracks the largest pending reorder length; it is added to i
 			 * before the next range/weighted item gets its index */
 			if (r > mr) {
 				mr = r;
 			}
 			verbose_msg(VDETAIL, "pass1: reordered section %s - %d\n", ((section_t *)(s->data))->name, r);
 			continue;
 		}
 #endif
 		h = lli = ((section_t *)(s->data))->itm_list;
 		if (!lli) {
 			continue;
 		}
 		do {
 			if (lli->data_type & DT_RANGE) {
 				i += mr;
 				mr = 0;
 #warning check ko_kR and 9
 /* 				++i; */
 				lli->idx = i;
 				assert(!rli);	/* at most one range item per base locale */
 				rli = lli;
 				verbose_msg(VDETAIL, "range pre = %d  after = ", i);
 				i += ((range_item_t *)(lli->data))->length + 1;
 #warning check ko_kR and 9
 /* 				++i; */
 				verbose_msg(VDETAIL, "%d\n", i);
 				if (!index2weight_len_inc) { /* ko_KR hack */
 					final_index += ((range_item_t *)(lli->data))->length + 1;
 				}
 /* 				add_final_col_index("RANGE"); */
 			} else if (lli->data_type & DT_WEIGHTED) {
 				i += mr;
 				mr = 0;
 				w = (weighted_item_t *)(lli->data);
 				if (find_wi_index_reordered(w->symbol)) { /* reordered symbol so skip on first pass */
 					++num_varying;
 					++i;
 					continue;
 				}
 				++num_invariant;
 				index2weight_buffer[index2weight_len] = lli->idx = i++;
 				index2weight_len += index2weight_len_inc;
 				add_final_col_index(w->symbol);

 			} else {
 				assert(lli->data_type & DT_REORDER);
 				r = ll_len( ((section_t *)(lli->data))->itm_list );
 #warning check ko_kR and 9
 				if (r > mr) {
 					mr = r;
 				}
 /* 				r = 0; */
 			}
 		} while ((lli = lli->next) != h);
 	}

 	/* second pass ... set the reordered indexes */
 	mi = i + mr;				/* first index past everything assigned in pass 1 */
 	mr = i = 0;
 	for (s = cur_base->section_list ; s ; s = s->next) {
 		h = lli = ((section_t *)(s->data))->itm_list;
 		if (!lli) {
 			continue;
 		}
 		do {
 			if (lli->data_type & DT_RANGE) {
 				i += mr;
 				mr = 0;
 				/* resynchronise i with the index assigned in pass 1 */
 				i = lli->idx + ((range_item_t *)(lli->data))->length + 1;
 #warning check
 			} else if ((lli->data_type & DT_WEIGHTED) && !(s->data_type & DT_REORDER)) {
 				i += mr;
 				mr = 0;
 				w = (weighted_item_t *)(lli->data);
 				if (find_wi_index_reordered(w->symbol) /* reordered symbol skipped on first pass */
 #if 0
 					|| (s->data_type & DT_REORDER) /* or in a reordered section */
 #endif
 					) {
 					assert(!(s->data_type & DT_REORDER));
 					index2weight_buffer[index2weight_len] = lli->idx = ++i;
 					index2weight_len += index2weight_len_inc;
 					add_final_col_index(w->symbol);

 /* 					fprintf(stdout, "%11s: r %6d %6d %s\n", */
 /* 							cur_base->name, lli->idx, final_index_val(w->symbol), w->symbol); */

 					continue;
 				}
 				/* invariant item: pick up the index assigned in pass 1 */
 				i = lli->idx;

 /* 				fprintf(stdout, "%11s: w %6d %6d %s\n", */
 /* 						cur_base->name, lli->idx, final_index_val(w->symbol), w->symbol); */

 			} else {
 /* 				verbose_msg(VDETAIL, "section: %s  %d  %d\n", ((section_t *)(s->data))->name, */
 /* 						s->data_type, lli->data_type); */
 /* 					assert(!(s->data_type & DT_REORDER)); */
 /* 				assert(lli->data_type & DT_REORDER); */
 #if 1
 				/* h2/l2 walk the items to number: the current list itself for a
 				 * reordered section, otherwise the item list of the nested entry */
 				if (s->data_type & DT_REORDER) {
 					h2 = l2 = lli;
 					if (!h2) {
 						continue;
 					}
 				} else {
 					assert(s->data_type & DT_SECTION);
 					h2 = l2 = ((section_t *)(lli->data))->itm_list;
 					if (!h2) {
 						continue;
 					}
 				}


 #else
 				h2 = l2 = ((section_t *)(lli->data))->itm_list;
 				if (!h2) {
 					continue;
 				}
 #endif
 				r = 0;
 				do {
 					assert(l2->data_type & DT_WEIGHTED);
 					++r;
 					l2->idx = i + r;

 /* 					fprintf(stdout, "%s: R %6d        %s\n", */
 /* 							((section_t *)(lli->data))->name, l2->idx, ((weighted_item_t *)(l2->data))->symbol); */

 				} while ((l2 = l2->next) != h2);
 				if (r > mr) {
 					mr = r;
 				}
 			}
 		} while ((lli = lli->next) != h);
 	}

 	/* finally, walk through all derived locales and set non-reordered section items */
 	mr = mi;
 	for (cli = cur_base->derived_list ; cli ; cli = cli->next) {
 		cl = (col_locale_t *)(cli->data);
 /* 		verbose_msg(VDETAIL, "pass3: %d  %s\n", cli->data_type, cl->name); */

 /* 		fprintf(stdout, "pass3: %d  %s\n", cli->data_type, cl->name); */

 		assert(cli->data_type == DT_COL_LOCALE);

 		/* every derived locale starts numbering at the same value (mi) */
 		i = mi;
 		for (s = cl->section_list ; s ; s = s->next) {
 /* 			if (s->data_type & DT_REORDER) { */
 /* 				continue; */
 /* 			} */
 			h = lli = ((section_t *)(s->data))->itm_list;
 			if (!lli) {
 				continue;
 			}
 			do {
 				assert(!(lli->data_type & DT_RANGE));
 				if (lli->data_type & DT_WEIGHTED) {
 /* 					verbose_msg(VDETAIL, "     %d %d %s\n", lli->data_type, lli->idx, ((weighted_item_t *)(lli->data))->symbol); */
 					add_final_col_index(((weighted_item_t *)(lli->data))->symbol);
 					if (s->data_type & DT_REORDER) {
 						continue;
 					}
 					assert(lli->idx == INT_MIN);	/* must not have been numbered yet */
 					lli->idx = ++i;

 /* 					fprintf(stdout, "%11s: S %6d %6d %s\n", */
 /* 							cl->name, lli->idx, */
 /* 							final_index_val(((weighted_item_t *)(lli->data))->symbol), */
 /* 							((weighted_item_t *)(lli->data))->symbol); */

 				} else {
 					/* not expected to happen: the code below is unreachable
 					 * unless assertions are compiled out */
 					assert(0);
 					assert(lli->data_type & DT_SECTION);

 					h2 = l2 = ((section_t *)(lli->data))->itm_list;
 					if (!h2) {
 						continue;
 					}
 					do {
 						assert(l2->data_type & DT_WEIGHTED);
 						assert(l2->idx == INT_MIN);
 						l2->idx = ++i;
 						add_final_col_index(((weighted_item_t *)(l2->data))->symbol);
 					} while ((l2 = l2->next) != h2);
 				}
 			} while ((lli = lli->next) != h);
 		}
 		if (i > mr) {
 			mr = i;
 		}
 	}
 	max_weight = mr;

 	assert(num_varying == tnumnodes(cur_base->root_wi_index_reordered));

 	/* we can now initialize the wcs2index array */
 	{
 		ENTRY *p;
 		ENTRY e;
 		char buf[8];
 		static const char xd[] = "0123456789ABCDEF";
 		int starter_index = final_index;
 		int wcs2index_count = 0;

 		/* buf holds the symbol name "<Uhhhh>"; only the four hex digits are
 		 * rewritten per iteration (upper-case hex only) */
 		strcpy(buf, "<U....>");
 		memset(wcs2index, 0, sizeof(wcs2index));
 		e.key = (char *) buf;
 		for (i=1 ; i <= 0xffff ; i++) {
 			buf[5] = xd[ i & 0xf ];
 			buf[4] = xd[ (i >> 4) & 0xf ];
 			buf[3] = xd[ (i >> 8) & 0xf ];
 			buf[2] = xd[ (i >> 12) & 0xf ];

 			if ((p = hsearch(e, FIND)) != NULL) {
 				++wcs2index_count;
 				if ((tfind(buf, &cur_base->root_starter_char, sym_cmp)) != NULL) {
 					/* starter characters get indexes above final_index */
 					wcs2index[i] = ++starter_index;
 /* 					verbose_msg(VDETAIL, "wcs2index[ %#06x ] = %d  (starter)\n", i, wcs2index[i]); */
 				} else {
 					wcs2index[i] = (int)(p->data);
 /* 					verbose_msg(VDETAIL, "wcs2index[ %#06x ] = %d\n", i, wcs2index[i]); */
 				}
 			} else {
 				if ((tfind(buf, &cur_base->root_starter_char, sym_cmp)) != NULL) {
 					error_msg("marked starter but not in hash: %s", buf);
 				}
 			}
 		}


 	/* ---------------------------------------------------------------------- */
 		{
 			int i, n;
 			table_data table;
 			size_t t, smallest;

 			/* sizing run: with table.ii == NULL newopt() only returns the
 			 * size; remember the shift n that gives the smallest table */
 			n = 0;
 			smallest = SIZE_MAX;
 			table.ii = NULL;
 			for (i=0 ; i < 14 ; i++) {
 				if ((RANGE >> i) < 4) {
 					break;
 				}
 				t = newopt(wcs2index, RANGE, i, &table);
 				if (smallest >= t) {
 					n = i;
 					smallest = t;
 					/*  			} else { */
 					/*  				break; */
 				}
 			}


 /* 			printf("smallest = %u  for range %#x (%u)\n", smallest, RANGE, RANGE); */
 			assert(smallest != SIZE_MAX);
 			if (smallest + wcs2colidt_len >= WCS2COLIDT_LEN) {
 				error_msg("WCS2COLIDT_LEN too small");
 			}
 			base_locale_array[base_locale_len].wcs2colidt_offset = wcs2colidt_len;
 			/* second run with the chosen shift, this time writing the tables
 			 * into wcs2colidt_buffer */
 			table.ii = wcs2colidt_buffer + wcs2colidt_len;
 			t = smallest;
 			smallest = SIZE_MAX;
 			smallest = newopt(wcs2index, RANGE, n, &table);
 			assert(t == smallest);
 			wcs2colidt_len += smallest;
 /* 			verbose_msg(VDETAIL, "smallest = %d   wcs2colidt_len = %d\n", smallest, wcs2colidt_len); */

 #if 0
 			{
 				unsigned int sc, n, i0, i1;
 				unsigned int u = 0xe40;
 				table_data *tbl = &table;

 #define __LOCALE_DATA_WCctype_TI_MASK ((1 << tbl->ti_shift)-1)
 #define __LOCALE_DATA_WCctype_TI_SHIFT (tbl->ti_shift)
 #define __LOCALE_DATA_WCctype_TI_LEN (tbl->ti_len)
 #define __LOCALE_DATA_WCctype_II_MASK ((1 << tbl->ii_shift)-1)
 #define __LOCALE_DATA_WCctype_II_SHIFT (tbl->ii_shift)
 #define __LOCALE_DATA_WCctype_II_LEN (tbl->ii_len)

 				sc = u & __LOCALE_DATA_WCctype_TI_MASK;
 				u >>= __LOCALE_DATA_WCctype_TI_SHIFT;
 				n = u & __LOCALE_DATA_WCctype_II_MASK;
 				u >>= __LOCALE_DATA_WCctype_II_SHIFT;

 				i0 = tbl->ii[u];
 				verbose_msg(VDETAIL, "i0 = %d\n", i0);
 				i0 <<= __LOCALE_DATA_WCctype_II_SHIFT;
 				i1 = tbl->ii[__LOCALE_DATA_WCctype_II_LEN + i0 + n];
 				/* 	i1 = tbl->ti[i0 + n]; */
 				verbose_msg(VDETAIL, "i1 = %d\n", i1);
 				i1 <<= __LOCALE_DATA_WCctype_TI_SHIFT;
 				/* 	return *(uint16_t *)(&(tbl->ii[__LOCALE_DATA_WCctype_II_LEN + __LOCALE_DATA_WCctype_TI_LEN + i1 + sc])); */
 				verbose_msg(VDETAIL, "i2 = %d\n", __LOCALE_DATA_WCctype_II_LEN + __LOCALE_DATA_WCctype_TI_LEN + i1 + sc);
 				verbose_msg(VDETAIL, "val = %d\n",  tbl->ii[__LOCALE_DATA_WCctype_II_LEN + __LOCALE_DATA_WCctype_TI_LEN + i1 + sc]);
 				/* 	return tbl->ut[i1 + sc]; */


 			}
 #endif
 			base_locale_array[base_locale_len].ii_shift = table.ii_shift;
 			base_locale_array[base_locale_len].ti_shift = table.ti_shift;
 			base_locale_array[base_locale_len].ii_len = table.ii_len;
 			base_locale_array[base_locale_len].ti_len = table.ti_len;
 		}
 	/* ---------------------------------------------------------------------- */

 		base_locale_array[base_locale_len].num_col_base = num_invariant + num_varying;
 		base_locale_array[base_locale_len].max_col_index = final_index;
 		base_locale_array[base_locale_len].max_weight = max_weight;

 		verbose_msg(VDETAIL, "%s: %6u invariant  %6u varying  %6u derived  %6u total  %6u max weight  %6u wcs2\n",
 				cur_base->name, num_invariant, num_varying,
 				tnumnodes(cur_base->root_derived_wi), final_index, max_weight,
 				wcs2index_count);

 	}

 #if 1
 	/* ok, now we need to dump out the base and derived tables... */
 	/* don't forget to break up collating elements!!! */

 /* 	fprintf(stdout, "**************************************************\n"); */
 	/* first pass ... set the invariants */
 	for (s = cur_base->section_list ; s ; s = s->next) {
 #if 1
 		if (s->data_type & DT_REORDER) {
 			verbose_msg(VDETAIL, "1: skipping reordered section %s\n", ((section_t *)(s->data))->name);
 			continue;
 		}
 #endif
 		h = lli = ((section_t *)(s->data))->itm_list;
 		if (!lli) {
 			continue;
 		}
 		do {
 			if (lli->data_type & DT_WEIGHTED) {
 				w = (weighted_item_t *)(lli->data);
 				if (find_wi_index_reordered(w->symbol)) { /* reordered symbol so skip on first pass */
 					continue;
 				}
 				if (index2weight_len_inc) {
 					index2ruleidx_buffer[index2ruleidx_len++] =
 						add_rule((weighted_item_t *)(lli->data));
 				}
 /* 				fprintf(stdout, "%11s: w %6d %6d %s\n", */
 /* 						cur_base->name, lli->idx, final_index_val(w->symbol), w->symbol); */
 			}
 		} while ((lli = lli->next) != h);
 	}

 	/* second pass ... set varying */
 	for (s = cur_base->section_list ; s ; s = s->next) {
 #if 1
 		if (s->data_type & DT_REORDER) {
 			verbose_msg(VDETAIL, "2: skipping reordered section %s\n", ((section_t *)(s->data))->name);
 			continue;
 		}
 #endif
 		h = lli = ((section_t *)(s->data))->itm_list;
 		if (!lli) {
 			continue;
 		}
 		do {
 			if (lli->data_type & DT_WEIGHTED) {
 				w = (weighted_item_t *)(lli->data);
 				if (find_wi_index_reordered(w->symbol)) { /* reordered symbol: skipped in the first pass, handled here */
 					if (index2weight_len_inc) {
 						index2ruleidx_buffer[index2ruleidx_len++] =
 							add_rule((weighted_item_t *)(lli->data));
 					}
 /* 					fprintf(stdout, "%11s: r %6d %6d %s\n", */
 /* 							cur_base->name, lli->idx, final_index_val(w->symbol), w->symbol); */
 					continue;
 				}
 			}
 		} while ((lli = lli->next) != h);
 	}

 	do_starter_lists(cur_base);


 /* 	verbose_msg(VDETAIL,"updated final_index = %d\n", final_index); */

 	if (rli) {
 		/* symbol1 + 2 skips the leading "<U" so strtoul() parses the hex digits */
 		base_locale_array[base_locale_len].range_low
 			= strtoul(((range_item_t *)(rli->data))->symbol1 + 2, NULL, 16);
 		base_locale_array[base_locale_len].range_count
 			= ((range_item_t *)(rli->data))->length;
 		base_locale_array[base_locale_len].range_base_weight = rli->idx;
 		base_locale_array[base_locale_len].range_rule_offset = add_range_rule((range_item_t *)(rli->data));
 /* 		fprintf(stdout, "%11s:   %6d %6d %s %s (%d)\n", */
 /* 				"RANGE", rli->idx, -1, */
 /* 				((range_item_t *)(rli->data))->symbol1, */
 /* 				((range_item_t *)(rli->data))->symbol2, */
 /* 				((range_item_t *)(rli->data))->length); */
 	}

 /* 	fprintf(stdout,"\nDerived\n\n"); */

 	/* first, if base name is of the form ll_CC, add a derived locale for it */
 	if ((strlen(cur_base->name) == 5)
 		&& islower(cur_base->name[0])
 		&& islower(cur_base->name[1])
 		&& (cur_base->name[2] == '_')
 		&& isupper(cur_base->name[3])
 		&& isupper(cur_base->name[4])
 		) {

 		verbose_msg(VDETAIL, "adding special derived for %s\n", cur_base->name);
 /* 	verbose_msg(VDETAIL,"updated final_index = %d\n", final_index); */


 		assert(der_locale_len+1 < DER_LOCALE_LEN);

 		der_locale_array[der_locale_len].name = cur_base->name;
 		der_locale_array[der_locale_len].base_idx = base_locale_len;

 		/* override list with no entries: the "single" marker (1) followed
 		 * by the terminating 0 */
 		u16_buf[0] = 1;
 		u16_buf[1] = 0;
 		u16_buf_len = 2;

 		/* reuse an identical run already present in override_buffer,
 		 * otherwise append the new data */
 		mm = NULL;
 		if ((u16_buf_len > override_len) ||
 			!(mm = memmem(override_buffer, override_len*sizeof(override_buffer[0]),
 						  u16_buf, u16_buf_len*sizeof(u16_buf[0])))
 			) {
 			assert(override_len + u16_buf_len < OVERRIDE_LEN);
 			memcpy(override_buffer + override_len, u16_buf, u16_buf_len*sizeof(u16_buf[0]));
 			der_locale_array[der_locale_len].overrides_offset = override_len;
 			override_len += u16_buf_len;
 /* 			printf("%s: override_len = %d   u16_buf_len = %d\n", cl->name, override_len, u16_buf_len); */
 		} else if (!(u16_buf_len > override_len)) {
 			assert(mm);
 			der_locale_array[der_locale_len].overrides_offset = ((uint16_t *)(mm)) - override_buffer;
 /* 			printf("%s: memmem found a match with u16_buf_len = %d\n", cl->name, u16_buf_len); */
 		}
 		der_locale_array[der_locale_len].multistart_offset
 			= base_locale_array[base_locale_len].multistart_offset;
 		der_locale_array[der_locale_len].undefined_idx = final_index_val0("UNDEFINED");

 		if (!der_locale_array[der_locale_len].undefined_idx) {
 			error_msg("no UNDEFINED definition for %s", cur_base->name);
 		}

 		++der_locale_len;
 	} else {
 		verbose_msg(VDETAIL, "NOT adding special derived for %s\n", cur_base->name);
 	}

 	/* now all the derived... */
 	for (cli = cur_base->derived_list ; cli ; cli = cli->next) {
 		cl = (col_locale_t *)(cli->data);
 		assert(cli->data_type == DT_COL_LOCALE);

 		assert(der_locale_len+1 < DER_LOCALE_LEN);

 		der_locale_array[der_locale_len].name = cl->name;
 		der_locale_array[der_locale_len].base_idx = base_locale_len;

 		u16_buf_len = 0;

 		/* i == 0: sections with more than one item (sequences);
 		 * i == 1: sections with exactly one item (single reorders) */
 		for (i = 0 ; i < 2 ; i++) {
 			if (i) {
 /* 				fprintf(stdout, "   section --- (singles)\n"); */
 				u16_buf[u16_buf_len++] = 1;	/* single */
 			}
 			/* we do this in two passes... first all sequences, then all single reorders */
 			for (s = cl->section_list ; s ; s = s->next) {
 /* 				verbose_msg(VDETAIL, "doing section %s\n", ((section_t *)(s->data))->name); */
 				h = lli = ((section_t *)(s->data))->itm_list;
 				if (!lli) {
 /* 					fprintf(stdout, "EMPTY ITEM LIST IN SECTION %s\n", ((section_t *)(s->data))->name ); */
 					continue;
 				}
 				assert(u16_buf_len +4 < sizeof(u16_buf)/sizeof(u16_buf[0]));
 				if ((!i && (ll_len(h) > 1) ) || (ll_len(h) == i)) {
 					if (!i) {
 /* 						fprintf(stdout, "   section ----------------- %d %d\n", i, ll_len(h)); */
 						u16_buf[u16_buf_len++] = ll_len(h);	/* multi */
 						assert(lli->data_type & DT_WEIGHTED);
 #if 0
 						u16_buf[u16_buf_len++] = final_index_val(((weighted_item_t *)(lli->data))->symbol);	/* start index */
 #endif
 						u16_buf[u16_buf_len++] = lli->idx; /* start weight */
 					}
 					do {
 						assert(lli->data_type & DT_WEIGHTED);
 						if (lli->data_type & DT_WEIGHTED) {
 /* 							fprintf(stdout, "%11s: S %6d %6d %s\n", */
 /* 									cl->name, lli->idx, */
 /* 									final_index_val(((weighted_item_t *)(lli->data))->symbol), */
 /* 									((weighted_item_t *)(lli->data))->symbol); */
 #if 0
 							if (i) {
 								assert(u16_buf_len +4 < sizeof(u16_buf)/sizeof(u16_buf[0]));
 								u16_buf[u16_buf_len++] = final_index_val(((weighted_item_t *)(lli->data))->symbol);
 								assert(u16_buf[u16_buf_len-1]);
 								u16_buf[u16_buf_len++] = lli->idx; /* weight */
 							}
 #else
 							/* per item: collation index, then (singles only) the
 							 * weight, then the rule index */
 							assert(u16_buf_len +4 < sizeof(u16_buf)/sizeof(u16_buf[0]));
 							u16_buf[u16_buf_len++] = final_index_val(((weighted_item_t *)(lli->data))->symbol);
 							assert(u16_buf[u16_buf_len-1]);
 							if (i) {
 								u16_buf[u16_buf_len++] = lli->idx; /* weight */
 							}
 #endif
 							u16_buf[u16_buf_len++] = add_rule((weighted_item_t *)(lli->data));

 						}
 					} while ((lli = lli->next) != h);
 				}
 			}
 		}
 		u16_buf[u16_buf_len++] = 0;	/* terminator */

 		/* same share-or-append logic as for the synthetic ll_CC entry above */
 		mm = NULL;
 		if ((u16_buf_len > override_len) ||
 			!(mm = memmem(override_buffer, override_len*sizeof(override_buffer[0]),
 						  u16_buf, u16_buf_len*sizeof(u16_buf[0])))
 			) {
 			assert(override_len + u16_buf_len < OVERRIDE_LEN);
 			memcpy(override_buffer + override_len, u16_buf, u16_buf_len*sizeof(u16_buf[0]));
 			der_locale_array[der_locale_len].overrides_offset = override_len;
 			override_len += u16_buf_len;
 /* 			printf("%s: override_len = %d   u16_buf_len = %d\n", cl->name, override_len, u16_buf_len); */
 		} else if (!(u16_buf_len > override_len)) {
 			assert(mm);
 			der_locale_array[der_locale_len].overrides_offset = ((uint16_t *)(mm)) - override_buffer;
 /* 			printf("%s: memmem found a match with u16_buf_len = %d\n", cl->name, u16_buf_len); */
 		}

 		/* note: do_starter_lists() reuses u16_buf, so it must run after the
 		 * override data has been stored */
 		do_starter_lists(cl);

 		der_locale_array[der_locale_len].undefined_idx = final_index_val0("UNDEFINED");
 #if 0
 		assert(der_locale_array[der_locale_len].undefined_idx);
 		if (!der_locale_array[der_locale_len].undefined_idx) {
 			der_locale_array[der_locale_len].undefined_idx = base_locale_array[base_locale_len].undefined_idx;
 		}
 #endif

 		if (!der_locale_array[der_locale_len].undefined_idx) {
 			error_msg("no UNDEFINED definition for %s", cl->name);
 		}

 		++der_locale_len;
 	}

 #endif

 #warning handle UNDEFINED idx specially?  what if in only some of derived?
 /* 	base_locale_array[base_locale_len].undefined_idx = final_index_val0("UNDEFINED"); */
 	base_locale_array[base_locale_len].undefined_idx = 0;


 	/* release the hash table created at the top of this function */
 	hdestroy();

 	++base_locale_len;

 /* 	if (tnumnodes(cur_base->root_starter_char)) { */
 /* 		verbose_msg(VDETAIL, "starter nodes\n"); */
 /* 		twalk(cur_base->root_starter_char, print_starter_node); */
 /* 	} */
 }

 /*
  * Ordering function for the root_starter_all trees (tsearch/tfind).
  *
  * n1, n2: pointers to weighted_item_t.  A symbol that is not itself a
  * <U####> code is resolved through cur_base->root_colitem to its element
  * string, skipping the leading '"'.
  *
  * Items are ordered by the four hex digits of their first character; items
  * sharing a first character are ordered by the remainder of the string in
  * reverse strcmp() order.
  *
  * Returns <0, 0 or >0 like strcmp().  A symbol that cannot be resolved now
  * trips an assertion instead of dereferencing a NULL tfind() result.
  */
 static int starter_all_cmp(const void *n1, const void *n2)
 {
 	const char *s1 = ((weighted_item_t *) n1)->symbol;
 	const char *s2 = ((weighted_item_t *) n2)->symbol;
 	colitem_t x;
 	colitem_t *ci;
 	void *p;
 	int n;

 	/* sort by 1st char ... then inverse for string */

 	x.element = NULL;
 	if (!is_ucode(s1)) {
 		x.string = s1;
 		p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
 		assert(p);				/* tfind() returns NULL when not found */
 		ci = *((colitem_t **) p);
 		assert(ci->element);
 		s1 = ci->element + 1;	/* skip the opening quote */
 	}
 	if (!is_ucode(s2)) {
 		x.string = s2;
 		p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
 		assert(p);
 		ci = *((colitem_t **) p);
 		assert(ci->element);
 		s2 = ci->element + 1;
 	}

 	/* <U####>< */
 	/* 01234567 */

 	assert(is_ucode(s1));
 	assert(is_ucode(s2));

 	/* compare the four hex digits of the first character */
 	n = strncmp(s1+2, s2+2, 4);
 	if (n) {
 		return n;
 	}

 	/* same first character: compare what follows it, in reverse order */
 	s1 += 7;
 	s2 += 7;

 	return strcmp(s2, s1);
 }

 /*
  * twalk() callback for a root_starter_all tree (debugging aid): prints the
  * symbol of each node once, followed by '|', on stdout.  The locals other
  * than "item" are only used by the disabled verbose variant.
  */
 static void print_starter_all_node(const void *ptr, VISIT order, int level)
 {
 	const weighted_item_t *item = *(const weighted_item_t **) ptr;
 	colitem_t *ci;
 	void *p;
 	int n;
 	colitem_t x;

 	if (order != postorder && order != leaf) {
 		return;
 	}

 #if 0
 	if ((n = is_ucode(item->symbol)) != 0) {
 		printf(" %s\n", item->symbol);
 	} else {
 		x.string = item->symbol;
 		x.element = NULL;
 		p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
 		assert(p);
 		ci = *((colitem_t **) p);
 		printf("%s = %s\n", ci->element, item->symbol);
 	}
 #else
 	printf("%s|", item->symbol);
 #endif
 }

 /*
  * twalk() callback for a root_starter_all tree: append one starter entry to
  * u16_buf.
  *
  * For a plain <U####> symbol the entry is its collation index followed by
  * 0; in addition, the next slot of the table at the front of u16_buf
  * (u16_buf[u16_starter]) is set to the current length, unless this was the
  * last starter.  For a collating element the entry is the element's index
  * followed by the indexes of every character of the element string after
  * the first one, then 0.
  *
  * Bounds of the local scratch buffer and of u16_buf are asserted before
  * each write.
  */
 static void process_starter_node(const void *ptr, VISIT order, int level)
 {
     const weighted_item_t *w = *(const weighted_item_t **) ptr;
 	colitem_t *ci;
 	void *p;
 	int n;
 	colitem_t x;
 	const char *s;
 	char buf[32];

 	/* store index of collation item followed by (unprefixed) nul-terminated string */
     if (order == postorder || order == leaf)  {
 		if ((n = is_ucode(w->symbol)) != 0) {
 			assert(u16_buf_len + 2 <= sizeof(u16_buf)/sizeof(u16_buf[0]));
 			u16_buf[u16_buf_len++] = final_index_val(w->symbol);
 			assert(u16_buf[u16_buf_len-1]);
 			u16_buf[u16_buf_len++] = 0;
 			if (++u16_starter < base_locale_array[base_locale_len].num_starters) {
 				u16_buf[u16_starter] = u16_buf_len;
 			}
 /* 			verbose_msg(VDETAIL, "ucode - %d %d\n", u16_buf[u16_starter-1], u16_buf_len); */
 		} else {
 			x.string = w->symbol;
 			x.element = NULL;
 			p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
 			assert(p);
 			ci = *((colitem_t **) p);
 			s = ci->element;
 			assert(u16_buf_len + 2 <= sizeof(u16_buf)/sizeof(u16_buf[0]));
 			u16_buf[u16_buf_len++] = final_index_val(w->symbol);
 			assert(u16_buf[u16_buf_len-1]);
 			assert(*s == '"');
 			n = is_ucode(++s);
 /* 			verbose_msg(VDETAIL, "s is |%s| with len %d (%d)\n", s, strlen(s), n); */
 			assert(n);
 			s += n;				/* the first character is implied by the starter */
 			while (*s != '"') {
 				n = is_ucode(s);
 				assert(n);
 				/* the token plus its terminator must fit into buf */
 				assert((size_t) n + 1 <= sizeof(buf));
 				strncpy(buf, s, n+1);
 				buf[n] = 0;
 /* 				verbose_msg(VDETAIL, "buf is |%s| with len %d (%d)\n", buf, strlen(buf), n); */
 				/* keep one slot free for the terminating 0 */
 				assert(u16_buf_len + 2 <= sizeof(u16_buf)/sizeof(u16_buf[0]));
 				u16_buf[u16_buf_len++] = final_index_val(buf);
 				assert(u16_buf[u16_buf_len-1]);
 				s += n;
 			}
 			u16_buf[u16_buf_len++] = 0;
 		}
 	}
 }

 /* Tree root that complete_starter_node() inserts into; set by the caller
  * before twalk() because the callback takes no user argument. */
 static void **p_cl_root_starter_all;

 /*
  * twalk() callback: ensure the symbol a node points to has an entry in the
  * tree *p_cl_root_starter_all.  If none is found, a new weighted_item_t
  * with a NULL weight is allocated and inserted.
  */
 static void complete_starter_node(const void *ptr, VISIT order, int level)
 {
 	weighted_item_t key;
 	weighted_item_t *fresh;

 	if (order != postorder && order != leaf) {
 		return;
 	}

 	key.symbol = *(const char **) ptr;
 	key.weight = NULL;
 	if (tfind(&key, p_cl_root_starter_all, starter_all_cmp)) {
 		return;					/* already present */
 	}

 	fresh = xmalloc(sizeof(weighted_item_t));
 	fresh->symbol = key.symbol;
 	fresh->weight = NULL;
 /* 	verbose_msg(VDETAIL, "complete_starter_node: %s\n", *(const char **) ptr); */
 	if (!tsearch(fresh, p_cl_root_starter_all, starter_all_cmp)) {
 		error_msg("OUT OF MEMORY");
 	}
 }

 /*
  * Build the starter table for locale cl and store (or share) it in
  * multistart_buffer.
  *
  * cl: either cur_base or one of its derived locales.
  *
  * 1. Every weighted item of cur_base -- and, for a derived locale, of cl as
  *    well -- whose first character is in cur_base->root_starter_char is
  *    inserted into cl->root_starter_all.
  * 2. Every starter character itself is added via complete_starter_node().
  * 3. The tree is serialised into u16_buf by process_starter_node(); the
  *    first num_starters slots of u16_buf form a table filled in during the
  *    walk.
  * 4. The result is appended to multistart_buffer unless an identical run is
  *    already there; the offset is recorded in base_locale_array[] or
  *    der_locale_array[] depending on cl.
  *
  * Fix relative to the earlier version: the first-character key copied with
  * strncpy() is now NUL-terminated (strncpy() does not terminate when the
  * source is longer than the count), and both copies into buf are bounds
  * checked.
  */
 static void do_starter_lists(col_locale_t *cl)
 {
 	ll_item_t *s;
 	ll_item_t *h;
 	ll_item_t *lli;
 	col_locale_t *c;
 	colitem_t *ci;
 	weighted_item_t *w;
 	void *p;
 	char buf[32];
 	int n;
 	colitem_t x;
 	void *mm;

 	/* always scan the base locale first; cl follows if it is a derived one */
 	c = cur_base;

 /* 	printf("STARTERS %s --------------------\n", cl->name); */
  LOOP:
 	for (s = c->section_list ; s ; s = s->next) {
 		h = lli = ((section_t *)(s->data))->itm_list;
 		if (!lli) {
 			continue;
 		}
 		do {
 			/* note: "continue" below jumps to the do/while condition */
 			if (lli->data_type & DT_WEIGHTED) {
 				w = (weighted_item_t *)(lli->data);
 				ci = NULL;
 				if ((n = is_ucode(w->symbol)) != 0) {
 					assert(strlen(w->symbol) < sizeof(buf));
 					strcpy(buf, w->symbol);
 				} else {
 /* 					fprintf(stdout, "looking for |%s|\n", w->symbol); */
 					x.string = w->symbol;
 					x.element = NULL;
 					p = tfind(&x, &cur_base->root_colitem, colitem_cmp);
 					if (!p) {
 /* 						verbose_msg(VDETAIL, "Whoa... processing starters for %s and couldn't find %s\n", */
 /* 								cl->name, w->symbol); */
 						continue;
 					}
 					ci = *((colitem_t **) p);
 					if (!ci->element) {	/* just a collating symbol */
 						continue;
 					}
 					assert(ci->element[0] == '"');
 					n = is_ucode(ci->element + 1);
 					assert(n);
 					/* copy only the first character of the element and
 					 * terminate it explicitly */
 					assert((size_t) n < sizeof(buf));
 					strncpy(buf, ci->element + 1, n);
 					buf[n] = 0;
 				}
 				if ((tfind(buf, &cur_base->root_starter_char, sym_cmp)) != NULL) {
 /* 					fprintf(stdout, "adding from %s: %s", c->name, w->symbol); */
 /* 					if (ci) { */
 /* 						fprintf(stdout, " = %s", ci->element); */
 /* 					} */
 /* 					fprintf(stdout, "\n"); */

 					if (!tsearch(w, &cl->root_starter_all, starter_all_cmp)) {
 						error_msg("OUT OF MEMORY");
 					}
 				}
 			}
 		} while ((lli = lli->next) != h);
 	}

 	if (c != cl) {
 		c = cl;
 		goto LOOP;
 	}

 	p_cl_root_starter_all = &cl->root_starter_all;
 	twalk(cur_base->root_starter_char, complete_starter_node);

 	if (cl == cur_base) {
 		base_locale_array[base_locale_len].num_starters	= tnumnodes(cur_base->root_starter_char);
 	}

 #if 0
 	printf("\nNow walking tree...\n\n");
 	twalk(cl->root_starter_all, print_starter_all_node);
 	printf("\n\n");

 #endif
 	/* reserve num_starters leading slots; slot 0 holds num_starters itself */
 	u16_starter = 0;
 	u16_buf[0] = u16_buf_len = base_locale_array[base_locale_len].num_starters;
 	twalk(cl->root_starter_all, process_starter_node);
 /* 	verbose_msg(VDETAIL, "s=%d n=%d\n", u16_starter,  base_locale_array[base_locale_len].num_starters); */
 	assert(u16_starter == base_locale_array[base_locale_len].num_starters);

 #if 0
 	{ int i;
 	for (i=0 ; i < u16_buf_len ; i++) {
 		verbose_msg(VDETAIL, "starter %2d: %d - %#06x\n", i, u16_buf[i], u16_buf[i]);
 	}}
 #endif

 	mm = NULL;
 	if (u16_buf_len) {
 /* 		assert(base_locale_array[base_locale_len].num_starters); */
 		if ((u16_buf_len > multistart_len) ||
 			!(mm = memmem(multistart_buffer, multistart_len*sizeof(multistart_buffer[0]),
 						  u16_buf, u16_buf_len*sizeof(u16_buf[0])))
 			) {
 			/* no identical run stored yet: append */
 			assert(multistart_len + u16_buf_len < MULTISTART_LEN);
 			memcpy(multistart_buffer + multistart_len, u16_buf, u16_buf_len*sizeof(u16_buf[0]));
 			if (cl == cur_base) {
 				base_locale_array[base_locale_len].multistart_offset = multistart_len;
 			} else {
 				der_locale_array[der_locale_len].multistart_offset = multistart_len;
 			}
 			multistart_len += u16_buf_len;
 /* 			verbose_msg(VDETAIL, "%s: multistart_len = %d   u16_buf_len = %d\n", cl->name, multistart_len, u16_buf_len); */
 		} else {
 			/* memmem() found an identical run: share it */
 			assert(mm);
 			if (cl == cur_base) {
 				base_locale_array[base_locale_len].multistart_offset = ((uint16_t *)(mm)) - multistart_buffer;
 			} else {
 				der_locale_array[der_locale_len].multistart_offset = ((uint16_t *)(mm)) - multistart_buffer;
 			}
 /* 			verbose_msg(VDETAIL, "%s: memmem found a match with u16_buf_len = %d\n", cl->name, u16_buf_len); */
 		}
 	} else {
 		assert(!base_locale_array[base_locale_len].num_starters);
 	}

 /* 	printf("u16_buf_len = %d\n", u16_buf_len); */

 /* 	printf("STARTERS %s DONE ---------------\n", cl->name); */
 }


 /* Number of tbl_item entries per block; set by newopt() before it sorts. */
 static size_t nu_val;

 /*
  * qsort() comparison function for an array of block pointers: compares the
  * nu_val tbl_item entries each element points to, byte-wise via memcmp().
  */
 int nu_memcmp(const void *a, const void *b)
 {
 	const unsigned char *lhs = *(unsigned char**)a;
 	const unsigned char *rhs = *(unsigned char**)b;
 	size_t nbytes = nu_val * sizeof(tbl_item);

 	return memcmp(lhs, rhs, nbytes);
 }


 /*
  * Compute (and optionally emit) a compressed multi-level lookup table for
  * the array ut[0..usize-1].
  *
  * ut is split into blocks of (1 << shift) entries.  Identical blocks are
  * stored once and an index array (uit) maps each block to its unique block
  * number.  At the top level the index array is itself compressed the same
  * way, trying second-level shifts 1..13 and keeping the smallest.
  *
  * ut:    input table; must hold at least usize entries.
  * usize: number of entries; usize >> shift must not exceed RANGE.
  * shift: log2 of the block size, 0..15.
  * tbl:   if tbl->ii is NULL only the size is computed.  Otherwise the
  *        tables are written contiguously starting at tbl->ii (index, then
  *        second-level blocks, then data blocks) and the shift/len/pointer
  *        fields of *tbl are filled in.  May be NULL for size-only use.
  *
  * Returns the total number of tbl_item entries needed, or SIZE_MAX when the
  * shift is out of range, the table is shorter than one block, or there are
  * more unique blocks than a block number can represent.
  *
  * Not reentrant: uses the static "recurse" flag and the file-scope nu_val.
  */
 size_t newopt(tbl_item *ut, size_t usize, int shift, table_data *tbl)
 {
 	static int recurse;
 	tbl_item *ti[RANGE];	/* table index */
 	size_t numblocks;
 	size_t blocksize;
 	size_t uniq;
 	size_t i, j;
 	size_t smallest, t;
 	tbl_item *ii_save;
 	int uniqblock[1 << (8*sizeof(tbl_item) - 1)];
 	tbl_item uit[RANGE];
 	int shift2;
 	int cmp;

 	/* a negative shift would make the shifts below undefined */
 	if (shift < 0 || shift > 15) {
 		return SIZE_MAX;
 	}

 	ii_save = NULL;
 	blocksize = (size_t) 1 << shift;
 	numblocks = usize >> shift;
 	if (!numblocks) {
 		/* not even one complete block: nothing to index */
 		return SIZE_MAX;
 	}
 	assert(numblocks <= RANGE);	/* ti[] and uit[] hold RANGE entries */

 	/* init table index */
 	for (i=j=0 ; i < numblocks ; i++) {
 		ti[i] = ut + j;
 		j += blocksize;
 	}

 	/* sort */
 	nu_val = blocksize;
 	qsort(ti, numblocks, sizeof(ti[0]), nu_memcmp);

 	/* count unique blocks; uniqblock[k] is the position in the sorted ti[]
 	 * of the first block with unique number k */
 	uniq = 1;
 	uniqblock[0] = 0;			/* the first sorted block is unique block 0 */
 	uit[(ti[0]-ut)/blocksize] = 0;
 	for (i=1 ; i < numblocks ; i++) {
 		cmp = memcmp(ti[i-1], ti[i], blocksize*sizeof(tbl_item));
 		if (cmp < 0) {
 			if (++uniq > (1 << (8*sizeof(tbl_item) - 1))) {
 				break;
 			}
 			uniqblock[uniq - 1] = i;
 		}
 #if 1
 		else if (cmp > 0) {
 			printf("bad sort %i!\n", (int) i);
 			abort();
 		}
 #endif
 		uit[(ti[i]-ut)/blocksize] = uniq - 1;
 	}

 	smallest = SIZE_MAX;
 	shift2 = -1;
 	if (uniq <= (1 << (8*sizeof(tbl_item) - 1))) {
 		smallest = numblocks + uniq * blocksize;
 		if (!recurse) {
 			/* top level: try compressing the index array as well.  These
 			 * are sizing runs only, so tbl->ii is hidden during the call. */
 			++recurse;
 			for (j=1 ; j < 14 ; j++) {
 				if ((numblocks >> j) < 2) break;
 				if (tbl) {
 					ii_save = tbl->ii;
 					tbl->ii = NULL;
 				}
 				if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
 					t += uniq * blocksize;
 				}
 				if (tbl) {
 					tbl->ii = ii_save;
 				}
 				if (smallest >= t) {
 					shift2 = j;
 					smallest = t;
 /* 					if (!tbl->ii) { */
 /* 						printf("ishift %u  tshift %u  size %u\n", */
 /* 							   shift2, shift, t); */
 /* 					} */
 /*  				} else { */
 /*  					break; */
 				}
 			}
 			--recurse;
 		}
 	} else {
 		return SIZE_MAX;
 	}

 	if (tbl && tbl->ii) {
 		if (recurse) {
 			/* second level: emit the index (ii) followed by its blocks (ti) */
 			tbl->ii_shift = shift;
 			tbl->ii_len = numblocks;
 			memcpy(tbl->ii, uit, numblocks*sizeof(tbl_item));
 			tbl->ti = tbl->ii + tbl->ii_len;
 			tbl->ti_len = uniq * blocksize;
 			for (i=0 ; i < uniq ; i++) {
 				memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize*sizeof(tbl_item));
 			}
 		} else {
 			/* top level: let the recursive call emit ii/ti for the chosen
 			 * second-level shift, then append the data blocks (ut) */
 			assert(shift2 >= 0);
 			++recurse;
 /* 			printf("setting ishift %u  tshift %u\n", shift2, shift); */
 			newopt(uit, numblocks, shift2, tbl);
 			--recurse;
 			tbl->ti_shift = shift;
 			tbl->ut_len = uniq * blocksize;
 			tbl->ut = tbl->ti + tbl->ti_len;
 			for (i=0 ; i < uniq ; i++) {
 				memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize*sizeof(tbl_item));
 			}
 		}
 	}
 	return smallest;
 }

 /*
  * Maps a 3-bit rule combination (used as the array index) to the value
  * stored in the two high bits of a 16-bit rule entry.  Combinations marked
  * -1 are not valid.
  */
 static const int rule2val[8] = {
 	[0] = -1,
 	[1] = (1 << 14),			/* forward */
 	[2] = (2 << 14),			/* position */
 	[3] = (3 << 14),			/* forward,position */
 	[4] = 0,					/* backward */
 	[5] = -1,
 	[6] = -1,
 	[7] = -1,
 };


 /*
  * Look up the final collation index of weight name s, with fallbacks for
  * the special names that have no index of their own.
  *
  * s:   weight name to resolve.
  * sym: symbol of the item the weight belongs to.
  *
  * Returns the value of final_index_val0(s) if non-zero; otherwise 0 for
  * "IGNORE", 0x3fff for ".." or when sym is "RANGE", 0x3ffe for ".".
  * Anything else is reported through error_msg().
  */
 static int final_index_val_x(const char *s, const char *sym)
 {
 	int idx = final_index_val0(s);

 	if (idx) {
 		return idx;
 	}

 	if (strcmp(s, "IGNORE") == 0) {
 		return 0;
 	}

 	if (strcmp(s, "..") == 0 || strcmp(sym, "RANGE") == 0) {
 		if (sym[0] == '.') {
 			final_index_val(sym); /* make sure it's known */
 		}
 		return 0x3fff;
 	}

 	if (strcmp(s, ".") == 0) {
 		return 0x3ffe;
 	}

 	error_msg("can't find final index: %s", s);
 	return idx;
 }

 /* store rule2val in 2 high bits and collation index in lower.
  * for sort strings, store (offset from base) + max colindex as index.
  */
 static unsigned int add_rule(weighted_item_t *wi)
 {
 	weight_t *w = wi->weight;
 	int i, j, r, n;
 	uint16_t rbuf[MAX_COLLATION_WEIGHTS];
 	uint16_t ws_buf[32];
 	void *mm;
 	char buf[32];
 	const char *s;
 	const char *e;

 	for (i=0 ; i < MAX_COLLATION_WEIGHTS ; i++) {
 		rbuf[i] = rule2val[R_FORWARD]; /* set a default to forward-ignore */
 	}

 	if (base_locale_array[base_locale_len].num_weights < w->num_weights) {
 		base_locale_array[base_locale_len].num_weights = w->num_weights;
 	}

 	for (i=0 ; i < w->num_weights ; i++) {
 		assert(rule2val[(int)(w->rule[i])] >= 0);
 		assert(w->colitem[i] && *w->colitem[i]);
 		if (*w->colitem[i] == '"') { /* string... */
 			s = w->colitem[i] + 1;
 			assert(*s == '<');
 			n = 0;
 			do {
 				e = s;
 				do {
 					if (*e == '/') {
 						e += 2;
 						continue;
 					}
 				} while (*e++ != '>');
 				assert(((size_t)(e-s) < sizeof(buf)));
 				memcpy(buf, s, (size_t)(e-s));
 				buf[(size_t)(e-s)] = 0;

 				r = final_index_val_x(buf, wi->symbol);
 				assert(n + 1 < sizeof(ws_buf)/sizeof(ws_buf[0]));
 				ws_buf[n++] = r | rule2val[(int)(w->rule[i])];

 				s = e;
 			} while (*s != '"');
 			ws_buf[n++] = 0;	/* terminator */

 			mm = memmem(weightstr_buffer, weightstr_len*sizeof(weightstr_buffer[0]),
 						ws_buf, n*sizeof(ws_buf[0]));

 			if (!mm) {
 				assert(weightstr_len + n < WEIGHTSTR_LEN);
 				memcpy(weightstr_buffer + weightstr_len, ws_buf, n*sizeof(ws_buf[0]));
 				mm = weightstr_buffer + weightstr_len;
 				weightstr_len += n;
 			}
 			r = (((uint16_t *)(mm)) - weightstr_buffer)
 				+ base_locale_array[base_locale_len].max_col_index + 2;
 			assert(r < (1 << 14));
 			rbuf[i] = r | rule2val[(int)(w->rule[i])];
 		} else {				/* item */
 			r = final_index_val_x(w->colitem[i], wi->symbol);
 			rbuf[i] = r | rule2val[(int)(w->rule[i])];
 		}
 	}

 	for (i=0 ; i < ruletable_len ; i += MAX_COLLATION_WEIGHTS) {
 		if (!memcmp(ruletable_buffer + i, rbuf, MAX_COLLATION_WEIGHTS*sizeof(ruletable_buffer[0]))) {
 			return i/MAX_COLLATION_WEIGHTS;
 		}
 	}

 	memcpy(ruletable_buffer + ruletable_len, rbuf, MAX_COLLATION_WEIGHTS*sizeof(ruletable_buffer[0]));
 	ruletable_len += MAX_COLLATION_WEIGHTS;

 	return  (ruletable_len / MAX_COLLATION_WEIGHTS)-1;
 }

 static unsigned int add_range_rule(range_item_t *ri)
 {
 	weight_t *w = ri->weight;
 	int i, j, r, n;
 	uint16_t rbuf[MAX_COLLATION_WEIGHTS];
 	uint16_t ws_buf[32];
 	void *mm;
 	char buf[32];
 	const char *s;
 	const char *e;

 	for (i=0 ; i < MAX_COLLATION_WEIGHTS ; i++) {
 		rbuf[i] = rule2val[R_FORWARD]; /* set a default to forward-ignore */
 	}

 	if (base_locale_array[base_locale_len].num_weights < w->num_weights) {
 		base_locale_array[base_locale_len].num_weights = w->num_weights;
 	}

 	for (i=0 ; i < w->num_weights ; i++) {
 		assert(rule2val[(int)(w->rule[i])] >= 0);
 		assert(w->colitem[i] && *w->colitem[i]);
 		if (*w->colitem[i] == '"') { /* string... */
 			s = w->colitem[i] + 1;
 			assert(*s == '<');
 			n = 0;
 			do {
 				e = s;
 				do {
 					if (*e == '/') {
 						e += 2;
 						continue;
 					}
 				} while (*e++ != '>');
 				assert(((size_t)(e-s) < sizeof(buf)));
 				memcpy(buf, s, (size_t)(e-s));
 				buf[(size_t)(e-s)] = 0;

 				r = final_index_val_x(buf, "RANGE");
 				assert(n + 1 < sizeof(ws_buf)/sizeof(ws_buf[0]));
 				ws_buf[n++] = r | rule2val[(int)(w->rule[i])];

 				s = e;
 			} while (*s != '"');
 			ws_buf[n++] = 0;	/* terminator */

 			mm = memmem(weightstr_buffer, weightstr_len*sizeof(weightstr_buffer[0]),
 						ws_buf, n*sizeof(ws_buf[0]));

 			if (!mm) {
 				assert(weightstr_len + n < WEIGHTSTR_LEN);
 				memcpy(weightstr_buffer + weightstr_len, ws_buf, n*sizeof(ws_buf[0]));
 				mm = weightstr_buffer + weightstr_len;
 				weightstr_len += n;
 			}
 			r = (((uint16_t *)(mm)) - weightstr_buffer)
 				+ base_locale_array[base_locale_len].max_col_index + 2;
 			assert(r < (1 << 14));
 			rbuf[i] = r | rule2val[(int)(w->rule[i])];
 		} else {				/* item */
 			r = final_index_val_x(w->colitem[i], "RANGE");
 			rbuf[i] = r | rule2val[(int)(w->rule[i])];
 		}
 	}

 	for (i=0 ; i < ruletable_len ; i += MAX_COLLATION_WEIGHTS) {
 		if (!memcmp(ruletable_buffer + i, rbuf, MAX_COLLATION_WEIGHTS*sizeof(ruletable_buffer[0]))) {
 			return i/MAX_COLLATION_WEIGHTS;
 		}
 	}

 	memcpy(ruletable_buffer + ruletable_len, rbuf, MAX_COLLATION_WEIGHTS*sizeof(ruletable_buffer[0]));
 	ruletable_len += MAX_COLLATION_WEIGHTS;

 	return  (ruletable_len / MAX_COLLATION_WEIGHTS)-1;
 }

 /* DUMPn(X): write int member X of base_locale_array[n] to stderr as
  * "<value>-<member name>", value right-aligned in 10 columns and the name
  * cut off after 20 characters.  Needs an int `n' in scope.
  */
 #define DUMPn(X) \
 	fprintf(stderr, "%10d-%-.20s", base_locale_array[n].X, #X)

 /* Debugging aid: print the name and all numeric members of base locale n to
  * stderr, several members per output line.  n must be below base_locale_len.
  */
 static void dump_base_locale(int n)
 {
 	assert(n < base_locale_len);

 	fprintf(stderr, "Base Locale: %s\n", base_locale_array[n].name);

 	/* output line 1 */
 	DUMPn(num_weights);
 	DUMPn(ii_shift);
 	DUMPn(ti_shift);
 	DUMPn(ii_len);
 	DUMPn(ti_len);
 	DUMPn(max_weight);
 	fputc('\n', stderr);

 	/* output line 2 */
 	DUMPn(num_col_base);
 	DUMPn(max_col_index);
 	DUMPn(undefined_idx);
 	DUMPn(range_low);
 	DUMPn(range_count);
 	fputc('\n', stderr);

 	/* output line 3 */
 	DUMPn(range_base_weight);
 	DUMPn(num_starters);
 	fputc('\n', stderr);

 	/* output line 4 */
 	DUMPn(range_rule_offset);
 	DUMPn(wcs2colidt_offset);
 	DUMPn(index2weight_offset);
 	fputc('\n', stderr);

 	/* output line 5 */
 	DUMPn(index2ruleidx_offset);
 	DUMPn(multistart_offset);
 	fputc('\n', stderr);
 }

 #undef DUMPn
 /* DUMPn(X): write int member X of der_locale_array[n] to stderr as
  * "<value>-<member name>", value right-aligned in 10 columns.
  * Needs an int `n' in scope.
  */
 #define DUMPn(X) \
 	fprintf(stderr, "%10d-%s", der_locale_array[n].X, #X)

 /* Debugging aid: print derived locale n on a single stderr line -- its name,
  * the first 12 characters of its base locale's name, then each numeric
  * member.  n must be below der_locale_len.
  */
 static void dump_der_locale(int n)
 {
 	int base;

 	assert(n < der_locale_len);

 	base = der_locale_array[n].base_idx;
 	fprintf(stderr, "Derived Locale: %s (%.12s)",
 			der_locale_array[n].name, base_locale_array[base].name);

 	DUMPn(base_idx);
 	DUMPn(undefined_idx);
 	DUMPn(overrides_offset);
 	DUMPn(multistart_offset);

 	fputc('\n', stderr);
 }


 /* Number of table elements written so far by the dump routines; printed in
  * the comments of the generated table as each element's position.
  */
 static unsigned long collate_pos;

 /* Write the len elements of u to fp as comma-terminated hex literals, eight
  * per line, preceded by a comment line giving the current collate_pos and
  * name.  collate_pos is advanced by len afterwards.
  */
 static void dump_u16_array(FILE *fp, uint16_t *u, int len, const char *name)
 {
 	int idx = 0;

 	fprintf(fp, "\t/* %8lu %s */\n", collate_pos, name);

 	while (idx < len) {
 		if (idx % 8 == 0) {		/* start a new row every 8 elements */
 			fputs("\n\t", fp);
 		}
 		fprintf(fp, "  %#06x,", (unsigned int) u[idx]);
 		++idx;
 	}

 	fputc('\n', fp);
 	collate_pos += len;
 }

 /* OUT_U16C(X,N): emit value X as the next element of the table being written
  * to `fp', followed by a comment holding the element's position (collate_pos,
  * which is then incremented) and the label N.
  * X is converted to int so that it matches the %d conversion; some of the
  * values passed in are not ints (base_locale_len, for one, is a size_t).
  */
 #define OUT_U16C(X,N) fprintf(fp,"\t%10d, /* %8lu %s */\n", (int)(X), collate_pos++, N);

 /* Write the complete collation table to fp as the C definition of
  * __locale_collate_tbl[]:
  *   - the global element counts,
  *   - one record per base locale,
  *   - one record per derived locale, each preceded by a COL_IDX_<name>
  *     #define (COL_IDX_C is 0, derived locale n gets n+1),
  *   - the seven data arrays,
  * followed by a #define of __lc_collate_data_LEN giving the total number of
  * elements written.  collate_pos is advanced for every element.
  */
 static void dump_collate(FILE *fp)
 {
 	int n;

 	fprintf(fp, "const uint16_t __locale_collate_tbl[] = {\n");

 	/* global counts */
 	OUT_U16C(base_locale_len, "number of base locales");
 	OUT_U16C(der_locale_len, "number of derived locales");
 	OUT_U16C(MAX_COLLATION_WEIGHTS, "max collation weights");
 	OUT_U16C(index2weight_len, "number of index2{weight|ruleidx} elements");
 	OUT_U16C(weightstr_len, "number of weightstr elements");
 	OUT_U16C(multistart_len, "number of multistart elements");
 	OUT_U16C(override_len, "number of override elements");
 	OUT_U16C(ruletable_len, "number of ruletable elements");

 	/* base locale records; the member name doubles as the label */
 #undef DUMPn
 #define DUMPn(X) OUT_U16C(base_locale_array[n]. X, #X)
 	for (n=0 ; n < base_locale_len ; n++) {
 		/* wcs2colidt_offset is written as two elements: its low 16 bits
 		 * and the bits above them */
 		unsigned wcs2colidt_offset_low = base_locale_array[n].wcs2colidt_offset & 0xffffU;
 		unsigned wcs2colidt_offset_hi = base_locale_array[n].wcs2colidt_offset >> 16;

 		fprintf(fp, "\t/* Base Locale %2d: %s */\n", n, base_locale_array[n].name);
 		DUMPn(num_weights);
 		DUMPn(num_starters);
 		DUMPn(ii_shift);
 		DUMPn(ti_shift);
 		DUMPn(ii_len);
 		DUMPn(ti_len);
 		DUMPn(max_weight);
 		DUMPn(num_col_base);
 		DUMPn(max_col_index);
 		DUMPn(undefined_idx);
 		DUMPn(range_low);
 		DUMPn(range_count);
 		DUMPn(range_base_weight);
 		DUMPn(range_rule_offset);
 		DUMPn(index2weight_offset);
 		DUMPn(index2ruleidx_offset);
 		DUMPn(multistart_offset);
 		OUT_U16C(wcs2colidt_offset_low, "wcs2colidt_offset_low");
 		OUT_U16C(wcs2colidt_offset_hi, "wcs2colidt_offset_hi");
 	}
 #undef DUMPn

 	/* derived locale records */
 	fprintf(fp, "#define COL_IDX_C     %5d\n", 0);
 #define DUMPn(X) OUT_U16C(der_locale_array[n]. X, #X)
 	for (n=0 ; n < der_locale_len ; n++) {
 		fprintf(fp, "#define COL_IDX_%s %5d\n", der_locale_array[n].name, n+1);
 		fprintf(fp, "\t/* Derived Locale %4d: %s (%.12s) */\n",
 				n, der_locale_array[n].name,
 				base_locale_array[der_locale_array[n].base_idx].name);
 		DUMPn(base_idx);
 		DUMPn(undefined_idx);
 		DUMPn(overrides_offset);
 		DUMPn(multistart_offset);
 	}
 #undef DUMPn

 	fprintf(fp, "\n");

 	/* data arrays */
 	dump_u16_array(fp, index2weight_buffer, index2weight_len, "index2weight");
 	dump_u16_array(fp, index2ruleidx_buffer, index2ruleidx_len, "index2ruleidx");
 	dump_u16_array(fp, multistart_buffer, multistart_len, "multistart");
 	dump_u16_array(fp, override_buffer, override_len, "override");
 	dump_u16_array(fp, ruletable_buffer, ruletable_len, "ruletable");
 	dump_u16_array(fp, weightstr_buffer, weightstr_len, "weightstr");
 	dump_u16_array(fp, wcs2colidt_buffer, wcs2colidt_len, "wcs2colidt");


 	fprintf(fp,"}; /* %8lu */\n", collate_pos);

 	fprintf(fp,"#define __lc_collate_data_LEN  %lu\n\n", collate_pos);
 }