Blame - ap/libc/glibc/glibc-2.22/locale/programs/linereader.c - T106_DC

blob: 2e051301616e7821e31c6ba614af265bc1f01843 [file] [log] [blame]

lh	9ed821d	2023-04-07 01:36:19 -0700	[diff] [blame]	1	/* Copyright (C) 1996-2015 Free Software Foundation, Inc.
				2	This file is part of the GNU C Library.
				3	Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
				4
				5	This program is free software; you can redistribute it and/or modify
				6	it under the terms of the GNU General Public License as published
				7	by the Free Software Foundation; version 2 of the License, or
				8	(at your option) any later version.
				9
				10	This program is distributed in the hope that it will be useful,
				11	but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				13	GNU General Public License for more details.
				14
				15	You should have received a copy of the GNU General Public License
				16	along with this program; if not, see <http://www.gnu.org/licenses/>. */
				17
				18	#ifdef HAVE_CONFIG_H
				19	# include <config.h>
				20	#endif
				21
				22	#include <assert.h>
				23	#include <ctype.h>
				24	#include <errno.h>
				25	#include <libintl.h>
				26	#include <stdarg.h>
				27	#include <stdlib.h>
				28	#include <string.h>
				29	#include <stdint.h>
				30
				31	#include "localedef.h"
				32	#include "charmap.h"
				33	#include "error.h"
				34	#include "linereader.h"
				35	#include "locfile.h"
				36
				37	/* Prototypes for local functions. */
				38	static struct token get_toplvl_escape (struct linereader lr);
				39	static struct token get_symname (struct linereader lr);
				40	static struct token get_ident (struct linereader lr);
				41	static struct token get_string (struct linereader lr,
				42	const struct charmap_t *charmap,
				43	struct localedef_t *locale,
				44	const struct repertoire_t *repertoire,
				45	int verbose);
				46
				47
				48	struct linereader *
				49	lr_open (const char *fname, kw_hash_fct_t hf)
				50	{
				51	FILE *fp;
				52
				53	if (fname == NULL \|\| strcmp (fname, "-") == 0
				54	\|\| strcmp (fname, "/dev/stdin") == 0)
				55	return lr_create (stdin, "<stdin>", hf);
				56	else
				57	{
				58	fp = fopen (fname, "rm");
				59	if (fp == NULL)
				60	return NULL;
				61	return lr_create (fp, fname, hf);
				62	}
				63	}
				64
				65	struct linereader *
				66	lr_create (FILE fp, const char fname, kw_hash_fct_t hf)
				67	{
				68	struct linereader *result;
				69	int n;
				70
				71	result = (struct linereader ) xmalloc (sizeof (result));
				72
				73	result->fp = fp;
				74	result->fname = xstrdup (fname);
				75	result->buf = NULL;
				76	result->bufsize = 0;
				77	result->lineno = 1;
				78	result->idx = 0;
				79	result->comment_char = '#';
				80	result->escape_char = '\\';
				81	result->translate_strings = 1;
				82	result->return_widestr = 0;
				83
				84	n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
				85	if (n < 0)
				86	{
				87	int save = errno;
				88	fclose (result->fp);
				89	free ((char *) result->fname);
				90	free (result);
				91	errno = save;
				92	return NULL;
				93	}
				94
				95	if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
				96	n -= 2;
				97
				98	result->buf[n] = '\0';
				99	result->bufact = n;
				100	result->hash_fct = hf;
				101
				102	return result;
				103	}
				104
				105
				106	int
				107	lr_eof (struct linereader *lr)
				108	{
				109	return lr->bufact = 0;
				110	}
				111
				112
				113	void
				114	lr_ignore_rest (struct linereader *lr, int verbose)
				115	{
				116	if (verbose)
				117	{
				118	while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
				119	&& lr->buf[lr->idx] != lr->comment_char)
				120	if (lr->buf[lr->idx] == '\0')
				121	{
				122	if (lr_next (lr) < 0)
				123	return;
				124	}
				125	else
				126	++lr->idx;
				127
				128	if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
				129	&& lr->buf[lr->idx] != lr->comment_char)
				130	lr_error (lr, _("trailing garbage at end of line"));
				131	}
				132
				133	/* Ignore continued line. */
				134	while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
				135	if (lr_next (lr) < 0)
				136	break;
				137
				138	lr->idx = lr->bufact;
				139	}
				140
				141
				142	void
				143	lr_close (struct linereader *lr)
				144	{
				145	fclose (lr->fp);
				146	free (lr->buf);
				147	free (lr);
				148	}
				149
				150
				151	int
				152	lr_next (struct linereader *lr)
				153	{
				154	int n;
				155
				156	n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
				157	if (n < 0)
				158	return -1;
				159
				160	++lr->lineno;
				161
				162	if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
				163	{
				164	#if 0
				165	/* XXX Is this correct? */
				166	/* An escaped newline character is substituted with a single <SP>. */
				167	--n;
				168	lr->buf[n - 1] = ' ';
				169	#else
				170	n -= 2;
				171	#endif
				172	}
				173
				174	lr->buf[n] = '\0';
				175	lr->bufact = n;
				176	lr->idx = 0;
				177
				178	return 0;
				179	}
				180
				181
/* Defined in error.c.  */
/* Counter that is incremented each time `error' is called.  */
extern unsigned int error_message_count;

/* The calling program should define program_name and set it to the
   name of the executing program.  */
extern char *program_name;
				189
				190
				191	struct token *
				192	lr_token (struct linereader lr, const struct charmap_t charmap,
				193	struct localedef_t locale, const struct repertoire_t repertoire,
				194	int verbose)
				195	{
				196	int ch;
				197
				198	while (1)
				199	{
				200	do
				201	{
				202	ch = lr_getc (lr);
				203
				204	if (ch == EOF)
				205	{
				206	lr->token.tok = tok_eof;
				207	return &lr->token;
				208	};
				209
				210	if (ch == '\n')
				211	{
				212	lr->token.tok = tok_eol;
				213	return &lr->token;
				214	}
				215	}
				216	while (isspace (ch));
				217
				218	if (ch != lr->comment_char)
				219	break;
				220
				221	/* Is there an newline at the end of the buffer? */
				222	if (lr->buf[lr->bufact - 1] != '\n')
				223	{
				224	/* No. Some people want this to mean that only the line in
				225	the file not the logical, concatenated line is ignored.
				226	Let's try this. */
				227	lr->idx = lr->bufact;
				228	continue;
				229	}
				230
				231	/* Ignore rest of line. */
				232	lr_ignore_rest (lr, 0);
				233	lr->token.tok = tok_eol;
				234	return &lr->token;
				235	}
				236
				237	/* Match escape sequences. */
				238	if (ch == lr->escape_char)
				239	return get_toplvl_escape (lr);
				240
				241	/* Match ellipsis. */
				242	if (ch == '.')
				243	{
				244	if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
				245	{
				246	int cnt;
				247	for (cnt = 0; cnt < 10; ++cnt)
				248	lr_getc (lr);
				249	lr->token.tok = tok_ellipsis4_2;
				250	return &lr->token;
				251	}
				252	if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
				253	{
				254	lr_getc (lr);
				255	lr_getc (lr);
				256	lr_getc (lr);
				257	lr->token.tok = tok_ellipsis4;
				258	return &lr->token;
				259	}
				260	if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
				261	{
				262	lr_getc (lr);
				263	lr_getc (lr);
				264	lr->token.tok = tok_ellipsis3;
				265	return &lr->token;
				266	}
				267	if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
				268	{
				269	int cnt;
				270	for (cnt = 0; cnt < 6; ++cnt)
				271	lr_getc (lr);
				272	lr->token.tok = tok_ellipsis2_2;
				273	return &lr->token;
				274	}
				275	if (lr->buf[lr->idx] == '.')
				276	{
				277	lr_getc (lr);
				278	lr->token.tok = tok_ellipsis2;
				279	return &lr->token;
				280	}
				281	}
				282
				283	switch (ch)
				284	{
				285	case '<':
				286	return get_symname (lr);
				287
				288	case '0' ... '9':
				289	lr->token.tok = tok_number;
				290	lr->token.val.num = ch - '0';
				291
				292	while (isdigit (ch = lr_getc (lr)))
				293	{
				294	lr->token.val.num *= 10;
				295	lr->token.val.num += ch - '0';
				296	}
				297	if (isalpha (ch))
				298	lr_error (lr, _("garbage at end of number"));
				299	lr_ungetn (lr, 1);
				300
				301	return &lr->token;
				302
				303	case ';':
				304	lr->token.tok = tok_semicolon;
				305	return &lr->token;
				306
				307	case ',':
				308	lr->token.tok = tok_comma;
				309	return &lr->token;
				310
				311	case '(':
				312	lr->token.tok = tok_open_brace;
				313	return &lr->token;
				314
				315	case ')':
				316	lr->token.tok = tok_close_brace;
				317	return &lr->token;
				318
				319	case '"':
				320	return get_string (lr, charmap, locale, repertoire, verbose);
				321
				322	case '-':
				323	ch = lr_getc (lr);
				324	if (ch == '1')
				325	{
				326	lr->token.tok = tok_minus1;
				327	return &lr->token;
				328	}
				329	lr_ungetn (lr, 2);
				330	break;
				331	}
				332
				333	return get_ident (lr);
				334	}
				335
				336
				337	static struct token *
				338	get_toplvl_escape (struct linereader *lr)
				339	{
				340	/* This is supposed to be a numeric value. We return the
				341	numerical value and the number of bytes. */
				342	size_t start_idx = lr->idx - 1;
				343	unsigned char *bytes = lr->token.val.charcode.bytes;
				344	size_t nbytes = 0;
				345	int ch;
				346
				347	do
				348	{
				349	unsigned int byte = 0;
				350	unsigned int base = 8;
				351
				352	ch = lr_getc (lr);
				353
				354	if (ch == 'd')
				355	{
				356	base = 10;
				357	ch = lr_getc (lr);
				358	}
				359	else if (ch == 'x')
				360	{
				361	base = 16;
				362	ch = lr_getc (lr);
				363	}
				364
				365	if ((base == 16 && !isxdigit (ch))
				366	\|\| (base != 16 && (ch < '0' \|\| ch >= (int) ('0' + base))))
				367	{
				368	esc_error:
				369	lr->token.val.str.startmb = &lr->buf[start_idx];
				370
				371	while (ch != EOF && !isspace (ch))
				372	ch = lr_getc (lr);
				373	lr->token.val.str.lenmb = lr->idx - start_idx;
				374
				375	lr->token.tok = tok_error;
				376	return &lr->token;
				377	}
				378
				379	if (isdigit (ch))
				380	byte = ch - '0';
				381	else
				382	byte = tolower (ch) - 'a' + 10;
				383
				384	ch = lr_getc (lr);
				385	if ((base == 16 && !isxdigit (ch))
				386	\|\| (base != 16 && (ch < '0' \|\| ch >= (int) ('0' + base))))
				387	goto esc_error;
				388
				389	byte *= base;
				390	if (isdigit (ch))
				391	byte += ch - '0';
				392	else
				393	byte += tolower (ch) - 'a' + 10;
				394
				395	ch = lr_getc (lr);
				396	if (base != 16 && isdigit (ch))
				397	{
				398	byte *= base;
				399	byte += ch - '0';
				400
				401	ch = lr_getc (lr);
				402	}
				403
				404	bytes[nbytes++] = byte;
				405	}
				406	while (ch == lr->escape_char
				407	&& nbytes < (int) sizeof (lr->token.val.charcode.bytes));
				408
				409	if (!isspace (ch))
				410	lr_error (lr, _("garbage at end of character code specification"));
				411
				412	lr_ungetn (lr, 1);
				413
				414	lr->token.tok = tok_charcode;
				415	lr->token.val.charcode.nbytes = nbytes;
				416
				417	return &lr->token;
				418	}
				419
				420
				421	#define ADDC(ch) \
				422	do \
				423	{ \
				424	if (bufact == bufmax) \
				425	{ \
				426	bufmax *= 2; \
				427	buf = xrealloc (buf, bufmax); \
				428	} \
				429	buf[bufact++] = (ch); \
				430	} \
				431	while (0)
				432
				433
				434	#define ADDS(s, l) \
				435	do \
				436	{ \
				437	size_t _l = (l); \
				438	if (bufact + _l > bufmax) \
				439	{ \
				440	if (bufact < _l) \
				441	bufact = _l; \
				442	bufmax *= 2; \
				443	buf = xrealloc (buf, bufmax); \
				444	} \
				445	memcpy (&buf[bufact], s, _l); \
				446	bufact += _l; \
				447	} \
				448	while (0)
				449
				450
				451	#define ADDWC(ch) \
				452	do \
				453	{ \
				454	if (buf2act == buf2max) \
				455	{ \
				456	buf2max *= 2; \
				457	buf2 = xrealloc (buf2, buf2max * 4); \
				458	} \
				459	buf2[buf2act++] = (ch); \
				460	} \
				461	while (0)
				462
				463
				464	static struct token *
				465	get_symname (struct linereader *lr)
				466	{
				467	/* Symbol in brackets. We must distinguish three kinds:
				468	1. reserved words
				469	2. ISO 10646 position values
				470	3. all other. */
				471	char *buf;
				472	size_t bufact = 0;
				473	size_t bufmax = 56;
				474	const struct keyword_t *kw;
				475	int ch;
				476
				477	buf = (char *) xmalloc (bufmax);
				478
				479	do
				480	{
				481	ch = lr_getc (lr);
				482	if (ch == lr->escape_char)
				483	{
				484	int c2 = lr_getc (lr);
				485	ADDC (c2);
				486
				487	if (c2 == '\n')
				488	ch = '\n';
				489	}
				490	else
				491	ADDC (ch);
				492	}
				493	while (ch != '>' && ch != '\n');
				494
				495	if (ch == '\n')
				496	lr_error (lr, _("unterminated symbolic name"));
				497
				498	/* Test for ISO 10646 position value. */
				499	if (buf[0] == 'U' && (bufact == 6 \|\| bufact == 10))
				500	{
				501	char *cp = buf + 1;
				502	while (cp < &buf[bufact - 1] && isxdigit (*cp))
				503	++cp;
				504
				505	if (cp == &buf[bufact - 1])
				506	{
				507	/* Yes, it is. */
				508	lr->token.tok = tok_ucs4;
				509	lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
				510
				511	return &lr->token;
				512	}
				513	}
				514
				515	/* It is a symbolic name. Test for reserved words. */
				516	kw = lr->hash_fct (buf, bufact - 1);
				517
				518	if (kw != NULL && kw->symname_or_ident == 1)
				519	{
				520	lr->token.tok = kw->token;
				521	free (buf);
				522	}
				523	else
				524	{
				525	lr->token.tok = tok_bsymbol;
				526
				527	buf = xrealloc (buf, bufact + 1);
				528	buf[bufact] = '\0';
				529
				530	lr->token.val.str.startmb = buf;
				531	lr->token.val.str.lenmb = bufact - 1;
				532	}
				533
				534	return &lr->token;
				535	}
				536
				537
				538	static struct token *
				539	get_ident (struct linereader *lr)
				540	{
				541	char *buf;
				542	size_t bufact;
				543	size_t bufmax = 56;
				544	const struct keyword_t *kw;
				545	int ch;
				546
				547	buf = xmalloc (bufmax);
				548	bufact = 0;
				549
				550	ADDC (lr->buf[lr->idx - 1]);
				551
				552	while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
				553	&& ch != '<' && ch != ',' && ch != EOF)
				554	{
				555	if (ch == lr->escape_char)
				556	{
				557	ch = lr_getc (lr);
				558	if (ch == '\n' \|\| ch == EOF)
				559	{
				560	lr_error (lr, _("invalid escape sequence"));
				561	break;
				562	}
				563	}
				564	ADDC (ch);
				565	}
				566
				567	lr_ungetc (lr, ch);
				568
				569	kw = lr->hash_fct (buf, bufact);
				570
				571	if (kw != NULL && kw->symname_or_ident == 0)
				572	{
				573	lr->token.tok = kw->token;
				574	free (buf);
				575	}
				576	else
				577	{
				578	lr->token.tok = tok_ident;
				579
				580	buf = xrealloc (buf, bufact + 1);
				581	buf[bufact] = '\0';
				582
				583	lr->token.val.str.startmb = buf;
				584	lr->token.val.str.lenmb = bufact;
				585	}
				586
				587	return &lr->token;
				588	}
				589
				590
				591	static struct token *
				592	get_string (struct linereader lr, const struct charmap_t charmap,
				593	struct localedef_t locale, const struct repertoire_t repertoire,
				594	int verbose)
				595	{
				596	int return_widestr = lr->return_widestr;
				597	char *buf;
				598	wchar_t *buf2 = NULL;
				599	size_t bufact;
				600	size_t bufmax = 56;
				601
				602	/* We must return two different strings. */
				603	buf = xmalloc (bufmax);
				604	bufact = 0;
				605
				606	/* We know it'll be a string. */
				607	lr->token.tok = tok_string;
				608
				609	/* If we need not translate the strings (i.e., expand <...> parts)
				610	we can run a simple loop. */
				611	if (!lr->translate_strings)
				612	{
				613	int ch;
				614
				615	buf2 = NULL;
				616	while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
				617	ADDC (ch);
				618
				619	/* Catch errors with trailing escape character. */
				620	if (bufact > 0 && buf[bufact - 1] == lr->escape_char
				621	&& (bufact == 1 \|\| buf[bufact - 2] != lr->escape_char))
				622	{
				623	lr_error (lr, _("illegal escape sequence at end of string"));
				624	--bufact;
				625	}
				626	else if (ch == '\n' \|\| ch == EOF)
				627	lr_error (lr, _("unterminated string"));
				628
				629	ADDC ('\0');
				630	}
				631	else
				632	{
				633	int illegal_string = 0;
				634	size_t buf2act = 0;
				635	size_t buf2max = 56 * sizeof (uint32_t);
				636	int ch;
				637	int warned = 0;
				638
				639	/* We have to provide the wide character result as well. */
				640	if (return_widestr)
				641	buf2 = xmalloc (buf2max);
				642
				643	/* Read until the end of the string (or end of the line or file). */
				644	while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
				645	{
				646	size_t startidx;
				647	uint32_t wch;
				648	struct charseq *seq;
				649
				650	if (ch != '<')
				651	{
				652	/* The standards leave it up to the implementation to decide
				653	what to do with character which stand for themself. We
				654	could jump through hoops to find out the value relative to
				655	the charmap and the repertoire map, but instead we leave
				656	it up to the locale definition author to write a better
				657	definition. We assume here that every character which
				658	stands for itself is encoded using ISO 8859-1. Using the
				659	escape character is allowed. */
				660	if (ch == lr->escape_char)
				661	{
				662	ch = lr_getc (lr);
				663	if (ch == '\n' \|\| ch == EOF)
				664	break;
				665	}
				666
				667	if (verbose && !warned)
				668	{
				669	lr_error (lr, _("\
				670	non-symbolic character value should not be used"));
				671	warned = 1;
				672	}
				673
				674	ADDC (ch);
				675	if (return_widestr)
				676	ADDWC ((uint32_t) ch);
				677
				678	continue;
				679	}
				680
				681	/* Now we have to search for the end of the symbolic name, i.e.,
				682	the closing '>'. */
				683	startidx = bufact;
				684	while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
				685	{
				686	if (ch == lr->escape_char)
				687	{
				688	ch = lr_getc (lr);
				689	if (ch == '\n' \|\| ch == EOF)
				690	break;
				691	}
				692	ADDC (ch);
				693	}
				694	if (ch == '\n' \|\| ch == EOF)
				695	/* Not a correct string. */
				696	break;
				697	if (bufact == startidx)
				698	{
				699	/* <> is no correct name. Ignore it and also signal an
				700	error. */
				701	illegal_string = 1;
				702	continue;
				703	}
				704
				705	/* It might be a Uxxxx symbol. */
				706	if (buf[startidx] == 'U'
				707	&& (bufact - startidx == 5 \|\| bufact - startidx == 9))
				708	{
				709	char *cp = buf + startidx + 1;
				710	while (cp < &buf[bufact] && isxdigit (*cp))
				711	++cp;
				712
				713	if (cp == &buf[bufact])
				714	{
				715	char utmp[10];
				716
				717	/* Yes, it is. */
				718	ADDC ('\0');
				719	wch = strtoul (buf + startidx + 1, NULL, 16);
				720
				721	/* Now forget about the name we just added. */
				722	bufact = startidx;
				723
				724	if (return_widestr)
				725	ADDWC (wch);
				726
				727	/* See whether the charmap contains the Uxxxxxxxx names. */
				728	snprintf (utmp, sizeof (utmp), "U%08X", wch);
				729	seq = charmap_find_value (charmap, utmp, 9);
				730
				731	if (seq == NULL)
				732	{
				733	/* No, this isn't the case. Now determine from
				734	the repertoire the name of the character and
				735	find it in the charmap. */
				736	if (repertoire != NULL)
				737	{
				738	const char *symbol;
				739
				740	symbol = repertoire_find_symbol (repertoire, wch);
				741
				742	if (symbol != NULL)
				743	seq = charmap_find_value (charmap, symbol,
				744	strlen (symbol));
				745	}
				746
				747	if (seq == NULL)
				748	{
				749	#ifndef NO_TRANSLITERATION
				750	/* Transliterate if possible. */
				751	if (locale != NULL)
				752	{
				753	uint32_t *translit;
				754
				755	if ((locale->avail & CTYPE_LOCALE) == 0)
				756	{
				757	/* Load the CTYPE data now. */
				758	int old_needed = locale->needed;
				759
				760	locale->needed = 0;
				761	locale = load_locale (LC_CTYPE,
				762	locale->name,
				763	locale->repertoire_name,
				764	charmap, locale);
				765	locale->needed = old_needed;
				766	}
				767
				768	if ((locale->avail & CTYPE_LOCALE) != 0
				769	&& ((translit = find_translit (locale,
				770	charmap, wch))
				771	!= NULL))
				772	/* The CTYPE data contains a matching
				773	transliteration. */
				774	{
				775	int i;
				776
				777	for (i = 0; translit[i] != 0; ++i)
				778	{
				779	char utmp[10];
				780
				781	snprintf (utmp, sizeof (utmp), "U%08X",
				782	translit[i]);
				783	seq = charmap_find_value (charmap, utmp,
				784	9);
				785	assert (seq != NULL);
				786	ADDS (seq->bytes, seq->nbytes);
				787	}
				788
				789	continue;
				790	}
				791	}
				792	#endif /* NO_TRANSLITERATION */
				793
				794	/* Not a known name. */
				795	illegal_string = 1;
				796	}
				797	}
				798
				799	if (seq != NULL)
				800	ADDS (seq->bytes, seq->nbytes);
				801
				802	continue;
				803	}
				804	}
				805
				806	/* We now have the symbolic name in buf[startidx] to
				807	buf[bufact-1]. Now find out the value for this character
				808	in the charmap as well as in the repertoire map (in this
				809	order). */
				810	seq = charmap_find_value (charmap, &buf[startidx],
				811	bufact - startidx);
				812
				813	if (seq == NULL)
				814	{
				815	/* This name is not in the charmap. */
				816	lr_error (lr, _("symbol `%.*s' not in charmap"),
				817	(int) (bufact - startidx), &buf[startidx]);
				818	illegal_string = 1;
				819	}
				820
				821	if (return_widestr)
				822	{
				823	/* Now the same for the multibyte representation. */
				824	if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
				825	wch = seq->ucs4;
				826	else
				827	{
				828	wch = repertoire_find_value (repertoire, &buf[startidx],
				829	bufact - startidx);
				830	if (seq != NULL)
				831	seq->ucs4 = wch;
				832	}
				833
				834	if (wch == ILLEGAL_CHAR_VALUE)
				835	{
				836	/* This name is not in the repertoire map. */
				837	lr_error (lr, _("symbol `%.*s' not in repertoire map"),
				838	(int) (bufact - startidx), &buf[startidx]);
				839	illegal_string = 1;
				840	}
				841	else
				842	ADDWC (wch);
				843	}
				844
				845	/* Now forget about the name we just added. */
				846	bufact = startidx;
				847
				848	/* And copy the bytes. */
				849	if (seq != NULL)
				850	ADDS (seq->bytes, seq->nbytes);
				851	}
				852
				853	if (ch == '\n' \|\| ch == EOF)
				854	{
				855	lr_error (lr, _("unterminated string"));
				856	illegal_string = 1;
				857	}
				858
				859	if (illegal_string)
				860	{
				861	free (buf);
				862	free (buf2);
				863	lr->token.val.str.startmb = NULL;
				864	lr->token.val.str.lenmb = 0;
				865	lr->token.val.str.startwc = NULL;
				866	lr->token.val.str.lenwc = 0;
				867
				868	return &lr->token;
				869	}
				870
				871	ADDC ('\0');
				872
				873	if (return_widestr)
				874	{
				875	ADDWC (0);
				876	lr->token.val.str.startwc = xrealloc (buf2,
				877	buf2act * sizeof (uint32_t));
				878	lr->token.val.str.lenwc = buf2act;
				879	}
				880	}
				881
				882	lr->token.val.str.startmb = xrealloc (buf, bufact);
				883	lr->token.val.str.lenmb = bufact;
				884
				885	return &lr->token;
				886	}