| /* Copyright (C) 1996-2016 Free Software Foundation, Inc. | 
 |    This file is part of the GNU C Library. | 
 |    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. | 
 |  | 
 |    This program is free software; you can redistribute it and/or modify | 
 |    it under the terms of the GNU General Public License as published | 
 |    by the Free Software Foundation; version 2 of the License, or | 
 |    (at your option) any later version. | 
 |  | 
 |    This program is distributed in the hope that it will be useful, | 
 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
 |    GNU General Public License for more details. | 
 |  | 
 |    You should have received a copy of the GNU General Public License | 
 |    along with this program; if not, see <http://www.gnu.org/licenses/>.  */ | 
 |  | 
 | #ifdef HAVE_CONFIG_H | 
 | # include <config.h> | 
 | #endif | 
 |  | 
 | #include <assert.h> | 
 | #include <ctype.h> | 
 | #include <errno.h> | 
 | #include <libintl.h> | 
 | #include <stdarg.h> | 
 | #include <stdlib.h> | 
 | #include <string.h> | 
 | #include <stdint.h> | 
 |  | 
 | #include "localedef.h" | 
 | #include "charmap.h" | 
 | #include "error.h" | 
 | #include "linereader.h" | 
 | #include "locfile.h" | 
 |  | 
 | /* Prototypes for local functions.  */ | 
 | static struct token *get_toplvl_escape (struct linereader *lr); | 
 | static struct token *get_symname (struct linereader *lr); | 
 | static struct token *get_ident (struct linereader *lr); | 
 | static struct token *get_string (struct linereader *lr, | 
 | 				 const struct charmap_t *charmap, | 
 | 				 struct localedef_t *locale, | 
 | 				 const struct repertoire_t *repertoire, | 
 | 				 int verbose); | 
 |  | 
 |  | 
 | struct linereader * | 
 | lr_open (const char *fname, kw_hash_fct_t hf) | 
 | { | 
 |   FILE *fp; | 
 |  | 
 |   if (fname == NULL || strcmp (fname, "-") == 0 | 
 |       || strcmp (fname, "/dev/stdin") == 0) | 
 |     return lr_create (stdin, "<stdin>", hf); | 
 |   else | 
 |     { | 
 |       fp = fopen (fname, "rm"); | 
 |       if (fp == NULL) | 
 | 	return NULL; | 
 |       return lr_create (fp, fname, hf); | 
 |     } | 
 | } | 
 |  | 
 | struct linereader * | 
 | lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf) | 
 | { | 
 |   struct linereader *result; | 
 |   int n; | 
 |  | 
 |   result = (struct linereader *) xmalloc (sizeof (*result)); | 
 |  | 
 |   result->fp = fp; | 
 |   result->fname = xstrdup (fname); | 
 |   result->buf = NULL; | 
 |   result->bufsize = 0; | 
 |   result->lineno = 1; | 
 |   result->idx = 0; | 
 |   result->comment_char = '#'; | 
 |   result->escape_char = '\\'; | 
 |   result->translate_strings = 1; | 
 |   result->return_widestr = 0; | 
 |  | 
 |   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp); | 
 |   if (n < 0) | 
 |     { | 
 |       int save = errno; | 
 |       fclose (result->fp); | 
 |       free ((char *) result->fname); | 
 |       free (result); | 
 |       errno = save; | 
 |       return NULL; | 
 |     } | 
 |  | 
 |   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n') | 
 |     n -= 2; | 
 |  | 
 |   result->buf[n] = '\0'; | 
 |   result->bufact = n; | 
 |   result->hash_fct = hf; | 
 |  | 
 |   return result; | 
 | } | 
 |  | 
 |  | 
 | int | 
 | lr_eof (struct linereader *lr) | 
 | { | 
 |   return lr->bufact = 0; | 
 | } | 
 |  | 
 |  | 
 | void | 
 | lr_ignore_rest (struct linereader *lr, int verbose) | 
 | { | 
 |   if (verbose) | 
 |     { | 
 |       while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n' | 
 | 	     && lr->buf[lr->idx] != lr->comment_char) | 
 | 	if (lr->buf[lr->idx] == '\0') | 
 | 	  { | 
 | 	    if (lr_next (lr) < 0) | 
 | 	      return; | 
 | 	  } | 
 | 	else | 
 | 	  ++lr->idx; | 
 |  | 
 |       if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp) | 
 | 	  && lr->buf[lr->idx] != lr->comment_char) | 
 | 	lr_error (lr, _("trailing garbage at end of line")); | 
 |     } | 
 |  | 
 |   /* Ignore continued line.  */ | 
 |   while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n') | 
 |     if (lr_next (lr) < 0) | 
 |       break; | 
 |  | 
 |   lr->idx = lr->bufact; | 
 | } | 
 |  | 
 |  | 
 | void | 
 | lr_close (struct linereader *lr) | 
 | { | 
 |   fclose (lr->fp); | 
 |   free (lr->buf); | 
 |   free (lr); | 
 | } | 
 |  | 
 |  | 
 | int | 
 | lr_next (struct linereader *lr) | 
 | { | 
 |   int n; | 
 |  | 
 |   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp); | 
 |   if (n < 0) | 
 |     return -1; | 
 |  | 
 |   ++lr->lineno; | 
 |  | 
 |   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n') | 
 |     { | 
 | #if 0 | 
 |       /* XXX Is this correct?  */ | 
 |       /* An escaped newline character is substituted with a single <SP>.  */ | 
 |       --n; | 
 |       lr->buf[n - 1] = ' '; | 
 | #else | 
 |       n -= 2; | 
 | #endif | 
 |     } | 
 |  | 
 |   lr->buf[n] = '\0'; | 
 |   lr->bufact = n; | 
 |   lr->idx = 0; | 
 |  | 
 |   return 0; | 
 | } | 
 |  | 
 |  | 
 | /* Defined in error.c.  */ | 
 | /* This variable is incremented each time `error' is called.  */ | 
 | extern unsigned int error_message_count; | 
 |  | 
 | /* The calling program should define program_name and set it to the | 
 |    name of the executing program.  */ | 
 | extern char *program_name; | 
 |  | 
 |  | 
 | struct token * | 
 | lr_token (struct linereader *lr, const struct charmap_t *charmap, | 
 | 	  struct localedef_t *locale, const struct repertoire_t *repertoire, | 
 | 	  int verbose) | 
 | { | 
 |   int ch; | 
 |  | 
 |   while (1) | 
 |     { | 
 |       do | 
 | 	{ | 
 | 	  ch = lr_getc (lr); | 
 |  | 
 | 	  if (ch == EOF) | 
 | 	    { | 
 | 	      lr->token.tok = tok_eof; | 
 | 	      return &lr->token; | 
 | 	    }; | 
 |  | 
 | 	  if (ch == '\n') | 
 | 	    { | 
 | 	      lr->token.tok = tok_eol; | 
 | 	      return &lr->token; | 
 | 	    } | 
 | 	} | 
 |       while (isspace (ch)); | 
 |  | 
 |       if (ch != lr->comment_char) | 
 | 	break; | 
 |  | 
 |       /* Is there an newline at the end of the buffer?  */ | 
 |       if (lr->buf[lr->bufact - 1] != '\n') | 
 | 	{ | 
 | 	  /* No.  Some people want this to mean that only the line in | 
 | 	     the file not the logical, concatenated line is ignored. | 
 | 	     Let's try this.  */ | 
 | 	  lr->idx = lr->bufact; | 
 | 	  continue; | 
 | 	} | 
 |  | 
 |       /* Ignore rest of line.  */ | 
 |       lr_ignore_rest (lr, 0); | 
 |       lr->token.tok = tok_eol; | 
 |       return &lr->token; | 
 |     } | 
 |  | 
 |   /* Match escape sequences.  */ | 
 |   if (ch == lr->escape_char) | 
 |     return get_toplvl_escape (lr); | 
 |  | 
 |   /* Match ellipsis.  */ | 
 |   if (ch == '.') | 
 |     { | 
 |       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0) | 
 | 	{ | 
 | 	  int cnt; | 
 | 	  for (cnt = 0; cnt < 10; ++cnt) | 
 | 	    lr_getc (lr); | 
 | 	  lr->token.tok = tok_ellipsis4_2; | 
 | 	  return &lr->token; | 
 | 	} | 
 |       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0) | 
 | 	{ | 
 | 	  lr_getc (lr); | 
 | 	  lr_getc (lr); | 
 | 	  lr_getc (lr); | 
 | 	  lr->token.tok = tok_ellipsis4; | 
 | 	  return &lr->token; | 
 | 	} | 
 |       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0) | 
 | 	{ | 
 | 	  lr_getc (lr); | 
 | 	  lr_getc (lr); | 
 | 	  lr->token.tok = tok_ellipsis3; | 
 | 	  return &lr->token; | 
 | 	} | 
 |       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0) | 
 | 	{ | 
 | 	  int cnt; | 
 | 	  for (cnt = 0; cnt < 6; ++cnt) | 
 | 	    lr_getc (lr); | 
 | 	  lr->token.tok = tok_ellipsis2_2; | 
 | 	  return &lr->token; | 
 | 	} | 
 |       if (lr->buf[lr->idx] == '.') | 
 | 	{ | 
 | 	  lr_getc (lr); | 
 | 	  lr->token.tok = tok_ellipsis2; | 
 | 	  return &lr->token; | 
 | 	} | 
 |     } | 
 |  | 
 |   switch (ch) | 
 |     { | 
 |     case '<': | 
 |       return get_symname (lr); | 
 |  | 
 |     case '0' ... '9': | 
 |       lr->token.tok = tok_number; | 
 |       lr->token.val.num = ch - '0'; | 
 |  | 
 |       while (isdigit (ch = lr_getc (lr))) | 
 | 	{ | 
 | 	  lr->token.val.num *= 10; | 
 | 	  lr->token.val.num += ch - '0'; | 
 | 	} | 
 |       if (isalpha (ch)) | 
 | 	lr_error (lr, _("garbage at end of number")); | 
 |       lr_ungetn (lr, 1); | 
 |  | 
 |       return &lr->token; | 
 |  | 
 |     case ';': | 
 |       lr->token.tok = tok_semicolon; | 
 |       return &lr->token; | 
 |  | 
 |     case ',': | 
 |       lr->token.tok = tok_comma; | 
 |       return &lr->token; | 
 |  | 
 |     case '(': | 
 |       lr->token.tok = tok_open_brace; | 
 |       return &lr->token; | 
 |  | 
 |     case ')': | 
 |       lr->token.tok = tok_close_brace; | 
 |       return &lr->token; | 
 |  | 
 |     case '"': | 
 |       return get_string (lr, charmap, locale, repertoire, verbose); | 
 |  | 
 |     case '-': | 
 |       ch = lr_getc (lr); | 
 |       if (ch == '1') | 
 | 	{ | 
 | 	  lr->token.tok = tok_minus1; | 
 | 	  return &lr->token; | 
 | 	} | 
 |       lr_ungetn (lr, 2); | 
 |       break; | 
 |     } | 
 |  | 
 |   return get_ident (lr); | 
 | } | 
 |  | 
 |  | 
 | static struct token * | 
 | get_toplvl_escape (struct linereader *lr) | 
 | { | 
 |   /* This is supposed to be a numeric value.  We return the | 
 |      numerical value and the number of bytes.  */ | 
 |   size_t start_idx = lr->idx - 1; | 
 |   unsigned char *bytes = lr->token.val.charcode.bytes; | 
 |   size_t nbytes = 0; | 
 |   int ch; | 
 |  | 
 |   do | 
 |     { | 
 |       unsigned int byte = 0; | 
 |       unsigned int base = 8; | 
 |  | 
 |       ch = lr_getc (lr); | 
 |  | 
 |       if (ch == 'd') | 
 | 	{ | 
 | 	  base = 10; | 
 | 	  ch = lr_getc (lr); | 
 | 	} | 
 |       else if (ch == 'x') | 
 | 	{ | 
 | 	  base = 16; | 
 | 	  ch = lr_getc (lr); | 
 | 	} | 
 |  | 
 |       if ((base == 16 && !isxdigit (ch)) | 
 | 	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) | 
 | 	{ | 
 | 	esc_error: | 
 | 	  lr->token.val.str.startmb = &lr->buf[start_idx]; | 
 |  | 
 | 	  while (ch != EOF && !isspace (ch)) | 
 | 	    ch = lr_getc (lr); | 
 | 	  lr->token.val.str.lenmb = lr->idx - start_idx; | 
 |  | 
 | 	  lr->token.tok = tok_error; | 
 | 	  return &lr->token; | 
 | 	} | 
 |  | 
 |       if (isdigit (ch)) | 
 | 	byte = ch - '0'; | 
 |       else | 
 | 	byte = tolower (ch) - 'a' + 10; | 
 |  | 
 |       ch = lr_getc (lr); | 
 |       if ((base == 16 && !isxdigit (ch)) | 
 | 	  || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) | 
 | 	goto esc_error; | 
 |  | 
 |       byte *= base; | 
 |       if (isdigit (ch)) | 
 | 	byte += ch - '0'; | 
 |       else | 
 | 	byte += tolower (ch) - 'a' + 10; | 
 |  | 
 |       ch = lr_getc (lr); | 
 |       if (base != 16 && isdigit (ch)) | 
 | 	{ | 
 | 	  byte *= base; | 
 | 	  byte += ch - '0'; | 
 |  | 
 | 	  ch = lr_getc (lr); | 
 | 	} | 
 |  | 
 |       bytes[nbytes++] = byte; | 
 |     } | 
 |   while (ch == lr->escape_char | 
 | 	 && nbytes < (int) sizeof (lr->token.val.charcode.bytes)); | 
 |  | 
 |   if (!isspace (ch)) | 
 |     lr_error (lr, _("garbage at end of character code specification")); | 
 |  | 
 |   lr_ungetn (lr, 1); | 
 |  | 
 |   lr->token.tok = tok_charcode; | 
 |   lr->token.val.charcode.nbytes = nbytes; | 
 |  | 
 |   return &lr->token; | 
 | } | 
 |  | 
 |  | 
 | #define ADDC(ch) \ | 
 |   do									      \ | 
 |     {									      \ | 
 |       if (bufact == bufmax)						      \ | 
 | 	{								      \ | 
 | 	  bufmax *= 2;							      \ | 
 | 	  buf = xrealloc (buf, bufmax);					      \ | 
 | 	}								      \ | 
 |       buf[bufact++] = (ch);						      \ | 
 |     }									      \ | 
 |   while (0) | 
 |  | 
 |  | 
 | #define ADDS(s, l) \ | 
 |   do									      \ | 
 |     {									      \ | 
 |       size_t _l = (l);							      \ | 
 |       if (bufact + _l > bufmax)						      \ | 
 | 	{								      \ | 
 | 	  if (bufact < _l)						      \ | 
 | 	    bufact = _l;						      \ | 
 | 	  bufmax *= 2;							      \ | 
 | 	  buf = xrealloc (buf, bufmax);					      \ | 
 | 	}								      \ | 
 |       memcpy (&buf[bufact], s, _l);					      \ | 
 |       bufact += _l;							      \ | 
 |     }									      \ | 
 |   while (0) | 
 |  | 
 |  | 
 | #define ADDWC(ch) \ | 
 |   do									      \ | 
 |     {									      \ | 
 |       if (buf2act == buf2max)						      \ | 
 | 	{								      \ | 
 | 	  buf2max *= 2;							      \ | 
 | 	  buf2 = xrealloc (buf2, buf2max * 4);				      \ | 
 | 	}								      \ | 
 |       buf2[buf2act++] = (ch);						      \ | 
 |     }									      \ | 
 |   while (0) | 
 |  | 
 |  | 
 | static struct token * | 
 | get_symname (struct linereader *lr) | 
 | { | 
 |   /* Symbol in brackets.  We must distinguish three kinds: | 
 |      1. reserved words | 
 |      2. ISO 10646 position values | 
 |      3. all other.  */ | 
 |   char *buf; | 
 |   size_t bufact = 0; | 
 |   size_t bufmax = 56; | 
 |   const struct keyword_t *kw; | 
 |   int ch; | 
 |  | 
 |   buf = (char *) xmalloc (bufmax); | 
 |  | 
 |   do | 
 |     { | 
 |       ch = lr_getc (lr); | 
 |       if (ch == lr->escape_char) | 
 | 	{ | 
 | 	  int c2 = lr_getc (lr); | 
 | 	  ADDC (c2); | 
 |  | 
 | 	  if (c2 == '\n') | 
 | 	    ch = '\n'; | 
 | 	} | 
 |       else | 
 | 	ADDC (ch); | 
 |     } | 
 |   while (ch != '>' && ch != '\n'); | 
 |  | 
 |   if (ch == '\n') | 
 |     lr_error (lr, _("unterminated symbolic name")); | 
 |  | 
 |   /* Test for ISO 10646 position value.  */ | 
 |   if (buf[0] == 'U' && (bufact == 6 || bufact == 10)) | 
 |     { | 
 |       char *cp = buf + 1; | 
 |       while (cp < &buf[bufact - 1] && isxdigit (*cp)) | 
 | 	++cp; | 
 |  | 
 |       if (cp == &buf[bufact - 1]) | 
 | 	{ | 
 | 	  /* Yes, it is.  */ | 
 | 	  lr->token.tok = tok_ucs4; | 
 | 	  lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16); | 
 |  | 
 | 	  return &lr->token; | 
 | 	} | 
 |     } | 
 |  | 
 |   /* It is a symbolic name.  Test for reserved words.  */ | 
 |   kw = lr->hash_fct (buf, bufact - 1); | 
 |  | 
 |   if (kw != NULL && kw->symname_or_ident == 1) | 
 |     { | 
 |       lr->token.tok = kw->token; | 
 |       free (buf); | 
 |     } | 
 |   else | 
 |     { | 
 |       lr->token.tok = tok_bsymbol; | 
 |  | 
 |       buf = xrealloc (buf, bufact + 1); | 
 |       buf[bufact] = '\0'; | 
 |  | 
 |       lr->token.val.str.startmb = buf; | 
 |       lr->token.val.str.lenmb = bufact - 1; | 
 |     } | 
 |  | 
 |   return &lr->token; | 
 | } | 
 |  | 
 |  | 
 | static struct token * | 
 | get_ident (struct linereader *lr) | 
 | { | 
 |   char *buf; | 
 |   size_t bufact; | 
 |   size_t bufmax = 56; | 
 |   const struct keyword_t *kw; | 
 |   int ch; | 
 |  | 
 |   buf = xmalloc (bufmax); | 
 |   bufact = 0; | 
 |  | 
 |   ADDC (lr->buf[lr->idx - 1]); | 
 |  | 
 |   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';' | 
 | 	 && ch != '<' && ch != ',' && ch != EOF) | 
 |     { | 
 |       if (ch == lr->escape_char) | 
 | 	{ | 
 | 	  ch = lr_getc (lr); | 
 | 	  if (ch == '\n' || ch == EOF) | 
 | 	    { | 
 | 	      lr_error (lr, _("invalid escape sequence")); | 
 | 	      break; | 
 | 	    } | 
 | 	} | 
 |       ADDC (ch); | 
 |     } | 
 |  | 
 |   lr_ungetc (lr, ch); | 
 |  | 
 |   kw = lr->hash_fct (buf, bufact); | 
 |  | 
 |   if (kw != NULL && kw->symname_or_ident == 0) | 
 |     { | 
 |       lr->token.tok = kw->token; | 
 |       free (buf); | 
 |     } | 
 |   else | 
 |     { | 
 |       lr->token.tok = tok_ident; | 
 |  | 
 |       buf = xrealloc (buf, bufact + 1); | 
 |       buf[bufact] = '\0'; | 
 |  | 
 |       lr->token.val.str.startmb = buf; | 
 |       lr->token.val.str.lenmb = bufact; | 
 |     } | 
 |  | 
 |   return &lr->token; | 
 | } | 
 |  | 
 |  | 
 | static struct token * | 
 | get_string (struct linereader *lr, const struct charmap_t *charmap, | 
 | 	    struct localedef_t *locale, const struct repertoire_t *repertoire, | 
 | 	    int verbose) | 
 | { | 
 |   int return_widestr = lr->return_widestr; | 
 |   char *buf; | 
 |   wchar_t *buf2 = NULL; | 
 |   size_t bufact; | 
 |   size_t bufmax = 56; | 
 |  | 
 |   /* We must return two different strings.  */ | 
 |   buf = xmalloc (bufmax); | 
 |   bufact = 0; | 
 |  | 
 |   /* We know it'll be a string.  */ | 
 |   lr->token.tok = tok_string; | 
 |  | 
 |   /* If we need not translate the strings (i.e., expand <...> parts) | 
 |      we can run a simple loop.  */ | 
 |   if (!lr->translate_strings) | 
 |     { | 
 |       int ch; | 
 |  | 
 |       buf2 = NULL; | 
 |       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) | 
 | 	ADDC (ch); | 
 |  | 
 |       /* Catch errors with trailing escape character.  */ | 
 |       if (bufact > 0 && buf[bufact - 1] == lr->escape_char | 
 | 	  && (bufact == 1 || buf[bufact - 2] != lr->escape_char)) | 
 | 	{ | 
 | 	  lr_error (lr, _("illegal escape sequence at end of string")); | 
 | 	  --bufact; | 
 | 	} | 
 |       else if (ch == '\n' || ch == EOF) | 
 | 	lr_error (lr, _("unterminated string")); | 
 |  | 
 |       ADDC ('\0'); | 
 |     } | 
 |   else | 
 |     { | 
 |       int illegal_string = 0; | 
 |       size_t buf2act = 0; | 
 |       size_t buf2max = 56 * sizeof (uint32_t); | 
 |       int ch; | 
 |       int warned = 0; | 
 |  | 
 |       /* We have to provide the wide character result as well.  */ | 
 |       if (return_widestr) | 
 | 	buf2 = xmalloc (buf2max); | 
 |  | 
 |       /* Read until the end of the string (or end of the line or file).  */ | 
 |       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) | 
 | 	{ | 
 | 	  size_t startidx; | 
 | 	  uint32_t wch; | 
 | 	  struct charseq *seq; | 
 |  | 
 | 	  if (ch != '<') | 
 | 	    { | 
 | 	      /* The standards leave it up to the implementation to decide | 
 | 		 what to do with character which stand for themself.  We | 
 | 		 could jump through hoops to find out the value relative to | 
 | 		 the charmap and the repertoire map, but instead we leave | 
 | 		 it up to the locale definition author to write a better | 
 | 		 definition.  We assume here that every character which | 
 | 		 stands for itself is encoded using ISO 8859-1.  Using the | 
 | 		 escape character is allowed.  */ | 
 | 	      if (ch == lr->escape_char) | 
 | 		{ | 
 | 		  ch = lr_getc (lr); | 
 | 		  if (ch == '\n' || ch == EOF) | 
 | 		    break; | 
 | 		} | 
 |  | 
 | 	      if (verbose && !warned) | 
 | 		{ | 
 | 		  lr_error (lr, _("\ | 
 | non-symbolic character value should not be used")); | 
 | 		  warned = 1; | 
 | 		} | 
 |  | 
 | 	      ADDC (ch); | 
 | 	      if (return_widestr) | 
 | 		ADDWC ((uint32_t) ch); | 
 |  | 
 | 	      continue; | 
 | 	    } | 
 |  | 
 | 	  /* Now we have to search for the end of the symbolic name, i.e., | 
 | 	     the closing '>'.  */ | 
 | 	  startidx = bufact; | 
 | 	  while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) | 
 | 	    { | 
 | 	      if (ch == lr->escape_char) | 
 | 		{ | 
 | 		  ch = lr_getc (lr); | 
 | 		  if (ch == '\n' || ch == EOF) | 
 | 		    break; | 
 | 		} | 
 | 	      ADDC (ch); | 
 | 	    } | 
 | 	  if (ch == '\n' || ch == EOF) | 
 | 	    /* Not a correct string.  */ | 
 | 	    break; | 
 | 	  if (bufact == startidx) | 
 | 	    { | 
 | 	      /* <> is no correct name.  Ignore it and also signal an | 
 | 		 error.  */ | 
 | 	      illegal_string = 1; | 
 | 	      continue; | 
 | 	    } | 
 |  | 
 | 	  /* It might be a Uxxxx symbol.  */ | 
 | 	  if (buf[startidx] == 'U' | 
 | 	      && (bufact - startidx == 5 || bufact - startidx == 9)) | 
 | 	    { | 
 | 	      char *cp = buf + startidx + 1; | 
 | 	      while (cp < &buf[bufact] && isxdigit (*cp)) | 
 | 		++cp; | 
 |  | 
 | 	      if (cp == &buf[bufact]) | 
 | 		{ | 
 | 		  char utmp[10]; | 
 |  | 
 | 		  /* Yes, it is.  */ | 
 | 		  ADDC ('\0'); | 
 | 		  wch = strtoul (buf + startidx + 1, NULL, 16); | 
 |  | 
 | 		  /* Now forget about the name we just added.  */ | 
 | 		  bufact = startidx; | 
 |  | 
 | 		  if (return_widestr) | 
 | 		    ADDWC (wch); | 
 |  | 
 | 		  /* See whether the charmap contains the Uxxxxxxxx names.  */ | 
 | 		  snprintf (utmp, sizeof (utmp), "U%08X", wch); | 
 | 		  seq = charmap_find_value (charmap, utmp, 9); | 
 |  | 
 | 		  if (seq == NULL) | 
 | 		    { | 
 | 		     /* No, this isn't the case.  Now determine from | 
 | 			the repertoire the name of the character and | 
 | 			find it in the charmap.  */ | 
 | 		      if (repertoire != NULL) | 
 | 			{ | 
 | 			  const char *symbol; | 
 |  | 
 | 			  symbol = repertoire_find_symbol (repertoire, wch); | 
 |  | 
 | 			  if (symbol != NULL) | 
 | 			    seq = charmap_find_value (charmap, symbol, | 
 | 						      strlen (symbol)); | 
 | 			} | 
 |  | 
 | 		      if (seq == NULL) | 
 | 			{ | 
 | #ifndef NO_TRANSLITERATION | 
 | 			  /* Transliterate if possible.  */ | 
 | 			  if (locale != NULL) | 
 | 			    { | 
 | 			      uint32_t *translit; | 
 |  | 
 | 			      if ((locale->avail & CTYPE_LOCALE) == 0) | 
 | 				{ | 
 | 				  /* Load the CTYPE data now.  */ | 
 | 				  int old_needed = locale->needed; | 
 |  | 
 | 				  locale->needed = 0; | 
 | 				  locale = load_locale (LC_CTYPE, | 
 | 							locale->name, | 
 | 							locale->repertoire_name, | 
 | 							charmap, locale); | 
 | 				  locale->needed = old_needed; | 
 | 				} | 
 |  | 
 | 			      if ((locale->avail & CTYPE_LOCALE) != 0 | 
 | 				  && ((translit = find_translit (locale, | 
 | 								 charmap, wch)) | 
 | 				      != NULL)) | 
 | 				/* The CTYPE data contains a matching | 
 | 				   transliteration.  */ | 
 | 				{ | 
 | 				  int i; | 
 |  | 
 | 				  for (i = 0; translit[i] != 0; ++i) | 
 | 				    { | 
 | 				      char utmp[10]; | 
 |  | 
 | 				      snprintf (utmp, sizeof (utmp), "U%08X", | 
 | 						translit[i]); | 
 | 				      seq = charmap_find_value (charmap, utmp, | 
 | 								9); | 
 | 				      assert (seq != NULL); | 
 | 				      ADDS (seq->bytes, seq->nbytes); | 
 | 				    } | 
 |  | 
 | 				  continue; | 
 | 				} | 
 | 			    } | 
 | #endif	/* NO_TRANSLITERATION */ | 
 |  | 
 | 			  /* Not a known name.  */ | 
 | 			  illegal_string = 1; | 
 | 			} | 
 | 		    } | 
 |  | 
 | 		  if (seq != NULL) | 
 | 		    ADDS (seq->bytes, seq->nbytes); | 
 |  | 
 | 		  continue; | 
 | 		} | 
 | 	    } | 
 |  | 
 | 	  /* We now have the symbolic name in buf[startidx] to | 
 | 	     buf[bufact-1].  Now find out the value for this character | 
 | 	     in the charmap as well as in the repertoire map (in this | 
 | 	     order).  */ | 
 | 	  seq = charmap_find_value (charmap, &buf[startidx], | 
 | 				    bufact - startidx); | 
 |  | 
 | 	  if (seq == NULL) | 
 | 	    { | 
 | 	      /* This name is not in the charmap.  */ | 
 | 	      lr_error (lr, _("symbol `%.*s' not in charmap"), | 
 | 			(int) (bufact - startidx), &buf[startidx]); | 
 | 	      illegal_string = 1; | 
 | 	    } | 
 |  | 
 | 	  if (return_widestr) | 
 | 	    { | 
 | 	      /* Now the same for the multibyte representation.  */ | 
 | 	      if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE) | 
 | 		wch = seq->ucs4; | 
 | 	      else | 
 | 		{ | 
 | 		  wch = repertoire_find_value (repertoire, &buf[startidx], | 
 | 					       bufact - startidx); | 
 | 		  if (seq != NULL) | 
 | 		    seq->ucs4 = wch; | 
 | 		} | 
 |  | 
 | 	      if (wch == ILLEGAL_CHAR_VALUE) | 
 | 		{ | 
 | 		  /* This name is not in the repertoire map.  */ | 
 | 		  lr_error (lr, _("symbol `%.*s' not in repertoire map"), | 
 | 			    (int) (bufact - startidx), &buf[startidx]); | 
 | 		  illegal_string = 1; | 
 | 		} | 
 | 	      else | 
 | 		ADDWC (wch); | 
 | 	    } | 
 |  | 
 | 	  /* Now forget about the name we just added.  */ | 
 | 	  bufact = startidx; | 
 |  | 
 | 	  /* And copy the bytes.  */ | 
 | 	  if (seq != NULL) | 
 | 	    ADDS (seq->bytes, seq->nbytes); | 
 | 	} | 
 |  | 
 |       if (ch == '\n' || ch == EOF) | 
 | 	{ | 
 | 	  lr_error (lr, _("unterminated string")); | 
 | 	  illegal_string = 1; | 
 | 	} | 
 |  | 
 |       if (illegal_string) | 
 | 	{ | 
 | 	  free (buf); | 
 | 	  free (buf2); | 
 | 	  lr->token.val.str.startmb = NULL; | 
 | 	  lr->token.val.str.lenmb = 0; | 
 | 	  lr->token.val.str.startwc = NULL; | 
 | 	  lr->token.val.str.lenwc = 0; | 
 |  | 
 | 	  return &lr->token; | 
 | 	} | 
 |  | 
 |       ADDC ('\0'); | 
 |  | 
 |       if (return_widestr) | 
 | 	{ | 
 | 	  ADDWC (0); | 
 | 	  lr->token.val.str.startwc = xrealloc (buf2, | 
 | 						buf2act * sizeof (uint32_t)); | 
 | 	  lr->token.val.str.lenwc = buf2act; | 
 | 	} | 
 |     } | 
 |  | 
 |   lr->token.val.str.startmb = xrealloc (buf, bufact); | 
 |   lr->token.val.str.lenmb = bufact; | 
 |  | 
 |   return &lr->token; | 
 | } |