| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Copyright (C) 1996-2016 Free Software Foundation, Inc. | 
|  | 2 | This file is part of the GNU C Library. | 
|  | 3 | Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. | 
|  | 4 |  | 
|  | 5 | This program is free software; you can redistribute it and/or modify | 
|  | 6 | it under the terms of the GNU General Public License as published | 
|  | 7 | by the Free Software Foundation; version 2 of the License, or | 
|  | 8 | (at your option) any later version. | 
|  | 9 |  | 
|  | 10 | This program is distributed in the hope that it will be useful, | 
|  | 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 13 | GNU General Public License for more details. | 
|  | 14 |  | 
|  | 15 | You should have received a copy of the GNU General Public License | 
|  | 16 | along with this program; if not, see <http://www.gnu.org/licenses/>.  */ | 
|  | 17 |  | 
|  | 18 | #ifdef HAVE_CONFIG_H | 
|  | 19 | # include <config.h> | 
|  | 20 | #endif | 
|  | 21 |  | 
|  | 22 | #include <ctype.h> | 
|  | 23 | #include <errno.h> | 
|  | 24 | #include <libintl.h> | 
|  | 25 | #include <limits.h> | 
|  | 26 | #include <stdio.h> | 
|  | 27 | #include <stdlib.h> | 
|  | 28 | #include <string.h> | 
|  | 29 | #include <error.h> | 
|  | 30 | #include <stdint.h> | 
|  | 31 |  | 
|  | 32 | #include "localedef.h" | 
|  | 33 | #include "linereader.h" | 
|  | 34 | #include "charmap.h" | 
|  | 35 | #include "charmap-dir.h" | 
|  | 36 |  | 
|  | 37 | #include <assert.h> | 
|  | 38 |  | 
|  | 39 |  | 
|  | 40 | /* Define the lookup function.  */ | 
|  | 41 | #include "charmap-kw.h" | 
|  | 42 |  | 
|  | 43 |  | 
/* Forward declarations of the helpers that are private to this file.  */
static struct charmap_t *parse_charmap (struct linereader *cmfile, int verbose,
                                        int be_quiet);

static void new_width (struct linereader *cmfile, struct charmap_t *result,
                       const char *from, const char *to,
                       unsigned long int width);

static void charmap_new_char (struct linereader *lr, struct charmap_t *cm,
                              size_t nbytes, unsigned char *bytes,
                              const char *from, const char *to,
                              int decimal_ellipsis, int step);


/* Set to true by charmap_read when the loaded character map fails the
   ASCII compatibility test for the basic character set.  */
bool enc_not_ascii_compatible;


/* Only defined when NEED_NULL_POINTER is set.  NOTE(review): presumably
   referenced by a configuration macro defined elsewhere -- confirm.  */
#ifdef NEED_NULL_POINTER
static const char *null_pointer;
#endif
|  | 62 |  | 
|  | 63 | static struct linereader * | 
|  | 64 | cmlr_open (const char *directory, const char *name, kw_hash_fct_t hf) | 
|  | 65 | { | 
|  | 66 | FILE *fp; | 
|  | 67 |  | 
|  | 68 | fp = charmap_open (directory, name); | 
|  | 69 | if (fp == NULL) | 
|  | 70 | return NULL; | 
|  | 71 | else | 
|  | 72 | { | 
|  | 73 | size_t dlen = strlen (directory); | 
|  | 74 | int add_slash = (dlen == 0 || directory[dlen - 1] != '/'); | 
|  | 75 | size_t nlen = strlen (name); | 
|  | 76 | char *pathname; | 
|  | 77 | char *p; | 
|  | 78 |  | 
|  | 79 | pathname = alloca (dlen + add_slash + nlen + 1); | 
|  | 80 | p = stpcpy (pathname, directory); | 
|  | 81 | if (add_slash) | 
|  | 82 | *p++ = '/'; | 
|  | 83 | stpcpy (p, name); | 
|  | 84 |  | 
|  | 85 | return lr_create (fp, pathname, hf); | 
|  | 86 | } | 
|  | 87 | } | 
|  | 88 |  | 
|  | 89 | struct charmap_t * | 
|  | 90 | charmap_read (const char *filename, int verbose, int error_not_found, | 
|  | 91 | int be_quiet, int use_default) | 
|  | 92 | { | 
|  | 93 | struct charmap_t *result = NULL; | 
|  | 94 |  | 
|  | 95 | if (filename != NULL) | 
|  | 96 | { | 
|  | 97 | struct linereader *cmfile; | 
|  | 98 |  | 
|  | 99 | /* First try the name as found in the parameter.  */ | 
|  | 100 | cmfile = lr_open (filename, charmap_hash); | 
|  | 101 | if (cmfile == NULL) | 
|  | 102 | { | 
|  | 103 | /* No successful.  So start looking through the directories | 
|  | 104 | in the I18NPATH if this is a simple name.  */ | 
|  | 105 | if (strchr (filename, '/') == NULL) | 
|  | 106 | { | 
|  | 107 | char *i18npath = getenv ("I18NPATH"); | 
|  | 108 | if (i18npath != NULL && *i18npath != '\0') | 
|  | 109 | { | 
|  | 110 | const size_t pathlen = strlen (i18npath); | 
|  | 111 | char i18npathbuf[pathlen + 1]; | 
|  | 112 | char path[pathlen + sizeof ("/charmaps")]; | 
|  | 113 | char *next; | 
|  | 114 | i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1); | 
|  | 115 |  | 
|  | 116 | while (cmfile == NULL | 
|  | 117 | && (next = strsep (&i18npath, ":")) != NULL) | 
|  | 118 | { | 
|  | 119 | stpcpy (stpcpy (path, next), "/charmaps"); | 
|  | 120 | cmfile = cmlr_open (path, filename, charmap_hash); | 
|  | 121 |  | 
|  | 122 | if (cmfile == NULL) | 
|  | 123 | /* Try without the "/charmaps" part.  */ | 
|  | 124 | cmfile = cmlr_open (next, filename, charmap_hash); | 
|  | 125 | } | 
|  | 126 | } | 
|  | 127 |  | 
|  | 128 | if (cmfile == NULL) | 
|  | 129 | /* Try the default directory.  */ | 
|  | 130 | cmfile = cmlr_open (CHARMAP_PATH, filename, charmap_hash); | 
|  | 131 | } | 
|  | 132 | } | 
|  | 133 |  | 
|  | 134 | if (cmfile != NULL) | 
|  | 135 | result = parse_charmap (cmfile, verbose, be_quiet); | 
|  | 136 |  | 
|  | 137 | if (result == NULL && error_not_found) | 
|  | 138 | WITH_CUR_LOCALE (error (0, errno, _("\ | 
|  | 139 | character map file `%s' not found"), filename)); | 
|  | 140 | } | 
|  | 141 |  | 
|  | 142 | if (result == NULL && filename != NULL && strchr (filename, '/') == NULL) | 
|  | 143 | { | 
|  | 144 | /* OK, one more try.  We also accept the names given to the | 
|  | 145 | character sets in the files.  Sometimes they differ from the | 
|  | 146 | file name.  */ | 
|  | 147 | CHARMAP_DIR *dir; | 
|  | 148 |  | 
|  | 149 | dir = charmap_opendir (CHARMAP_PATH); | 
|  | 150 | if (dir != NULL) | 
|  | 151 | { | 
|  | 152 | const char *dirent; | 
|  | 153 |  | 
|  | 154 | while ((dirent = charmap_readdir (dir)) != NULL) | 
|  | 155 | { | 
|  | 156 | char **aliases; | 
|  | 157 | char **p; | 
|  | 158 | int found; | 
|  | 159 |  | 
|  | 160 | aliases = charmap_aliases (CHARMAP_PATH, dirent); | 
|  | 161 | found = 0; | 
|  | 162 | for (p = aliases; *p; p++) | 
|  | 163 | if (strcasecmp (*p, filename) == 0) | 
|  | 164 | { | 
|  | 165 | found = 1; | 
|  | 166 | break; | 
|  | 167 | } | 
|  | 168 | charmap_free_aliases (aliases); | 
|  | 169 |  | 
|  | 170 | if (found) | 
|  | 171 | { | 
|  | 172 | struct linereader *cmfile; | 
|  | 173 |  | 
|  | 174 | cmfile = cmlr_open (CHARMAP_PATH, dirent, charmap_hash); | 
|  | 175 | if (cmfile != NULL) | 
|  | 176 | result = parse_charmap (cmfile, verbose, be_quiet); | 
|  | 177 |  | 
|  | 178 | break; | 
|  | 179 | } | 
|  | 180 | } | 
|  | 181 |  | 
|  | 182 | charmap_closedir (dir); | 
|  | 183 | } | 
|  | 184 | } | 
|  | 185 |  | 
|  | 186 | if (result == NULL && DEFAULT_CHARMAP != NULL) | 
|  | 187 | { | 
|  | 188 | struct linereader *cmfile; | 
|  | 189 |  | 
|  | 190 | cmfile = cmlr_open (CHARMAP_PATH, DEFAULT_CHARMAP, charmap_hash); | 
|  | 191 | if (cmfile != NULL) | 
|  | 192 | result = parse_charmap (cmfile, verbose, be_quiet); | 
|  | 193 |  | 
|  | 194 | if (result == NULL) | 
|  | 195 | WITH_CUR_LOCALE (error (4, errno, _("\ | 
|  | 196 | default character map file `%s' not found"), DEFAULT_CHARMAP)); | 
|  | 197 | } | 
|  | 198 |  | 
|  | 199 | if (result != NULL && result->code_set_name == NULL) | 
|  | 200 | /* The input file does not specify a code set name.  This | 
|  | 201 | shouldn't happen but we should cope with it.  */ | 
|  | 202 | result->code_set_name = basename (filename); | 
|  | 203 |  | 
|  | 204 | /* Test of ASCII compatibility of locale encoding. | 
|  | 205 |  | 
|  | 206 | Verify that the encoding to be used in a locale is ASCII compatible, | 
|  | 207 | at least for the graphic characters, excluding the control characters, | 
|  | 208 | '$' and '@'.  This constraint comes from an ISO C 99 restriction. | 
|  | 209 |  | 
|  | 210 | ISO C 99 section 7.17.(2) (about wchar_t): | 
|  | 211 | the null character shall have the code value zero and each member of | 
|  | 212 | the basic character set shall have a code value equal to its value | 
|  | 213 | when used as the lone character in an integer character constant. | 
|  | 214 | ISO C 99 section 5.2.1.(3): | 
|  | 215 | Both the basic source and basic execution character sets shall have | 
|  | 216 | the following members: the 26 uppercase letters of the Latin alphabet | 
|  | 217 | A B C D E F G H I J K L M N O P Q R S T U V W X Y Z | 
|  | 218 | the 26 lowercase letters of the Latin alphabet | 
|  | 219 | a b c d e f g h i j k l m n o p q r s t u v w x y z | 
|  | 220 | the 10 decimal digits | 
|  | 221 | 0 1 2 3 4 5 6 7 8 9 | 
|  | 222 | the following 29 graphic characters | 
|  | 223 | ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~ | 
|  | 224 | the space character, and control characters representing horizontal | 
|  | 225 | tab, vertical tab, and form feed. | 
|  | 226 |  | 
|  | 227 | Therefore, for all members of the "basic character set", the 'char' code | 
|  | 228 | must have the same value as the 'wchar_t' code, which in glibc is the | 
|  | 229 | same as the Unicode code, which for all of the enumerated characters | 
|  | 230 | is identical to the ASCII code. */ | 
|  | 231 | if (result != NULL && use_default) | 
|  | 232 | { | 
|  | 233 | static const char basic_charset[] = | 
|  | 234 | { | 
|  | 235 | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', | 
|  | 236 | 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', | 
|  | 237 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', | 
|  | 238 | 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', | 
|  | 239 | '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', | 
|  | 240 | '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-', | 
|  | 241 | '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^', | 
|  | 242 | '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0' | 
|  | 243 | }; | 
|  | 244 | int failed = 0; | 
|  | 245 | const char *p = basic_charset; | 
|  | 246 |  | 
|  | 247 | do | 
|  | 248 | { | 
|  | 249 | struct charseq *seq = charmap_find_symbol (result, p, 1); | 
|  | 250 |  | 
|  | 251 | if (seq == NULL || seq->ucs4 != (uint32_t) *p) | 
|  | 252 | failed = 1; | 
|  | 253 | } | 
|  | 254 | while (*p++ != '\0'); | 
|  | 255 |  | 
|  | 256 | if (failed) | 
|  | 257 | { | 
|  | 258 | WITH_CUR_LOCALE (fprintf (stderr, _("\ | 
|  | 259 | character map `%s' is not ASCII compatible, locale not ISO C compliant\n"), | 
|  | 260 | result->code_set_name)); | 
|  | 261 | enc_not_ascii_compatible = true; | 
|  | 262 | } | 
|  | 263 | } | 
|  | 264 |  | 
|  | 265 | return result; | 
|  | 266 | } | 
|  | 267 |  | 
|  | 268 |  | 
|  | 269 | static struct charmap_t * | 
|  | 270 | parse_charmap (struct linereader *cmfile, int verbose, int be_quiet) | 
|  | 271 | { | 
|  | 272 | struct charmap_t *result; | 
|  | 273 | int state; | 
|  | 274 | enum token_t expected_tok = tok_error; | 
|  | 275 | const char *expected_str = NULL; | 
|  | 276 | char *from_name = NULL; | 
|  | 277 | char *to_name = NULL; | 
|  | 278 | enum token_t ellipsis = 0; | 
|  | 279 | int step = 1; | 
|  | 280 |  | 
|  | 281 | /* We don't want symbolic names in string to be translated.  */ | 
|  | 282 | cmfile->translate_strings = 0; | 
|  | 283 |  | 
|  | 284 | /* Allocate room for result.  */ | 
|  | 285 | result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t)); | 
|  | 286 | memset (result, '\0', sizeof (struct charmap_t)); | 
|  | 287 | /* The default DEFAULT_WIDTH is 1.  */ | 
|  | 288 | result->width_default = 1; | 
|  | 289 |  | 
|  | 290 | #define obstack_chunk_alloc malloc | 
|  | 291 | #define obstack_chunk_free free | 
|  | 292 | obstack_init (&result->mem_pool); | 
|  | 293 |  | 
|  | 294 | if (init_hash (&result->char_table, 256) | 
|  | 295 | || init_hash (&result->byte_table, 256)) | 
|  | 296 | { | 
|  | 297 | free (result); | 
|  | 298 | return NULL; | 
|  | 299 | } | 
|  | 300 |  | 
|  | 301 | /* We use a state machine to describe the charmap description file | 
|  | 302 | format.  */ | 
|  | 303 | state = 1; | 
|  | 304 | while (1) | 
|  | 305 | { | 
|  | 306 | /* What's on?  */ | 
|  | 307 | struct token *now = lr_token (cmfile, NULL, NULL, NULL, verbose); | 
|  | 308 | enum token_t nowtok = now->tok; | 
|  | 309 | struct token *arg; | 
|  | 310 |  | 
|  | 311 | if (nowtok == tok_eof) | 
|  | 312 | break; | 
|  | 313 |  | 
|  | 314 | switch (state) | 
|  | 315 | { | 
|  | 316 | case 1: | 
|  | 317 | /* The beginning.  We expect the special declarations, EOL or | 
|  | 318 | `CHARMAP'.  */ | 
|  | 319 | if (nowtok == tok_eol) | 
|  | 320 | /* Ignore empty lines.  */ | 
|  | 321 | continue; | 
|  | 322 |  | 
|  | 323 | if (nowtok == tok_charmap) | 
|  | 324 | { | 
|  | 325 | from_name = NULL; | 
|  | 326 | to_name = NULL; | 
|  | 327 |  | 
|  | 328 | /* We have to set up the real work.  Fill in some | 
|  | 329 | default values.  */ | 
|  | 330 | if (result->mb_cur_max == 0) | 
|  | 331 | result->mb_cur_max = 1; | 
|  | 332 | if (result->mb_cur_min == 0) | 
|  | 333 | result->mb_cur_min = result->mb_cur_max; | 
|  | 334 | if (result->mb_cur_min > result->mb_cur_max) | 
|  | 335 | { | 
|  | 336 | if (!be_quiet) | 
|  | 337 | WITH_CUR_LOCALE (error (0, 0, _("\ | 
|  | 338 | %s: <mb_cur_max> must be greater than <mb_cur_min>\n"), | 
|  | 339 | cmfile->fname)); | 
|  | 340 |  | 
|  | 341 | result->mb_cur_min = result->mb_cur_max; | 
|  | 342 | } | 
|  | 343 |  | 
|  | 344 | lr_ignore_rest (cmfile, 1); | 
|  | 345 |  | 
|  | 346 | state = 2; | 
|  | 347 | continue; | 
|  | 348 | } | 
|  | 349 |  | 
|  | 350 | if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max | 
|  | 351 | && nowtok != tok_mb_cur_min && nowtok != tok_escape_char | 
|  | 352 | && nowtok != tok_comment_char && nowtok != tok_g0esc | 
|  | 353 | && nowtok != tok_g1esc && nowtok != tok_g2esc | 
|  | 354 | && nowtok != tok_g3esc && nowtok != tok_repertoiremap | 
|  | 355 | && nowtok != tok_include) | 
|  | 356 | { | 
|  | 357 | lr_error (cmfile, _("syntax error in prolog: %s"), | 
|  | 358 | _("invalid definition")); | 
|  | 359 |  | 
|  | 360 | lr_ignore_rest (cmfile, 0); | 
|  | 361 | continue; | 
|  | 362 | } | 
|  | 363 |  | 
|  | 364 | /* We know that we need an argument.  */ | 
|  | 365 | arg = lr_token (cmfile, NULL, NULL, NULL, verbose); | 
|  | 366 |  | 
|  | 367 | switch (nowtok) | 
|  | 368 | { | 
|  | 369 | case tok_code_set_name: | 
|  | 370 | case tok_repertoiremap: | 
|  | 371 | if (arg->tok != tok_ident && arg->tok != tok_string) | 
|  | 372 | { | 
|  | 373 | badarg: | 
|  | 374 | lr_error (cmfile, _("syntax error in prolog: %s"), | 
|  | 375 | _("bad argument")); | 
|  | 376 |  | 
|  | 377 | lr_ignore_rest (cmfile, 0); | 
|  | 378 | continue; | 
|  | 379 | } | 
|  | 380 |  | 
|  | 381 | if (nowtok == tok_code_set_name) | 
|  | 382 | result->code_set_name = obstack_copy0 (&result->mem_pool, | 
|  | 383 | arg->val.str.startmb, | 
|  | 384 | arg->val.str.lenmb); | 
|  | 385 | else | 
|  | 386 | result->repertoiremap = obstack_copy0 (&result->mem_pool, | 
|  | 387 | arg->val.str.startmb, | 
|  | 388 | arg->val.str.lenmb); | 
|  | 389 |  | 
|  | 390 | lr_ignore_rest (cmfile, 1); | 
|  | 391 | continue; | 
|  | 392 |  | 
|  | 393 | case tok_mb_cur_max: | 
|  | 394 | case tok_mb_cur_min: | 
|  | 395 | if (arg->tok != tok_number) | 
|  | 396 | goto badarg; | 
|  | 397 |  | 
|  | 398 | if (verbose | 
|  | 399 | && ((nowtok == tok_mb_cur_max | 
|  | 400 | && result->mb_cur_max != 0) | 
|  | 401 | || (nowtok == tok_mb_cur_max | 
|  | 402 | && result->mb_cur_max != 0))) | 
|  | 403 | lr_error (cmfile, _("duplicate definition of <%s>"), | 
|  | 404 | nowtok == tok_mb_cur_min | 
|  | 405 | ? "mb_cur_min" : "mb_cur_max"); | 
|  | 406 |  | 
|  | 407 | if (arg->val.num < 1) | 
|  | 408 | { | 
|  | 409 | lr_error (cmfile, | 
|  | 410 | _("value for <%s> must be 1 or greater"), | 
|  | 411 | nowtok == tok_mb_cur_min | 
|  | 412 | ? "mb_cur_min" : "mb_cur_max"); | 
|  | 413 |  | 
|  | 414 | lr_ignore_rest (cmfile, 0); | 
|  | 415 | continue; | 
|  | 416 | } | 
|  | 417 | if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0 | 
|  | 418 | && (int) arg->val.num < result->mb_cur_min) | 
|  | 419 | || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0 | 
|  | 420 | && (int) arg->val.num > result->mb_cur_max)) | 
|  | 421 | { | 
|  | 422 | lr_error (cmfile, _("\ | 
|  | 423 | value of <%s> must be greater or equal than the value of <%s>"), | 
|  | 424 | "mb_cur_max", "mb_cur_min"); | 
|  | 425 |  | 
|  | 426 | lr_ignore_rest (cmfile, 0); | 
|  | 427 | continue; | 
|  | 428 | } | 
|  | 429 |  | 
|  | 430 | if (nowtok == tok_mb_cur_max) | 
|  | 431 | result->mb_cur_max = arg->val.num; | 
|  | 432 | else | 
|  | 433 | result->mb_cur_min = arg->val.num; | 
|  | 434 |  | 
|  | 435 | lr_ignore_rest (cmfile, 1); | 
|  | 436 | continue; | 
|  | 437 |  | 
|  | 438 | case tok_escape_char: | 
|  | 439 | case tok_comment_char: | 
|  | 440 | if (arg->tok != tok_ident) | 
|  | 441 | goto badarg; | 
|  | 442 |  | 
|  | 443 | if (arg->val.str.lenmb != 1) | 
|  | 444 | { | 
|  | 445 | lr_error (cmfile, _("\ | 
|  | 446 | argument to <%s> must be a single character"), | 
|  | 447 | nowtok == tok_escape_char ? "escape_char" | 
|  | 448 | : "comment_char"); | 
|  | 449 |  | 
|  | 450 | lr_ignore_rest (cmfile, 0); | 
|  | 451 | continue; | 
|  | 452 | } | 
|  | 453 |  | 
|  | 454 | if (nowtok == tok_escape_char) | 
|  | 455 | cmfile->escape_char = *arg->val.str.startmb; | 
|  | 456 | else | 
|  | 457 | cmfile->comment_char = *arg->val.str.startmb; | 
|  | 458 |  | 
|  | 459 | lr_ignore_rest (cmfile, 1); | 
|  | 460 | continue; | 
|  | 461 |  | 
|  | 462 | case tok_g0esc: | 
|  | 463 | case tok_g1esc: | 
|  | 464 | case tok_g2esc: | 
|  | 465 | case tok_g3esc: | 
|  | 466 | case tok_escseq: | 
|  | 467 | lr_ignore_rest (cmfile, 0); /* XXX */ | 
|  | 468 | continue; | 
|  | 469 |  | 
|  | 470 | case tok_include: | 
|  | 471 | lr_error (cmfile, _("\ | 
|  | 472 | character sets with locking states are not supported")); | 
|  | 473 | exit (4); | 
|  | 474 |  | 
|  | 475 | default: | 
|  | 476 | /* Cannot happen.  */ | 
|  | 477 | assert (! "Should not happen"); | 
|  | 478 | } | 
|  | 479 | break; | 
|  | 480 |  | 
|  | 481 | case 2: | 
|  | 482 | /* We have seen `CHARMAP' and now are in the body.  Each line | 
|  | 483 | must have the format "%s %s %s\n" or "%s...%s %s %s\n".  */ | 
|  | 484 | if (nowtok == tok_eol) | 
|  | 485 | /* Ignore empty lines.  */ | 
|  | 486 | continue; | 
|  | 487 |  | 
|  | 488 | if (nowtok == tok_end) | 
|  | 489 | { | 
|  | 490 | expected_tok = tok_charmap; | 
|  | 491 | expected_str = "CHARMAP"; | 
|  | 492 | state = 90; | 
|  | 493 | continue; | 
|  | 494 | } | 
|  | 495 |  | 
|  | 496 | if (nowtok != tok_bsymbol && nowtok != tok_ucs4) | 
|  | 497 | { | 
|  | 498 | lr_error (cmfile, _("syntax error in %s definition: %s"), | 
|  | 499 | "CHARMAP", _("no symbolic name given")); | 
|  | 500 |  | 
|  | 501 | lr_ignore_rest (cmfile, 0); | 
|  | 502 | continue; | 
|  | 503 | } | 
|  | 504 |  | 
|  | 505 | /* If the previous line was not completely correct free the | 
|  | 506 | used memory.  */ | 
|  | 507 | if (from_name != NULL) | 
|  | 508 | obstack_free (&result->mem_pool, from_name); | 
|  | 509 |  | 
|  | 510 | if (nowtok == tok_bsymbol) | 
|  | 511 | from_name = (char *) obstack_copy0 (&result->mem_pool, | 
|  | 512 | now->val.str.startmb, | 
|  | 513 | now->val.str.lenmb); | 
|  | 514 | else | 
|  | 515 | { | 
|  | 516 | obstack_printf (&result->mem_pool, "U%08X", | 
|  | 517 | cmfile->token.val.ucs4); | 
|  | 518 | obstack_1grow (&result->mem_pool, '\0'); | 
|  | 519 | from_name = (char *) obstack_finish (&result->mem_pool); | 
|  | 520 | } | 
|  | 521 | to_name = NULL; | 
|  | 522 |  | 
|  | 523 | state = 3; | 
|  | 524 | continue; | 
|  | 525 |  | 
|  | 526 | case 3: | 
|  | 527 | /* We have two possibilities: We can see an ellipsis or an | 
|  | 528 | encoding value.  */ | 
|  | 529 | if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 | 
|  | 530 | || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2 | 
|  | 531 | || nowtok == tok_ellipsis2_2) | 
|  | 532 | { | 
|  | 533 | ellipsis = nowtok; | 
|  | 534 | if (nowtok == tok_ellipsis4_2) | 
|  | 535 | { | 
|  | 536 | step = 2; | 
|  | 537 | nowtok = tok_ellipsis4; | 
|  | 538 | } | 
|  | 539 | else if (nowtok == tok_ellipsis2_2) | 
|  | 540 | { | 
|  | 541 | step = 2; | 
|  | 542 | nowtok = tok_ellipsis2; | 
|  | 543 | } | 
|  | 544 | state = 4; | 
|  | 545 | continue; | 
|  | 546 | } | 
|  | 547 | /* FALLTHROUGH */ | 
|  | 548 |  | 
|  | 549 | case 5: | 
|  | 550 | if (nowtok != tok_charcode) | 
|  | 551 | { | 
|  | 552 | lr_error (cmfile, _("syntax error in %s definition: %s"), | 
|  | 553 | "CHARMAP", _("invalid encoding given")); | 
|  | 554 |  | 
|  | 555 | lr_ignore_rest (cmfile, 0); | 
|  | 556 |  | 
|  | 557 | state = 2; | 
|  | 558 | continue; | 
|  | 559 | } | 
|  | 560 |  | 
|  | 561 | if (now->val.charcode.nbytes < result->mb_cur_min) | 
|  | 562 | lr_error (cmfile, _("too few bytes in character encoding")); | 
|  | 563 | else if (now->val.charcode.nbytes > result->mb_cur_max) | 
|  | 564 | lr_error (cmfile, _("too many bytes in character encoding")); | 
|  | 565 | else | 
|  | 566 | charmap_new_char (cmfile, result, now->val.charcode.nbytes, | 
|  | 567 | now->val.charcode.bytes, from_name, to_name, | 
|  | 568 | ellipsis != tok_ellipsis2, step); | 
|  | 569 |  | 
|  | 570 | /* Ignore trailing comment silently.  */ | 
|  | 571 | lr_ignore_rest (cmfile, 0); | 
|  | 572 |  | 
|  | 573 | from_name = NULL; | 
|  | 574 | to_name = NULL; | 
|  | 575 | ellipsis = tok_none; | 
|  | 576 | step = 1; | 
|  | 577 |  | 
|  | 578 | state = 2; | 
|  | 579 | continue; | 
|  | 580 |  | 
|  | 581 | case 4: | 
|  | 582 | if (nowtok != tok_bsymbol && nowtok != tok_ucs4) | 
|  | 583 | { | 
|  | 584 | lr_error (cmfile, _("syntax error in %s definition: %s"), | 
|  | 585 | "CHARMAP", | 
|  | 586 | _("no symbolic name given for end of range")); | 
|  | 587 |  | 
|  | 588 | lr_ignore_rest (cmfile, 0); | 
|  | 589 | continue; | 
|  | 590 | } | 
|  | 591 |  | 
|  | 592 | /* Copy the to-name in a safe place.  */ | 
|  | 593 | if (nowtok == tok_bsymbol) | 
|  | 594 | to_name = (char *) obstack_copy0 (&result->mem_pool, | 
|  | 595 | cmfile->token.val.str.startmb, | 
|  | 596 | cmfile->token.val.str.lenmb); | 
|  | 597 | else | 
|  | 598 | { | 
|  | 599 | obstack_printf (&result->mem_pool, "U%08X", | 
|  | 600 | cmfile->token.val.ucs4); | 
|  | 601 | obstack_1grow (&result->mem_pool, '\0'); | 
|  | 602 | to_name = (char *) obstack_finish (&result->mem_pool); | 
|  | 603 | } | 
|  | 604 |  | 
|  | 605 | state = 5; | 
|  | 606 | continue; | 
|  | 607 |  | 
|  | 608 | case 90: | 
|  | 609 | if (nowtok != expected_tok) | 
|  | 610 | lr_error (cmfile, _("\ | 
|  | 611 | %1$s: definition does not end with `END %1$s'"), expected_str); | 
|  | 612 |  | 
|  | 613 | lr_ignore_rest (cmfile, nowtok == expected_tok); | 
|  | 614 | state = 91; | 
|  | 615 | continue; | 
|  | 616 |  | 
|  | 617 | case 91: | 
|  | 618 | /* Waiting for WIDTH... */ | 
|  | 619 | if (nowtok == tok_eol) | 
|  | 620 | /* Ignore empty lines.  */ | 
|  | 621 | continue; | 
|  | 622 |  | 
|  | 623 | if (nowtok == tok_width_default) | 
|  | 624 | { | 
|  | 625 | state = 92; | 
|  | 626 | continue; | 
|  | 627 | } | 
|  | 628 |  | 
|  | 629 | if (nowtok == tok_width) | 
|  | 630 | { | 
|  | 631 | lr_ignore_rest (cmfile, 1); | 
|  | 632 | state = 93; | 
|  | 633 | continue; | 
|  | 634 | } | 
|  | 635 |  | 
|  | 636 | if (nowtok == tok_width_variable) | 
|  | 637 | { | 
|  | 638 | lr_ignore_rest (cmfile, 1); | 
|  | 639 | state = 98; | 
|  | 640 | continue; | 
|  | 641 | } | 
|  | 642 |  | 
|  | 643 | lr_error (cmfile, _("\ | 
|  | 644 | only WIDTH definitions are allowed to follow the CHARMAP definition")); | 
|  | 645 |  | 
|  | 646 | lr_ignore_rest (cmfile, 0); | 
|  | 647 | continue; | 
|  | 648 |  | 
|  | 649 | case 92: | 
|  | 650 | if (nowtok != tok_number) | 
|  | 651 | lr_error (cmfile, _("value for %s must be an integer"), | 
|  | 652 | "WIDTH_DEFAULT"); | 
|  | 653 | else | 
|  | 654 | result->width_default = now->val.num; | 
|  | 655 |  | 
|  | 656 | lr_ignore_rest (cmfile, nowtok == tok_number); | 
|  | 657 |  | 
|  | 658 | state = 91; | 
|  | 659 | continue; | 
|  | 660 |  | 
|  | 661 | case 93: | 
|  | 662 | /* We now expect `END WIDTH' or lines of the format "%s %d\n" or | 
|  | 663 | "%s...%s %d\n".  */ | 
|  | 664 | if (nowtok == tok_eol) | 
|  | 665 | /* ignore empty lines.  */ | 
|  | 666 | continue; | 
|  | 667 |  | 
|  | 668 | if (nowtok == tok_end) | 
|  | 669 | { | 
|  | 670 | expected_tok = tok_width; | 
|  | 671 | expected_str = "WIDTH"; | 
|  | 672 | state = 90; | 
|  | 673 | continue; | 
|  | 674 | } | 
|  | 675 |  | 
|  | 676 | if (nowtok != tok_bsymbol && nowtok != tok_ucs4) | 
|  | 677 | { | 
|  | 678 | lr_error (cmfile, _("syntax error in %s definition: %s"), | 
|  | 679 | "WIDTH", _("no symbolic name given")); | 
|  | 680 |  | 
|  | 681 | lr_ignore_rest (cmfile, 0); | 
|  | 682 | continue; | 
|  | 683 | } | 
|  | 684 |  | 
|  | 685 | if (from_name != NULL) | 
|  | 686 | obstack_free (&result->mem_pool, from_name); | 
|  | 687 |  | 
|  | 688 | if (nowtok == tok_bsymbol) | 
|  | 689 | from_name = (char *) obstack_copy0 (&result->mem_pool, | 
|  | 690 | now->val.str.startmb, | 
|  | 691 | now->val.str.lenmb); | 
|  | 692 | else | 
|  | 693 | { | 
|  | 694 | obstack_printf (&result->mem_pool, "U%08X", | 
|  | 695 | cmfile->token.val.ucs4); | 
|  | 696 | obstack_1grow (&result->mem_pool, '\0'); | 
|  | 697 | from_name = (char *) obstack_finish (&result->mem_pool); | 
|  | 698 | } | 
|  | 699 |  | 
|  | 700 | to_name = NULL; | 
|  | 701 |  | 
|  | 702 | state = 94; | 
|  | 703 | continue; | 
|  | 704 |  | 
|  | 705 | case 94: | 
|  | 706 | if (nowtok == tok_ellipsis3) | 
|  | 707 | { | 
|  | 708 | state = 95; | 
|  | 709 | continue; | 
|  | 710 | } | 
|  | 711 |  | 
|  | 712 | case 96: | 
|  | 713 | if (nowtok != tok_number) | 
|  | 714 | lr_error (cmfile, _("value for %s must be an integer"), | 
|  | 715 | "WIDTH"); | 
|  | 716 | else | 
|  | 717 | { | 
|  | 718 | /* Store width for chars.  */ | 
|  | 719 | new_width (cmfile, result, from_name, to_name, now->val.num); | 
|  | 720 |  | 
|  | 721 | from_name = NULL; | 
|  | 722 | to_name = NULL; | 
|  | 723 | } | 
|  | 724 |  | 
|  | 725 | lr_ignore_rest (cmfile, nowtok == tok_number); | 
|  | 726 |  | 
|  | 727 | state = 93; | 
|  | 728 | continue; | 
|  | 729 |  | 
|  | 730 | case 95: | 
|  | 731 | if (nowtok != tok_bsymbol && nowtok != tok_ucs4) | 
|  | 732 | { | 
|  | 733 | lr_error (cmfile, _("syntax error in %s definition: %s"), | 
|  | 734 | "WIDTH", _("no symbolic name given for end of range")); | 
|  | 735 |  | 
|  | 736 | lr_ignore_rest (cmfile, 0); | 
|  | 737 |  | 
|  | 738 | state = 93; | 
|  | 739 | continue; | 
|  | 740 | } | 
|  | 741 |  | 
|  | 742 | if (nowtok == tok_bsymbol) | 
|  | 743 | to_name = (char *) obstack_copy0 (&result->mem_pool, | 
|  | 744 | now->val.str.startmb, | 
|  | 745 | now->val.str.lenmb); | 
|  | 746 | else | 
|  | 747 | { | 
|  | 748 | obstack_printf (&result->mem_pool, "U%08X", | 
|  | 749 | cmfile->token.val.ucs4); | 
|  | 750 | obstack_1grow (&result->mem_pool, '\0'); | 
|  | 751 | to_name = (char *) obstack_finish (&result->mem_pool); | 
|  | 752 | } | 
|  | 753 |  | 
|  | 754 | state = 96; | 
|  | 755 | continue; | 
|  | 756 |  | 
|  | 757 | case 98: | 
|  | 758 | /* We now expect `END WIDTH_VARIABLE' or lines of the format | 
|  | 759 | "%s\n" or "%s...%s\n".  */ | 
|  | 760 | if (nowtok == tok_eol) | 
|  | 761 | /* ignore empty lines.  */ | 
|  | 762 | continue; | 
|  | 763 |  | 
|  | 764 | if (nowtok == tok_end) | 
|  | 765 | { | 
|  | 766 | expected_tok = tok_width_variable; | 
|  | 767 | expected_str = "WIDTH_VARIABLE"; | 
|  | 768 | state = 90; | 
|  | 769 | continue; | 
|  | 770 | } | 
|  | 771 |  | 
|  | 772 | if (nowtok != tok_bsymbol && nowtok != tok_ucs4) | 
|  | 773 | { | 
|  | 774 | lr_error (cmfile, _("syntax error in %s definition: %s"), | 
|  | 775 | "WIDTH_VARIABLE", _("no symbolic name given")); | 
|  | 776 |  | 
|  | 777 | lr_ignore_rest (cmfile, 0); | 
|  | 778 |  | 
|  | 779 | continue; | 
|  | 780 | } | 
|  | 781 |  | 
|  | 782 | if (from_name != NULL) | 
|  | 783 | obstack_free (&result->mem_pool, from_name); | 
|  | 784 |  | 
|  | 785 | if (nowtok == tok_bsymbol) | 
|  | 786 | from_name = (char *) obstack_copy0 (&result->mem_pool, | 
|  | 787 | now->val.str.startmb, | 
|  | 788 | now->val.str.lenmb); | 
|  | 789 | else | 
|  | 790 | { | 
|  | 791 | obstack_printf (&result->mem_pool, "U%08X", | 
|  | 792 | cmfile->token.val.ucs4); | 
|  | 793 | obstack_1grow (&result->mem_pool, '\0'); | 
|  | 794 | from_name = (char *) obstack_finish (&result->mem_pool); | 
|  | 795 | } | 
|  | 796 | to_name = NULL; | 
|  | 797 |  | 
|  | 798 | state = 99; | 
|  | 799 | continue; | 
|  | 800 |  | 
|  | 801 | case 99: | 
|  | 802 | if (nowtok == tok_ellipsis3) | 
|  | 803 | state = 100; | 
|  | 804 |  | 
|  | 805 | /* Store info.  */ | 
|  | 806 | from_name = NULL; | 
|  | 807 |  | 
|  | 808 | /* Warn */ | 
|  | 809 | state = 98; | 
|  | 810 | continue; | 
|  | 811 |  | 
|  | 812 | case 100: | 
|  | 813 | if (nowtok != tok_bsymbol && nowtok != tok_ucs4) | 
|  | 814 | { | 
|  | 815 | lr_error (cmfile, _("syntax error in %s definition: %s"), | 
|  | 816 | "WIDTH_VARIABLE", | 
|  | 817 | _("no symbolic name given for end of range")); | 
|  | 818 | lr_ignore_rest (cmfile, 0); | 
|  | 819 | continue; | 
|  | 820 | } | 
|  | 821 |  | 
|  | 822 | if (nowtok == tok_bsymbol) | 
|  | 823 | to_name = (char *) obstack_copy0 (&result->mem_pool, | 
|  | 824 | now->val.str.startmb, | 
|  | 825 | now->val.str.lenmb); | 
|  | 826 | else | 
|  | 827 | { | 
|  | 828 | obstack_printf (&result->mem_pool, "U%08X", | 
|  | 829 | cmfile->token.val.ucs4); | 
|  | 830 | obstack_1grow (&result->mem_pool, '\0'); | 
|  | 831 | to_name = (char *) obstack_finish (&result->mem_pool); | 
|  | 832 | } | 
|  | 833 |  | 
|  | 834 | /* XXX Enter value into table.  */ | 
|  | 835 |  | 
|  | 836 | lr_ignore_rest (cmfile, 1); | 
|  | 837 |  | 
|  | 838 | state = 98; | 
|  | 839 | continue; | 
|  | 840 |  | 
|  | 841 | default: | 
|  | 842 | WITH_CUR_LOCALE (error (5, 0, _("%s: error in state machine"), | 
|  | 843 | __FILE__)); | 
|  | 844 | /* NOTREACHED */ | 
|  | 845 | } | 
|  | 846 | break; | 
|  | 847 | } | 
|  | 848 |  | 
|  | 849 | if (state != 91 && !be_quiet) | 
|  | 850 | WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"), | 
|  | 851 | cmfile->fname)); | 
|  | 852 |  | 
|  | 853 | lr_close (cmfile); | 
|  | 854 |  | 
|  | 855 | return result; | 
|  | 856 | } | 
|  | 857 |  | 
|  | 858 |  | 
|  | 859 | static void | 
|  | 860 | new_width (struct linereader *cmfile, struct charmap_t *result, | 
|  | 861 | const char *from, const char *to, unsigned long int width) | 
|  | 862 | { | 
|  | 863 | struct charseq *from_val; | 
|  | 864 | struct charseq *to_val; | 
|  | 865 |  | 
|  | 866 | from_val = charmap_find_value (result, from, strlen (from)); | 
|  | 867 | if (from_val == NULL) | 
|  | 868 | { | 
|  | 869 | lr_error (cmfile, _("unknown character `%s'"), from); | 
|  | 870 | return; | 
|  | 871 | } | 
|  | 872 |  | 
|  | 873 | if (to == NULL) | 
|  | 874 | to_val = from_val; | 
|  | 875 | else | 
|  | 876 | { | 
|  | 877 | to_val = charmap_find_value (result, to, strlen (to)); | 
|  | 878 | if (to_val == NULL) | 
|  | 879 | { | 
|  | 880 | lr_error (cmfile, _("unknown character `%s'"), to); | 
|  | 881 | return; | 
|  | 882 | } | 
|  | 883 |  | 
|  | 884 | /* Make sure the number of bytes for the end points of the range | 
|  | 885 | is correct.  */ | 
|  | 886 | if (from_val->nbytes != to_val->nbytes) | 
|  | 887 | { | 
|  | 888 | lr_error (cmfile, _("\ | 
|  | 889 | number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"), | 
|  | 890 | from_val->nbytes, to_val->nbytes); | 
|  | 891 | return; | 
|  | 892 | } | 
|  | 893 | } | 
|  | 894 |  | 
|  | 895 | if (result->nwidth_rules >= result->nwidth_rules_max) | 
|  | 896 | { | 
|  | 897 | size_t new_size = result->nwidth_rules + 32; | 
|  | 898 | struct width_rule *new_rules = | 
|  | 899 | (struct width_rule *) obstack_alloc (&result->mem_pool, | 
|  | 900 | (new_size | 
|  | 901 | * sizeof (struct width_rule))); | 
|  | 902 |  | 
|  | 903 | memcpy (new_rules, result->width_rules, | 
|  | 904 | result->nwidth_rules_max * sizeof (struct width_rule)); | 
|  | 905 |  | 
|  | 906 | result->width_rules = new_rules; | 
|  | 907 | result->nwidth_rules_max = new_size; | 
|  | 908 | } | 
|  | 909 |  | 
|  | 910 | result->width_rules[result->nwidth_rules].from = from_val; | 
|  | 911 | result->width_rules[result->nwidth_rules].to = to_val; | 
|  | 912 | result->width_rules[result->nwidth_rules].width = (unsigned int) width; | 
|  | 913 | ++result->nwidth_rules; | 
|  | 914 | } | 
|  | 915 |  | 
|  | 916 |  | 
|  | 917 | struct charseq * | 
|  | 918 | charmap_find_value (const struct charmap_t *cm, const char *name, size_t len) | 
|  | 919 | { | 
|  | 920 | void *result; | 
|  | 921 |  | 
|  | 922 | return (find_entry ((hash_table *) &cm->char_table, name, len, &result) | 
|  | 923 | < 0 ? NULL : (struct charseq *) result); | 
|  | 924 | } | 
|  | 925 |  | 
|  | 926 |  | 
|  | 927 | static void | 
|  | 928 | charmap_new_char (struct linereader *lr, struct charmap_t *cm, | 
|  | 929 | size_t nbytes, unsigned char *bytes, | 
|  | 930 | const char *from, const char *to, | 
|  | 931 | int decimal_ellipsis, int step) | 
|  | 932 | { | 
|  | 933 | hash_table *ht = &cm->char_table; | 
|  | 934 | hash_table *bt = &cm->byte_table; | 
|  | 935 | struct obstack *ob = &cm->mem_pool; | 
|  | 936 | char *from_end; | 
|  | 937 | char *to_end; | 
|  | 938 | const char *cp; | 
|  | 939 | int prefix_len, len1, len2; | 
|  | 940 | unsigned int from_nr, to_nr, cnt; | 
|  | 941 | struct charseq *newp; | 
|  | 942 |  | 
|  | 943 | len1 = strlen (from); | 
|  | 944 |  | 
|  | 945 | if (to == NULL) | 
|  | 946 | { | 
|  | 947 | newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); | 
|  | 948 | newp->nbytes = nbytes; | 
|  | 949 | memcpy (newp->bytes, bytes, nbytes); | 
|  | 950 | newp->name = from; | 
|  | 951 |  | 
|  | 952 | newp->ucs4 = UNINITIALIZED_CHAR_VALUE; | 
|  | 953 | if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9)) | 
|  | 954 | { | 
|  | 955 | /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where | 
|  | 956 | xxxx and xxxxxxxx are hexadecimal numbers.  In this case | 
|  | 957 | we use the value of xxxx or xxxxxxxx as the UCS4 value of | 
|  | 958 | this character and we don't have to consult the repertoire | 
|  | 959 | map. | 
|  | 960 |  | 
|  | 961 | If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx | 
|  | 962 | and xxxxxxxx also give the code point in UCS4 but this must | 
|  | 963 | be in the private, i.e., unassigned, area.  This should be | 
|  | 964 | used for characters which do not (yet) have an equivalent | 
|  | 965 | in ISO 10646 and Unicode.  */ | 
|  | 966 | char *endp; | 
|  | 967 |  | 
|  | 968 | errno = 0; | 
|  | 969 | newp->ucs4 = strtoul (from + 1, &endp, 16); | 
|  | 970 | if (endp - from != len1 | 
|  | 971 | || (newp->ucs4 == ~((uint32_t) 0) && errno == ERANGE) | 
|  | 972 | || newp->ucs4 >= 0x80000000) | 
|  | 973 | /* This wasn't successful.  Signal this name cannot be a | 
|  | 974 | correct UCS value.  */ | 
|  | 975 | newp->ucs4 = UNINITIALIZED_CHAR_VALUE; | 
|  | 976 | } | 
|  | 977 |  | 
|  | 978 | insert_entry (ht, from, len1, newp); | 
|  | 979 | insert_entry (bt, newp->bytes, nbytes, newp); | 
|  | 980 | /* Please note that it isn't a bug if a symbol is defined more | 
|  | 981 | than once.  All later definitions are simply discarded.  */ | 
|  | 982 | return; | 
|  | 983 | } | 
|  | 984 |  | 
|  | 985 | /* We have a range: the names must have names with equal prefixes | 
|  | 986 | and an equal number of digits, where the second number is greater | 
|  | 987 | or equal than the first.  */ | 
|  | 988 | len2 = strlen (to); | 
|  | 989 |  | 
|  | 990 | if (len1 != len2) | 
|  | 991 | { | 
|  | 992 | illegal_range: | 
|  | 993 | lr_error (lr, _("invalid names for character range")); | 
|  | 994 | return; | 
|  | 995 | } | 
|  | 996 |  | 
|  | 997 | cp = &from[len1 - 1]; | 
|  | 998 | if (decimal_ellipsis) | 
|  | 999 | while (isdigit (*cp) && cp >= from) | 
|  | 1000 | --cp; | 
|  | 1001 | else | 
|  | 1002 | while (isxdigit (*cp) && cp >= from) | 
|  | 1003 | { | 
|  | 1004 | if (!isdigit (*cp) && !isupper (*cp)) | 
|  | 1005 | lr_error (lr, _("\ | 
|  | 1006 | hexadecimal range format should use only capital characters")); | 
|  | 1007 | --cp; | 
|  | 1008 | } | 
|  | 1009 |  | 
|  | 1010 | prefix_len = (cp - from) + 1; | 
|  | 1011 |  | 
|  | 1012 | if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) | 
|  | 1013 | goto illegal_range; | 
|  | 1014 |  | 
|  | 1015 | errno = 0; | 
|  | 1016 | from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16); | 
|  | 1017 | if (*from_end != '\0' || (from_nr == UINT_MAX && errno == ERANGE) | 
|  | 1018 | || ((to_nr = strtoul (&to[prefix_len], &to_end, | 
|  | 1019 | decimal_ellipsis ? 10 : 16)) == UINT_MAX | 
|  | 1020 | && errno == ERANGE) | 
|  | 1021 | || *to_end != '\0') | 
|  | 1022 | { | 
|  | 1023 | lr_error (lr, _("<%s> and <%s> are invalid names for range"), from, to); | 
|  | 1024 | return; | 
|  | 1025 | } | 
|  | 1026 |  | 
|  | 1027 | if (from_nr > to_nr) | 
|  | 1028 | { | 
|  | 1029 | lr_error (lr, _("upper limit in range is smaller than lower limit")); | 
|  | 1030 | return; | 
|  | 1031 | } | 
|  | 1032 |  | 
|  | 1033 | for (cnt = from_nr; cnt <= to_nr; cnt += step) | 
|  | 1034 | { | 
|  | 1035 | char *name_end; | 
|  | 1036 | obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X", | 
|  | 1037 | prefix_len, from, len1 - prefix_len, cnt); | 
|  | 1038 | obstack_1grow (ob, '\0'); | 
|  | 1039 | name_end = obstack_finish (ob); | 
|  | 1040 |  | 
|  | 1041 | newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); | 
|  | 1042 | newp->nbytes = nbytes; | 
|  | 1043 | memcpy (newp->bytes, bytes, nbytes); | 
|  | 1044 | newp->name = name_end; | 
|  | 1045 |  | 
|  | 1046 | newp->ucs4 = UNINITIALIZED_CHAR_VALUE; | 
|  | 1047 | if ((name_end[0] == 'U' || name_end[0] == 'P') | 
|  | 1048 | && (len1 == 5 || len1 == 9)) | 
|  | 1049 | { | 
|  | 1050 | /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where | 
|  | 1051 | xxxx and xxxxxxxx are hexadecimal numbers.  In this case | 
|  | 1052 | we use the value of xxxx or xxxxxxxx as the UCS4 value of | 
|  | 1053 | this character and we don't have to consult the repertoire | 
|  | 1054 | map. | 
|  | 1055 |  | 
|  | 1056 | If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx | 
|  | 1057 | and xxxxxxxx also give the code point in UCS4 but this must | 
|  | 1058 | be in the private, i.e., unassigned, area.  This should be | 
|  | 1059 | used for characters which do not (yet) have an equivalent | 
|  | 1060 | in ISO 10646 and Unicode.  */ | 
|  | 1061 | char *endp; | 
|  | 1062 |  | 
|  | 1063 | errno = 0; | 
|  | 1064 | newp->ucs4 = strtoul (name_end + 1, &endp, 16); | 
|  | 1065 | if (endp - name_end != len1 | 
|  | 1066 | || (newp->ucs4 == ~((uint32_t) 0) && errno == ERANGE) | 
|  | 1067 | || newp->ucs4 >= 0x80000000) | 
|  | 1068 | /* This wasn't successful.  Signal this name cannot be a | 
|  | 1069 | correct UCS value.  */ | 
|  | 1070 | newp->ucs4 = UNINITIALIZED_CHAR_VALUE; | 
|  | 1071 | } | 
|  | 1072 |  | 
|  | 1073 | insert_entry (ht, name_end, len1, newp); | 
|  | 1074 | insert_entry (bt, newp->bytes, nbytes, newp); | 
|  | 1075 | /* Please note we don't examine the return value since it is no error | 
|  | 1076 | if we have two definitions for a symbol.  */ | 
|  | 1077 |  | 
|  | 1078 | /* Increment the value in the byte sequence.  */ | 
|  | 1079 | if (++bytes[nbytes - 1] == '\0') | 
|  | 1080 | { | 
|  | 1081 | int b = nbytes - 2; | 
|  | 1082 |  | 
|  | 1083 | do | 
|  | 1084 | if (b < 0) | 
|  | 1085 | { | 
|  | 1086 | lr_error (lr, | 
|  | 1087 | _("resulting bytes for range not representable.")); | 
|  | 1088 | return; | 
|  | 1089 | } | 
|  | 1090 | while (++bytes[b--] == 0); | 
|  | 1091 | } | 
|  | 1092 | } | 
|  | 1093 | } | 
|  | 1094 |  | 
|  | 1095 |  | 
|  | 1096 | struct charseq * | 
|  | 1097 | charmap_find_symbol (const struct charmap_t *cm, const char *bytes, | 
|  | 1098 | size_t nbytes) | 
|  | 1099 | { | 
|  | 1100 | void *result; | 
|  | 1101 |  | 
|  | 1102 | return (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &result) | 
|  | 1103 | < 0 ? NULL : (struct charseq *) result); | 
|  | 1104 | } |