/* Code to load locale data from the locale archive file.
   Copyright (C) 2002-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
| 18 | |
| 19 | #include <locale.h> |
| 20 | #include <stddef.h> |
| 21 | #include <stdlib.h> |
| 22 | #include <stdbool.h> |
| 23 | #include <errno.h> |
| 24 | #include <assert.h> |
| 25 | #include <string.h> |
| 26 | #include <fcntl.h> |
| 27 | #include <unistd.h> |
| 28 | #include <stdint.h> |
| 29 | #include <sys/mman.h> |
| 30 | #include <sys/stat.h> |
| 31 | #include <sys/param.h> |
| 32 | |
| 33 | #include "localeinfo.h" |
| 34 | #include "locarchive.h" |
| 35 | #include <not-cancel.h> |
| 36 | |
| 37 | /* Define the hash function. We define the function as static inline. */ |
| 38 | #define compute_hashval static inline compute_hashval |
| 39 | #define hashval_t uint32_t |
| 40 | #include "hashval.h" |
| 41 | #undef compute_hashval |
| 42 | |
| 43 | |
| 44 | /* Name of the locale archive file. */ |
| 45 | static const char archfname[] = LOCALEDIR "/locale-archive"; |
| 46 | |
| 47 | /* Size of initial mapping window, optimal if large enough to |
| 48 | cover the header plus the initial locale. */ |
| 49 | #define ARCHIVE_MAPPING_WINDOW (2 * 1024 * 1024) |
| 50 | |
| 51 | #ifndef MAP_COPY |
| 52 | /* This is not quite as good as MAP_COPY since unexamined pages |
| 53 | can change out from under us and give us inconsistent data. |
| 54 | But we rely on the user not to diddle the system's live archive. |
| 55 | Even though we only ever use PROT_READ, using MAP_SHARED would |
| 56 | not give the system sufficient freedom to e.g. let the on disk |
| 57 | file go away because it doesn't know we won't call mprotect later. */ |
| 58 | # define MAP_COPY MAP_PRIVATE |
| 59 | #endif |
| 60 | #ifndef MAP_FILE |
| 61 | /* Some systems do not have this flag; it is superfluous. */ |
| 62 | # define MAP_FILE 0 |
| 63 | #endif |
| 64 | |
/* Describes one run of contiguous pages that has already been mapped
   from the locale archive.  The records are chained through NEXT.  */
struct archmapped
{
  void *ptr;                    /* Address at which the pages are mapped.  */
  uint32_t from;                /* Archive offset of the first mapped byte.  */
  uint32_t len;                 /* Number of bytes mapped.  */
  struct archmapped *next;      /* Next record in the chain, or NULL.  */
};

/* Head of the chain of mappings; NULL until the first attempt to use
   the archive has been made.  */
static struct archmapped *archmapped;
| 74 | |
| 75 | /* This describes the mapping at the beginning of the file that contains |
| 76 | the header data. There could be data in the following partial page, |
| 77 | so this is searched like any other. Once the archive has been used, |
| 78 | ARCHMAPPED points to this; if mapping the archive header failed, |
| 79 | then headmap.ptr is null. */ |
| 80 | static struct archmapped headmap; |
| 81 | static struct stat64 archive_stat; /* stat of archive when header mapped. */ |
| 82 | |
| 83 | /* Record of locales that we have already loaded from the archive. */ |
| 84 | struct locale_in_archive |
| 85 | { |
| 86 | struct locale_in_archive *next; |
| 87 | char *name; |
| 88 | struct __locale_data *data[__LC_LAST]; |
| 89 | }; |
| 90 | static struct locale_in_archive *archloaded; |
| 91 | |
| 92 | |
/* Local structure and subroutine of _nl_load_locale_from_archive, see
   below.  Each element describes where the data of one locale category
   lives inside the archive file.  */
struct range
{
  uint32_t from;        /* Archive offset at which the data starts.  */
  uint32_t len;         /* Length of the data in bytes.  */
  int category;         /* Category number the data belongs to.  */
  void *result;         /* Set to NULL when the element is filled in.  */
};

/* Comparison function for qsort: order two `struct range' objects by
   ascending FROM offset.  Returns a negative value, zero, or a positive
   value if *P1 starts before, at, or after *P2 respectively.

   The offsets are compared rather than subtracted.  The difference of
   two uint32_t values does not fit in an int, so returning it would
   report the wrong order whenever the offsets are 2 GiB or more
   apart.  */
static int
rangecmp (const void *p1, const void *p2)
{
  uint32_t lhs = ((const struct range *) p1)->from;
  uint32_t rhs = ((const struct range *) p2)->from;

  return (lhs > rhs) - (lhs < rhs);
}
| 107 | |
| 108 | |
| 109 | /* Calculate the amount of space needed for all the tables described |
| 110 | by the given header. Note we do not include the empty table space |
| 111 | that has been preallocated in the file, so our mapping may not be |
| 112 | large enough if localedef adds data to the file in place. However, |
| 113 | doing that would permute the header fields while we are accessing |
| 114 | them and thus not be safe anyway, so we don't allow for that. */ |
| 115 | static inline off_t |
| 116 | calculate_head_size (const struct locarhead *h) |
| 117 | { |
| 118 | off_t namehash_end = (h->namehash_offset |
| 119 | + h->namehash_size * sizeof (struct namehashent)); |
| 120 | off_t string_end = h->string_offset + h->string_used; |
| 121 | off_t locrectab_end = (h->locrectab_offset |
| 122 | + h->locrectab_used * sizeof (struct locrecent)); |
| 123 | return MAX (namehash_end, MAX (string_end, locrectab_end)); |
| 124 | } |
| 125 | |
| 126 | |
| 127 | /* Find the locale *NAMEP in the locale archive, and return the |
| 128 | internalized data structure for its CATEGORY data. If this locale has |
| 129 | already been loaded from the archive, just returns the existing data |
| 130 | structure. If successful, sets *NAMEP to point directly into the mapped |
| 131 | archive string table; that way, the next call can short-circuit strcmp. */ |
| 132 | struct __locale_data * |
| 133 | internal_function |
| 134 | _nl_load_locale_from_archive (int category, const char **namep) |
| 135 | { |
| 136 | const char *name = *namep; |
| 137 | struct |
| 138 | { |
| 139 | void *addr; |
| 140 | size_t len; |
| 141 | } results[__LC_LAST]; |
| 142 | struct locale_in_archive *lia; |
| 143 | struct locarhead *head; |
| 144 | struct namehashent *namehashtab; |
| 145 | struct locrecent *locrec; |
| 146 | struct archmapped *mapped; |
| 147 | struct archmapped *last; |
| 148 | unsigned long int hval; |
| 149 | size_t idx; |
| 150 | size_t incr; |
| 151 | struct range ranges[__LC_LAST - 1]; |
| 152 | int nranges; |
| 153 | int cnt; |
| 154 | size_t ps = __sysconf (_SC_PAGE_SIZE); |
| 155 | int fd = -1; |
| 156 | |
| 157 | /* Check if we have already loaded this locale from the archive. |
| 158 | If we previously loaded the locale but found bogons in the data, |
| 159 | then we will have stored a null pointer to return here. */ |
| 160 | for (lia = archloaded; lia != NULL; lia = lia->next) |
| 161 | if (name == lia->name || !strcmp (name, lia->name)) |
| 162 | { |
| 163 | *namep = lia->name; |
| 164 | return lia->data[category]; |
| 165 | } |
| 166 | |
| 167 | { |
| 168 | /* If the name contains a codeset, then we normalize the name before |
| 169 | doing the lookup. */ |
| 170 | const char *p = strchr (name, '.'); |
| 171 | if (p != NULL && p[1] != '@' && p[1] != '\0') |
| 172 | { |
| 173 | const char *rest = __strchrnul (++p, '@'); |
| 174 | const char *normalized_codeset = _nl_normalize_codeset (p, rest - p); |
| 175 | if (normalized_codeset == NULL) /* malloc failure */ |
| 176 | return NULL; |
| 177 | if (strncmp (normalized_codeset, p, rest - p) != 0 |
| 178 | || normalized_codeset[rest - p] != '\0') |
| 179 | { |
| 180 | /* There is a normalized codeset name that is different from |
| 181 | what was specified; reconstruct a new locale name using it. */ |
| 182 | size_t normlen = strlen (normalized_codeset); |
| 183 | size_t restlen = strlen (rest) + 1; |
| 184 | char *newname = alloca (p - name + normlen + restlen); |
| 185 | memcpy (__mempcpy (__mempcpy (newname, name, p - name), |
| 186 | normalized_codeset, normlen), |
| 187 | rest, restlen); |
| 188 | name = newname; |
| 189 | } |
| 190 | free ((char *) normalized_codeset); |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | /* Make sure the archive is loaded. */ |
| 195 | if (archmapped == NULL) |
| 196 | { |
| 197 | void *result; |
| 198 | size_t headsize, mapsize; |
| 199 | |
| 200 | /* We do this early as a sign that we have tried to open the archive. |
| 201 | If headmap.ptr remains null, that's an indication that we tried |
| 202 | and failed, so we won't try again. */ |
| 203 | archmapped = &headmap; |
| 204 | |
| 205 | /* The archive has never been opened. */ |
| 206 | fd = open_not_cancel_2 (archfname, O_RDONLY|O_LARGEFILE|O_CLOEXEC); |
| 207 | if (fd < 0) |
| 208 | /* Cannot open the archive, for whatever reason. */ |
| 209 | return NULL; |
| 210 | |
| 211 | if (__fxstat64 (_STAT_VER, fd, &archive_stat) == -1) |
| 212 | { |
| 213 | /* stat failed, very strange. */ |
| 214 | close_and_out: |
| 215 | if (fd >= 0) |
| 216 | close_not_cancel_no_status (fd); |
| 217 | return NULL; |
| 218 | } |
| 219 | |
| 220 | |
| 221 | /* Map an initial window probably large enough to cover the header |
| 222 | and the first locale's data. With a large address space, we can |
| 223 | just map the whole file and be sure everything is covered. */ |
| 224 | |
| 225 | mapsize = (sizeof (void *) > 4 ? archive_stat.st_size |
| 226 | : MIN (archive_stat.st_size, ARCHIVE_MAPPING_WINDOW)); |
| 227 | |
| 228 | result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, fd, 0); |
| 229 | if (result == MAP_FAILED) |
| 230 | goto close_and_out; |
| 231 | |
| 232 | /* Check whether the file is large enough for the sizes given in |
| 233 | the header. Theoretically an archive could be so large that |
| 234 | just the header fails to fit in our initial mapping window. */ |
| 235 | headsize = calculate_head_size ((const struct locarhead *) result); |
| 236 | if (headsize > mapsize) |
| 237 | { |
| 238 | (void) __munmap (result, mapsize); |
| 239 | if (sizeof (void *) > 4 || headsize > archive_stat.st_size) |
| 240 | /* The file is not big enough for the header. Bogus. */ |
| 241 | goto close_and_out; |
| 242 | |
| 243 | /* Freakishly long header. */ |
| 244 | /* XXX could use mremap when available */ |
| 245 | mapsize = (headsize + ps - 1) & ~(ps - 1); |
| 246 | result = __mmap64 (NULL, mapsize, PROT_READ, MAP_FILE|MAP_COPY, |
| 247 | fd, 0); |
| 248 | if (result == MAP_FAILED) |
| 249 | goto close_and_out; |
| 250 | } |
| 251 | |
| 252 | if (sizeof (void *) > 4 || mapsize >= archive_stat.st_size) |
| 253 | { |
| 254 | /* We've mapped the whole file already, so we can be |
| 255 | sure we won't need this file descriptor later. */ |
| 256 | close_not_cancel_no_status (fd); |
| 257 | fd = -1; |
| 258 | } |
| 259 | |
| 260 | headmap.ptr = result; |
| 261 | /* headmap.from already initialized to zero. */ |
| 262 | headmap.len = mapsize; |
| 263 | } |
| 264 | |
| 265 | /* If there is no archive or it cannot be loaded for some reason fail. */ |
| 266 | if (__glibc_unlikely (headmap.ptr == NULL)) |
| 267 | goto close_and_out; |
| 268 | |
| 269 | /* We have the archive available. To find the name we first have to |
| 270 | determine its hash value. */ |
| 271 | hval = compute_hashval (name, strlen (name)); |
| 272 | |
| 273 | head = headmap.ptr; |
| 274 | namehashtab = (struct namehashent *) ((char *) head |
| 275 | + head->namehash_offset); |
| 276 | |
| 277 | /* Avoid division by 0 if the file is corrupted. */ |
| 278 | if (__glibc_unlikely (head->namehash_size == 0)) |
| 279 | goto close_and_out; |
| 280 | |
| 281 | idx = hval % head->namehash_size; |
| 282 | incr = 1 + hval % (head->namehash_size - 2); |
| 283 | |
| 284 | /* If the name_offset field is zero this means this is a |
| 285 | deleted entry and therefore no entry can be found. */ |
| 286 | while (1) |
| 287 | { |
| 288 | if (namehashtab[idx].name_offset == 0) |
| 289 | /* Not found. */ |
| 290 | goto close_and_out; |
| 291 | |
| 292 | if (namehashtab[idx].hashval == hval |
| 293 | && strcmp (name, headmap.ptr + namehashtab[idx].name_offset) == 0) |
| 294 | /* Found the entry. */ |
| 295 | break; |
| 296 | |
| 297 | idx += incr; |
| 298 | if (idx >= head->namehash_size) |
| 299 | idx -= head->namehash_size; |
| 300 | } |
| 301 | |
| 302 | /* We found an entry. It might be a placeholder for a removed one. */ |
| 303 | if (namehashtab[idx].locrec_offset == 0) |
| 304 | goto close_and_out; |
| 305 | |
| 306 | locrec = (struct locrecent *) (headmap.ptr + namehashtab[idx].locrec_offset); |
| 307 | |
| 308 | if (sizeof (void *) > 4 /* || headmap.len == archive_stat.st_size */) |
| 309 | { |
| 310 | /* We already have the whole locale archive mapped in. */ |
| 311 | assert (headmap.len == archive_stat.st_size); |
| 312 | for (cnt = 0; cnt < __LC_LAST; ++cnt) |
| 313 | if (cnt != LC_ALL) |
| 314 | { |
| 315 | if (locrec->record[cnt].offset + locrec->record[cnt].len |
| 316 | > headmap.len) |
| 317 | /* The archive locrectab contains bogus offsets. */ |
| 318 | goto close_and_out; |
| 319 | results[cnt].addr = headmap.ptr + locrec->record[cnt].offset; |
| 320 | results[cnt].len = locrec->record[cnt].len; |
| 321 | } |
| 322 | } |
| 323 | else |
| 324 | { |
| 325 | /* Get the offsets of the data files and sort them. */ |
| 326 | for (cnt = nranges = 0; cnt < __LC_LAST; ++cnt) |
| 327 | if (cnt != LC_ALL) |
| 328 | { |
| 329 | ranges[nranges].from = locrec->record[cnt].offset; |
| 330 | ranges[nranges].len = locrec->record[cnt].len; |
| 331 | ranges[nranges].category = cnt; |
| 332 | ranges[nranges].result = NULL; |
| 333 | |
| 334 | ++nranges; |
| 335 | } |
| 336 | |
| 337 | qsort (ranges, nranges, sizeof (ranges[0]), rangecmp); |
| 338 | |
| 339 | /* The information about mmap'd blocks is kept in a list. |
| 340 | Skip over the blocks which are before the data we need. */ |
| 341 | last = mapped = archmapped; |
| 342 | for (cnt = 0; cnt < nranges; ++cnt) |
| 343 | { |
| 344 | int upper; |
| 345 | size_t from; |
| 346 | size_t to; |
| 347 | void *addr; |
| 348 | struct archmapped *newp; |
| 349 | |
| 350 | /* Determine whether the appropriate page is already mapped. */ |
| 351 | while (mapped != NULL |
| 352 | && (mapped->from + mapped->len |
| 353 | <= ranges[cnt].from + ranges[cnt].len)) |
| 354 | { |
| 355 | last = mapped; |
| 356 | mapped = mapped->next; |
| 357 | } |
| 358 | |
| 359 | /* Do we have a match? */ |
| 360 | if (mapped != NULL |
| 361 | && mapped->from <= ranges[cnt].from |
| 362 | && (ranges[cnt].from + ranges[cnt].len |
| 363 | <= mapped->from + mapped->len)) |
| 364 | { |
| 365 | /* Yep, already loaded. */ |
| 366 | results[ranges[cnt].category].addr = ((char *) mapped->ptr |
| 367 | + ranges[cnt].from |
| 368 | - mapped->from); |
| 369 | results[ranges[cnt].category].len = ranges[cnt].len; |
| 370 | continue; |
| 371 | } |
| 372 | |
| 373 | /* Map the range with the locale data from the file. We will |
| 374 | try to cover as much of the locale as possible. I.e., if the |
| 375 | next category (next as in "next offset") is on the current or |
| 376 | immediately following page we use it as well. */ |
| 377 | assert (powerof2 (ps)); |
| 378 | from = ranges[cnt].from & ~(ps - 1); |
| 379 | upper = cnt; |
| 380 | do |
| 381 | { |
| 382 | to = ranges[upper].from + ranges[upper].len; |
| 383 | if (to > (size_t) archive_stat.st_size) |
| 384 | /* The archive locrectab contains bogus offsets. */ |
| 385 | goto close_and_out; |
| 386 | to = (to + ps - 1) & ~(ps - 1); |
| 387 | |
| 388 | /* If a range is already mmaped in, stop. */ |
| 389 | if (mapped != NULL && ranges[upper].from >= mapped->from) |
| 390 | break; |
| 391 | |
| 392 | ++upper; |
| 393 | } |
| 394 | /* Loop while still in contiguous pages. */ |
| 395 | while (upper < nranges && ranges[upper].from < to + ps); |
| 396 | |
| 397 | /* Open the file if it hasn't happened yet. */ |
| 398 | if (fd == -1) |
| 399 | { |
| 400 | struct stat64 st; |
| 401 | fd = open_not_cancel_2 (archfname, |
| 402 | O_RDONLY|O_LARGEFILE|O_CLOEXEC); |
| 403 | if (fd == -1) |
| 404 | /* Cannot open the archive, for whatever reason. */ |
| 405 | return NULL; |
| 406 | /* Now verify we think this is really the same archive file |
| 407 | we opened before. If it has been changed we cannot trust |
| 408 | the header we read previously. */ |
| 409 | if (__fxstat64 (_STAT_VER, fd, &st) < 0 |
| 410 | || st.st_size != archive_stat.st_size |
| 411 | || st.st_mtime != archive_stat.st_mtime |
| 412 | || st.st_dev != archive_stat.st_dev |
| 413 | || st.st_ino != archive_stat.st_ino) |
| 414 | goto close_and_out; |
| 415 | } |
| 416 | |
| 417 | /* Map the range from the archive. */ |
| 418 | addr = __mmap64 (NULL, to - from, PROT_READ, MAP_FILE|MAP_COPY, |
| 419 | fd, from); |
| 420 | if (addr == MAP_FAILED) |
| 421 | goto close_and_out; |
| 422 | |
| 423 | /* Allocate a record for this mapping. */ |
| 424 | newp = (struct archmapped *) malloc (sizeof (struct archmapped)); |
| 425 | if (newp == NULL) |
| 426 | { |
| 427 | (void) __munmap (addr, to - from); |
| 428 | goto close_and_out; |
| 429 | } |
| 430 | |
| 431 | /* And queue it. */ |
| 432 | newp->ptr = addr; |
| 433 | newp->from = from; |
| 434 | newp->len = to - from; |
| 435 | assert (last->next == mapped); |
| 436 | newp->next = mapped; |
| 437 | last->next = newp; |
| 438 | last = newp; |
| 439 | |
| 440 | /* Determine the load addresses for the category data. */ |
| 441 | do |
| 442 | { |
| 443 | assert (ranges[cnt].from >= from); |
| 444 | results[ranges[cnt].category].addr = ((char *) addr |
| 445 | + ranges[cnt].from - from); |
| 446 | results[ranges[cnt].category].len = ranges[cnt].len; |
| 447 | } |
| 448 | while (++cnt < upper); |
| 449 | --cnt; /* The 'for' will increase 'cnt' again. */ |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | /* We don't need the file descriptor any longer. */ |
| 454 | if (fd >= 0) |
| 455 | close_not_cancel_no_status (fd); |
| 456 | fd = -1; |
| 457 | |
| 458 | /* We succeeded in mapping all the necessary regions of the archive. |
| 459 | Now we need the expected data structures to point into the data. */ |
| 460 | |
| 461 | lia = malloc (sizeof *lia); |
| 462 | if (__glibc_unlikely (lia == NULL)) |
| 463 | return NULL; |
| 464 | |
| 465 | lia->name = strdup (*namep); |
| 466 | if (__glibc_unlikely (lia->name == NULL)) |
| 467 | { |
| 468 | free (lia); |
| 469 | return NULL; |
| 470 | } |
| 471 | |
| 472 | lia->next = archloaded; |
| 473 | archloaded = lia; |
| 474 | |
| 475 | for (cnt = 0; cnt < __LC_LAST; ++cnt) |
| 476 | if (cnt != LC_ALL) |
| 477 | { |
| 478 | lia->data[cnt] = _nl_intern_locale_data (cnt, |
| 479 | results[cnt].addr, |
| 480 | results[cnt].len); |
| 481 | if (__glibc_likely (lia->data[cnt] != NULL)) |
| 482 | { |
| 483 | /* _nl_intern_locale_data leaves us these fields to initialize. */ |
| 484 | lia->data[cnt]->alloc = ld_archive; |
| 485 | lia->data[cnt]->name = lia->name; |
| 486 | |
| 487 | /* We do this instead of bumping the count each time we return |
| 488 | this data because the mappings stay around forever anyway |
| 489 | and we might as well hold on to a little more memory and not |
| 490 | have to rebuild it on the next lookup of the same thing. |
| 491 | If we were to maintain the usage_count normally and let the |
| 492 | structures be freed, we would have to remove the elements |
| 493 | from archloaded too. */ |
| 494 | lia->data[cnt]->usage_count = UNDELETABLE; |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | *namep = lia->name; |
| 499 | return lia->data[category]; |
| 500 | } |
| 501 | |
| 502 | void __libc_freeres_fn_section |
| 503 | _nl_archive_subfreeres (void) |
| 504 | { |
| 505 | struct locale_in_archive *lia; |
| 506 | struct archmapped *am; |
| 507 | |
| 508 | /* Toss out our cached locales. */ |
| 509 | lia = archloaded; |
| 510 | while (lia != NULL) |
| 511 | { |
| 512 | int category; |
| 513 | struct locale_in_archive *dead = lia; |
| 514 | lia = lia->next; |
| 515 | |
| 516 | free (dead->name); |
| 517 | for (category = 0; category < __LC_LAST; ++category) |
| 518 | if (category != LC_ALL) |
| 519 | { |
| 520 | /* _nl_unload_locale just does this free for the archive case. */ |
| 521 | if (dead->data[category]->private.cleanup) |
| 522 | (*dead->data[category]->private.cleanup) (dead->data[category]); |
| 523 | |
| 524 | free (dead->data[category]); |
| 525 | } |
| 526 | free (dead); |
| 527 | } |
| 528 | archloaded = NULL; |
| 529 | |
| 530 | if (archmapped != NULL) |
| 531 | { |
| 532 | /* Now toss all the mapping windows, which we know nothing is using any |
| 533 | more because we just tossed all the locales that point into them. */ |
| 534 | |
| 535 | assert (archmapped == &headmap); |
| 536 | archmapped = NULL; |
| 537 | (void) __munmap (headmap.ptr, headmap.len); |
| 538 | am = headmap.next; |
| 539 | while (am != NULL) |
| 540 | { |
| 541 | struct archmapped *dead = am; |
| 542 | am = am->next; |
| 543 | (void) __munmap (dead->ptr, dead->len); |
| 544 | free (dead); |
| 545 | } |
| 546 | } |
| 547 | } |