| From: Debian PHP Maintainers <team+pkg-php@tracker.debian.org> |
| Date: Thu, 7 Mar 2019 19:42:35 +0000 |
| Subject: Add-support-for-use-of-the-system-timezone-database |
| |
| # License: MIT |
| # http://opensource.org/licenses/MIT |
| |
| # License: MIT |
| # http://opensource.org/licenses/MIT |
| |
| Add support for use of the system timezone database, rather |
| than embedding a copy. Discussed upstream but was not desired. |
| |
| History: |
| r18: adapt for autotool change in 7.3.3RC1 |
| r17: adapt for timelib 2018.01 (in 7.3.2RC1) |
| r16: adapt for timelib 2017.06 (in 7.2.3RC1) |
| r15: adapt for timelib 2017.05beta7 (in 7.2.0RC1) |
| r14: improve check for valid tz file |
| r13: adapt for upstream changes to use PHP allocator |
| r12: adapt for upstream changes for new zic |
| r11: use canonical names to avoid more case sensitivity issues |
| round lat/long from zone.tab towards zero per builtin db |
| r10: make timezone case insensitive |
| r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold) |
| r8: fix compile error without --with-system-tzdata configured |
| r7: improve check for valid timezone id to exclude directories |
| r6: fix fd leak in r5, fix country code/BC flag use in |
| timezone_identifiers_list() using system db, |
| fix use of PECL timezonedb to override system db, |
| r5: reverts addition of "System/Localtime" fake tzname. |
| updated for 5.3.0, parses zone.tab to pick up mapping between |
| timezone name, country code and long/lat coords |
| r4: added "System/Localtime" tzname which uses /etc/localtime |
| r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert) |
| r2: add filesystem trawl to set up name alias index |
| r1: initial revision |
| --- |
| ext/date/config0.m4 | 13 ++ |
| ext/date/lib/parse_tz.c | 535 +++++++++++++++++++++++++++++++++++++++++++++++- |
| 2 files changed, 545 insertions(+), 3 deletions(-) |
| |
| --- a/ext/date/config0.m4 |
| +++ b/ext/date/config0.m4 |
| @@ -4,6 +4,19 @@ AC_CHECK_HEADERS([io.h]) |
| dnl Check for strtoll, atoll |
| AC_CHECK_FUNCS(strtoll atoll) |
| |
| +PHP_ARG_WITH(system-tzdata, for use of system timezone data, |
| +[ --with-system-tzdata[=DIR] to specify use of system timezone data], |
| +no, no) |
| + |
| +if test "$PHP_SYSTEM_TZDATA" != "no"; then |
| + AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used]) |
| + |
| + if test "$PHP_SYSTEM_TZDATA" != "yes"; then |
| + AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA", |
| + [Define for location of system timezone data]) |
| + fi |
| +fi |
| + |
| PHP_DATE_CFLAGS="-I@ext_builddir@/lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 -DHAVE_TIMELIB_CONFIG_H=1" |
| timelib_sources="lib/astro.c lib/dow.c lib/parse_date.c lib/parse_tz.c |
| lib/timelib.c lib/tm2unixtime.c lib/unixtime2tm.c lib/parse_iso_intervals.c lib/interval.c" |
| --- a/ext/date/lib/parse_tz.c |
| +++ b/ext/date/lib/parse_tz.c |
| @@ -26,8 +26,21 @@ |
| #include "timelib.h" |
| #include "timelib_private.h" |
| |
| +#ifdef HAVE_SYSTEM_TZDATA |
| +#include <sys/mman.h> |
| +#include <sys/stat.h> |
| +#include <limits.h> |
| +#include <fcntl.h> |
| +#include <unistd.h> |
| + |
| +#include "php_scandir.h" |
| + |
| +#else |
| #define TIMELIB_SUPPORTS_V2DATA |
| #include "timezonedb.h" |
| +#endif |
| + |
| +#include <ctype.h> |
| |
| #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__)) |
| # if defined(__LITTLE_ENDIAN__) |
| @@ -88,6 +101,11 @@ static int read_php_preamble(const unsig |
| { |
| uint32_t version; |
| |
| + if (memcmp(*tzf, "TZif", 4) == 0) { |
| + *tzf += 20; |
| + return 0; |
| + } |
| + |
| /* read ID */ |
| version = (*tzf)[3] - '0'; |
| *tzf += 4; |
| @@ -412,7 +430,429 @@ void timelib_dump_tzinfo(timelib_tzinfo |
| } |
| } |
| |
| -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb) |
| +#ifdef HAVE_SYSTEM_TZDATA |
| + |
| +#ifdef HAVE_SYSTEM_TZDATA_PREFIX |
| +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX |
| +#else |
| +#define ZONEINFO_PREFIX "/usr/share/zoneinfo" |
| +#endif |
| + |
| +/* System timezone database pointer. */ |
| +static const timelib_tzdb *timezonedb_system; |
| + |
| +/* Hash table entry for the cache of the zone.tab mapping table. */ |
| +struct location_info { |
| + char code[2]; |
| + double latitude, longitude; |
| + char name[64]; |
| + char *comment; |
| + struct location_info *next; |
| +}; |
| + |
| +/* Cache of zone.tab. */ |
| +static struct location_info **system_location_table; |
| + |
| +/* Size of the zone.tab hash table; a random-ish prime big enough to |
| + * prevent too many collisions. */ |
| +#define LOCINFO_HASH_SIZE (1021) |
| + |
| +/* Compute a case insensitive hash of str */ |
| +static uint32_t tz_hash(const char *str) |
| +{ |
| + const unsigned char *p = (const unsigned char *)str; |
| + uint32_t hash = 5381; |
| + int c; |
| + |
| + while ((c = tolower(*p++)) != '\0') { |
| + hash = (hash << 5) ^ hash ^ c; |
| + } |
| + |
| + return hash % LOCINFO_HASH_SIZE; |
| +} |
| + |
| +/* Parse an ISO-6709 date as used in zone.tab. Returns end of the |
| + * parsed string on success, or NULL on parse error. On success, |
| + * writes the parsed number to *result. */ |
| +static char *parse_iso6709(char *p, double *result) |
| +{ |
| + double v, sign; |
| + char *pend; |
| + size_t len; |
| + |
| + if (*p == '+') |
| + sign = 1.0; |
| + else if (*p == '-') |
| + sign = -1.0; |
| + else |
| + return NULL; |
| + |
| + p++; |
| + for (pend = p; *pend >= '0' && *pend <= '9'; pend++) |
| + ;; |
| + |
| + /* Annoying encoding used by zone.tab has no decimal point, so use |
| + * the length to determine the format: |
| + * |
| + * 4 = DDMM |
| + * 5 = DDDMM |
| + * 6 = DDMMSS |
| + * 7 = DDDMMSS |
| + */ |
| + len = pend - p; |
| + if (len < 4 || len > 7) { |
| + return NULL; |
| + } |
| + |
| + /* p => [D]DD */ |
| + v = (p[0] - '0') * 10.0 + (p[1] - '0'); |
| + p += 2; |
| + if (len == 5 || len == 7) |
| + v = v * 10.0 + (*p++ - '0'); |
| + /* p => MM[SS] */ |
| + v += (10.0 * (p[0] - '0') |
| + + p[1] - '0') / 60.0; |
| + p += 2; |
| + /* p => [SS] */ |
| + if (len > 5) { |
| + v += (10.0 * (p[0] - '0') |
| + + p[1] - '0') / 3600.0; |
| + p += 2; |
| + } |
| + |
| + /* Round to five decimal place, not because it's a good idea, |
| + * but, because the builtin data uses rounded data, so, match |
| + * that. */ |
| + *result = trunc(v * sign * 100000.0) / 100000.0; |
| + |
| + return p; |
| +} |
| + |
| +/* This function parses the zone.tab file to build up the mapping of |
| + * timezone to country code and geographic location, and returns a |
| + * hash table. The hash table is indexed by the function: |
| + * |
| + * tz_hash(timezone-name) |
| + */ |
| +static struct location_info **create_location_table(void) |
| +{ |
| + struct location_info **li, *i; |
| + char zone_tab[PATH_MAX]; |
| + char line[512]; |
| + FILE *fp; |
| + |
| + strncpy(zone_tab, ZONEINFO_PREFIX "/zone.tab", sizeof zone_tab); |
| + |
| + fp = fopen(zone_tab, "r"); |
| + if (!fp) { |
| + return NULL; |
| + } |
| + |
| + li = calloc(LOCINFO_HASH_SIZE, sizeof *li); |
| + |
| + while (fgets(line, sizeof line, fp)) { |
| + char *p = line, *code, *name, *comment; |
| + uint32_t hash; |
| + double latitude, longitude; |
| + |
| + while (isspace(*p)) |
| + p++; |
| + |
| + if (*p == '#' || *p == '\0' || *p == '\n') |
| + continue; |
| + |
| + if (!isalpha(p[0]) || !isalpha(p[1]) || p[2] != '\t') |
| + continue; |
| + |
| + /* code => AA */ |
| + code = p; |
| + p[2] = 0; |
| + p += 3; |
| + |
| + /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */ |
| + p = parse_iso6709(p, &latitude); |
| + if (!p) { |
| + continue; |
| + } |
| + p = parse_iso6709(p, &longitude); |
| + if (!p) { |
| + continue; |
| + } |
| + |
| + if (!p || *p != '\t') { |
| + continue; |
| + } |
| + |
| + /* name = string */ |
| + name = ++p; |
| + while (*p != '\t' && *p && *p != '\n') |
| + p++; |
| + |
| + *p++ = '\0'; |
| + |
| + /* comment = string */ |
| + comment = p; |
| + while (*p != '\t' && *p && *p != '\n') |
| + p++; |
| + |
| + if (*p == '\n' || *p == '\t') |
| + *p = '\0'; |
| + |
| + hash = tz_hash(name); |
| + i = malloc(sizeof *i); |
| + memcpy(i->code, code, 2); |
| + strncpy(i->name, name, sizeof i->name); |
| + i->comment = strdup(comment); |
| + i->longitude = longitude; |
| + i->latitude = latitude; |
| + i->next = li[hash]; |
| + li[hash] = i; |
| + /* printf("%s [%u, %f, %f]\n", name, hash, latitude, longitude); */ |
| + } |
| + |
| + fclose(fp); |
| + |
| + return li; |
| +} |
| + |
| +/* Return location info from hash table, using given timezone name. |
| + * Returns NULL if the name could not be found. */ |
| +const struct location_info *find_zone_info(struct location_info **li, |
| + const char *name) |
| +{ |
| + uint32_t hash = tz_hash(name); |
| + const struct location_info *l; |
| + |
| + if (!li) { |
| + return NULL; |
| + } |
| + |
| + for (l = li[hash]; l; l = l->next) { |
| + if (timelib_strcasecmp(l->name, name) == 0) |
| + return l; |
| + } |
| + |
| + return NULL; |
| +} |
| + |
| +/* Filter out some non-tzdata files and the posix/right databases, if |
| + * present. */ |
| +static int index_filter(const struct dirent *ent) |
| +{ |
| + return strcmp(ent->d_name, ".") != 0 |
| + && strcmp(ent->d_name, "..") != 0 |
| + && strcmp(ent->d_name, "posix") != 0 |
| + && strcmp(ent->d_name, "posixrules") != 0 |
| + && strcmp(ent->d_name, "right") != 0 |
| + && strstr(ent->d_name, ".list") == NULL |
| + && strstr(ent->d_name, ".tab") == NULL; |
| +} |
| + |
| +static int sysdbcmp(const void *first, const void *second) |
| +{ |
| + const timelib_tzdb_index_entry *alpha = first, *beta = second; |
| + |
| + return timelib_strcasecmp(alpha->id, beta->id); |
| +} |
| + |
| + |
| +/* Create the zone identifier index by trawling the filesystem. */ |
| +static void create_zone_index(timelib_tzdb *db) |
| +{ |
| + size_t dirstack_size, dirstack_top; |
| + size_t index_size, index_next; |
| + timelib_tzdb_index_entry *db_index; |
| + char **dirstack; |
| + |
| + /* LIFO stack to hold directory entries to scan; each slot is a |
| + * directory name relative to the zoneinfo prefix. */ |
| + dirstack_size = 32; |
| + dirstack = malloc(dirstack_size * sizeof *dirstack); |
| + dirstack_top = 1; |
| + dirstack[0] = strdup(""); |
| + |
| + /* Index array. */ |
| + index_size = 64; |
| + db_index = malloc(index_size * sizeof *db_index); |
| + index_next = 0; |
| + |
| + do { |
| + struct dirent **ents; |
| + char name[PATH_MAX], *top; |
| + int count; |
| + |
| + /* Pop the top stack entry, and iterate through its contents. */ |
| + top = dirstack[--dirstack_top]; |
| + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top); |
| + |
| + count = php_scandir(name, &ents, index_filter, php_alphasort); |
| + |
| + while (count > 0) { |
| + struct stat st; |
| + const char *leaf = ents[count - 1]->d_name; |
| + |
| + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s", |
| + top, leaf); |
| + |
| + if (strlen(name) && stat(name, &st) == 0) { |
| + /* Name, relative to the zoneinfo prefix. */ |
| + const char *root = top; |
| + |
| + if (root[0] == '/') root++; |
| + |
| + snprintf(name, sizeof name, "%s%s%s", root, |
| + *root ? "/": "", leaf); |
| + |
| + if (S_ISDIR(st.st_mode)) { |
| + if (dirstack_top == dirstack_size) { |
| + dirstack_size *= 2; |
| + dirstack = realloc(dirstack, |
| + dirstack_size * sizeof *dirstack); |
| + } |
| + dirstack[dirstack_top++] = strdup(name); |
| + } |
| + else { |
| + if (index_next == index_size) { |
| + index_size *= 2; |
| + db_index = realloc(db_index, |
| + index_size * sizeof *db_index); |
| + } |
| + |
| + db_index[index_next++].id = strdup(name); |
| + } |
| + } |
| + |
| + free(ents[--count]); |
| + } |
| + |
| + if (count != -1) free(ents); |
| + free(top); |
| + } while (dirstack_top); |
| + |
| + qsort(db_index, index_next, sizeof *db_index, sysdbcmp); |
| + |
| + db->index = db_index; |
| + db->index_size = index_next; |
| + |
| + free(dirstack); |
| +} |
| + |
| +#define FAKE_HEADER "1234\0??\1??" |
| +#define FAKE_UTC_POS (7 - 4) |
| + |
| +/* Create a fake data segment for database 'sysdb'. */ |
| +static void fake_data_segment(timelib_tzdb *sysdb, |
| + struct location_info **info) |
| +{ |
| + size_t n; |
| + char *data, *p; |
| + |
| + data = malloc(3 * sysdb->index_size + 7); |
| + |
| + p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1); |
| + |
| + for (n = 0; n < sysdb->index_size; n++) { |
| + const struct location_info *li; |
| + timelib_tzdb_index_entry *ent; |
| + |
| + ent = (timelib_tzdb_index_entry *)&sysdb->index[n]; |
| + |
| + /* Lookup the timezone name in the hash table. */ |
| + if (strcmp(ent->id, "UTC") == 0) { |
| + ent->pos = FAKE_UTC_POS; |
| + continue; |
| + } |
| + |
| + li = find_zone_info(info, ent->id); |
| + if (li) { |
| + /* If found, append the BC byte and the |
| + * country code; set the position for this |
| + * section of timezone data. */ |
| + ent->pos = (p - data) - 4; |
| + *p++ = '\1'; |
| + *p++ = li->code[0]; |
| + *p++ = li->code[1]; |
| + } |
| + else { |
| + /* If not found, the timezone data can |
| + * point at the header. */ |
| + ent->pos = 0; |
| + } |
| + } |
| + |
| + sysdb->data = (unsigned char *)data; |
| +} |
| + |
| +/* Returns true if the passed-in stat structure describes a |
| + * probably-valid timezone file. */ |
| +static int is_valid_tzfile(const struct stat *st, int fd) |
| +{ |
| + if (fd) { |
| + char buf[20]; |
| + if (read(fd, buf, 20)!=20) { |
| + return 0; |
| + } |
| + lseek(fd, SEEK_SET, 0); |
| + if (memcmp(buf, "TZif", 4)) { |
| + return 0; |
| + } |
| + } |
| + return S_ISREG(st->st_mode) && st->st_size > 20; |
| +} |
| + |
| +/* To allow timezone names to be used case-insensitively, find the |
| + * canonical name for this timezone, if possible. */ |
| +static const char *canonical_tzname(const char *timezone) |
| +{ |
| + if (timezonedb_system) { |
| + timelib_tzdb_index_entry *ent, lookup; |
| + |
| + lookup.id = (char *)timezone; |
| + |
| + ent = bsearch(&lookup, timezonedb_system->index, |
| + timezonedb_system->index_size, sizeof lookup, |
| + sysdbcmp); |
| + if (ent) { |
| + return ent->id; |
| + } |
| + } |
| + |
| + return timezone; |
| +} |
| + |
| +/* Return the mmap()ed tzfile if found, else NULL. On success, the |
| + * length of the mapped data is placed in *length. */ |
| +static char *map_tzfile(const char *timezone, size_t *length) |
| +{ |
| + char fname[PATH_MAX]; |
| + struct stat st; |
| + char *p; |
| + int fd; |
| + |
| + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) { |
| + return NULL; |
| + } |
| + |
| + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone)); |
| + |
| + fd = open(fname, O_RDONLY); |
| + if (fd == -1) { |
| + return NULL; |
| + } else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st, fd)) { |
| + close(fd); |
| + return NULL; |
| + } |
| + |
| + *length = st.st_size; |
| + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); |
| + close(fd); |
| + |
| + return p != MAP_FAILED ? p : NULL; |
| +} |
| + |
| +#endif |
| + |
| +static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb) |
| { |
| int left = 0, right = tzdb->index_size - 1; |
| |
| @@ -438,9 +878,48 @@ static int seek_to_tz_position(const uns |
| return 0; |
| } |
| |
| +static int seek_to_tz_position(const unsigned char **tzf, char *timezone, |
| + char **map, size_t *maplen, |
| + const timelib_tzdb *tzdb) |
| +{ |
| +#ifdef HAVE_SYSTEM_TZDATA |
| + if (tzdb == timezonedb_system) { |
| + char *orig; |
| + |
| + orig = map_tzfile(timezone, maplen); |
| + if (orig == NULL) { |
| + return 0; |
| + } |
| + |
| + (*tzf) = (unsigned char *)orig; |
| + *map = orig; |
| + return 1; |
| + } |
| + else |
| +#endif |
| + { |
| + return inmem_seek_to_tz_position(tzf, timezone, tzdb); |
| + } |
| +} |
| + |
| const timelib_tzdb *timelib_builtin_db(void) |
| { |
| +#ifdef HAVE_SYSTEM_TZDATA |
| + if (timezonedb_system == NULL) { |
| + timelib_tzdb *tmp = malloc(sizeof *tmp); |
| + |
| + tmp->version = "0.system"; |
| + tmp->data = NULL; |
| + create_zone_index(tmp); |
| + system_location_table = create_location_table(); |
| + fake_data_segment(tmp, system_location_table); |
| + timezonedb_system = tmp; |
| + } |
| + |
| + return timezonedb_system; |
| +#else |
| return &timezonedb_builtin; |
| +#endif |
| } |
| |
| const timelib_tzdb_index_entry *timelib_timezone_identifiers_list(const timelib_tzdb *tzdb, int *count) |
| @@ -452,7 +931,30 @@ const timelib_tzdb_index_entry *timelib_ |
| int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb) |
| { |
| const unsigned char *tzf; |
| - return (seek_to_tz_position(&tzf, timezone, tzdb)); |
| + |
| +#ifdef HAVE_SYSTEM_TZDATA |
| + if (tzdb == timezonedb_system) { |
| + char fname[PATH_MAX]; |
| + struct stat st; |
| + |
| + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) { |
| + return 0; |
| + } |
| + |
| + if (system_location_table) { |
| + if (find_zone_info(system_location_table, timezone) != NULL) { |
| + /* found in cache */ |
| + return 1; |
| + } |
| + } |
| + |
| + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone)); |
| + |
| + return stat(fname, &st) == 0 && is_valid_tzfile(&st, 0); |
| + } |
| +#endif |
| + |
| + return (inmem_seek_to_tz_position(&tzf, timezone, tzdb)); |
| } |
| |
| static int skip_64bit_preamble(const unsigned char **tzf, timelib_tzinfo *tz) |
| @@ -494,12 +996,14 @@ static timelib_tzinfo* timelib_tzinfo_ct |
| timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb, int *error_code) |
| { |
| const unsigned char *tzf; |
| + char *memmap = NULL; |
| + size_t maplen; |
| timelib_tzinfo *tmp; |
| int version; |
| int transitions_result, types_result; |
| unsigned int type; /* TIMELIB_TZINFO_PHP or TIMELIB_TZINFO_ZONEINFO */ |
| |
| - if (seek_to_tz_position(&tzf, timezone, tzdb)) { |
| + if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) { |
| tmp = timelib_tzinfo_ctor(timezone); |
| |
| version = read_preamble(&tzf, tmp, &type); |
| @@ -534,11 +1038,36 @@ timelib_tzinfo *timelib_parse_tzfile(cha |
| } |
| skip_posix_string(&tzf, tmp); |
| |
| +#ifdef HAVE_SYSTEM_TZDATA |
| + if (memmap) { |
| + const struct location_info *li; |
| + |
| + /* TZif-style - grok the location info from the system database, |
| + * if possible. */ |
| + |
| + if ((li = find_zone_info(system_location_table, timezone)) != NULL) { |
| + tmp->location.comments = timelib_strdup(li->comment); |
| + strncpy(tmp->location.country_code, li->code, 2); |
| + tmp->location.longitude = li->longitude; |
| + tmp->location.latitude = li->latitude; |
| + tmp->bc = 1; |
| + } |
| + else { |
| + set_default_location_and_comments(&tzf, tmp); |
| + } |
| + |
| + /* Now done with the mmap segment - discard it. */ |
| + munmap(memmap, maplen); |
| + } else { |
| +#endif |
| if (type == TIMELIB_TZINFO_PHP) { |
| read_location(&tzf, tmp); |
| } else { |
| set_default_location_and_comments(&tzf, tmp); |
| } |
| +#ifdef HAVE_SYSTEM_TZDATA |
| + } |
| +#endif |
| } else { |
| *error_code = TIMELIB_ERROR_NO_SUCH_TIMEZONE; |
| tmp = NULL; |