blob: f9ef5e4dbf68d5bfd29ad891058ec7cab7083556 [file] [log] [blame]
1From: Debian PHP Maintainers <team+pkg-php@tracker.debian.org>
2Date: Thu, 7 Mar 2019 19:42:35 +0000
3Subject: Add-support-for-use-of-the-system-timezone-database
4
5# License: MIT
6# http://opensource.org/licenses/MIT
7
8# License: MIT
9# http://opensource.org/licenses/MIT
10
11Add support for use of the system timezone database, rather
12than embedding a copy. Discussed upstream but was not desired.
13
14History:
15r18: adapt for autotool change in 7.3.3RC1
16r17: adapt for timelib 2018.01 (in 7.3.2RC1)
17r16: adapt for timelib 2017.06 (in 7.2.3RC1)
18r15: adapt for timelib 2017.05beta7 (in 7.2.0RC1)
19r14: improve check for valid tz file
20r13: adapt for upstream changes to use PHP allocator
21r12: adapt for upstream changes for new zic
22r11: use canonical names to avoid more case sensitivity issues
23 round lat/long from zone.tab towards zero per builtin db
24r10: make timezone case insensitive
25r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold)
26r8: fix compile error without --with-system-tzdata configured
27r7: improve check for valid timezone id to exclude directories
28r6: fix fd leak in r5, fix country code/BC flag use in
29 timezone_identifiers_list() using system db,
30 fix use of PECL timezonedb to override system db,
31r5: reverts addition of "System/Localtime" fake tzname.
32 updated for 5.3.0, parses zone.tab to pick up mapping between
33 timezone name, country code and long/lat coords
34r4: added "System/Localtime" tzname which uses /etc/localtime
35r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
36r2: add filesystem trawl to set up name alias index
37r1: initial revision
38---
39 ext/date/config0.m4 | 13 ++
40 ext/date/lib/parse_tz.c | 535 +++++++++++++++++++++++++++++++++++++++++++++++-
41 2 files changed, 545 insertions(+), 3 deletions(-)
42
43--- a/ext/date/config0.m4
44+++ b/ext/date/config0.m4
45@@ -4,6 +4,19 @@ AC_CHECK_HEADERS([io.h])
46 dnl Check for strtoll, atoll
47 AC_CHECK_FUNCS(strtoll atoll)
48
49+PHP_ARG_WITH(system-tzdata, for use of system timezone data,
50+[ --with-system-tzdata[=DIR] to specify use of system timezone data],
51+no, no)
52+
53+if test "$PHP_SYSTEM_TZDATA" != "no"; then
54+ AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
55+
56+ if test "$PHP_SYSTEM_TZDATA" != "yes"; then
57+ AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
58+ [Define for location of system timezone data])
59+ fi
60+fi
61+
62 PHP_DATE_CFLAGS="-I@ext_builddir@/lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1 -DHAVE_TIMELIB_CONFIG_H=1"
63 timelib_sources="lib/astro.c lib/dow.c lib/parse_date.c lib/parse_tz.c
64 lib/timelib.c lib/tm2unixtime.c lib/unixtime2tm.c lib/parse_iso_intervals.c lib/interval.c"
65--- a/ext/date/lib/parse_tz.c
66+++ b/ext/date/lib/parse_tz.c
67@@ -26,8 +26,21 @@
68 #include "timelib.h"
69 #include "timelib_private.h"
70
71+#ifdef HAVE_SYSTEM_TZDATA
72+#include <sys/mman.h>
73+#include <sys/stat.h>
74+#include <limits.h>
75+#include <fcntl.h>
76+#include <unistd.h>
77+
78+#include "php_scandir.h"
79+
80+#else
81 #define TIMELIB_SUPPORTS_V2DATA
82 #include "timezonedb.h"
83+#endif
84+
85+#include <ctype.h>
86
87 #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
88 # if defined(__LITTLE_ENDIAN__)
89@@ -88,6 +101,11 @@ static int read_php_preamble(const unsig
90 {
91 uint32_t version;
92
93+ if (memcmp(*tzf, "TZif", 4) == 0) {
94+ *tzf += 20;
95+ return 0;
96+ }
97+
98 /* read ID */
99 version = (*tzf)[3] - '0';
100 *tzf += 4;
101@@ -412,7 +430,429 @@ void timelib_dump_tzinfo(timelib_tzinfo
102 }
103 }
104
105-static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
106+#ifdef HAVE_SYSTEM_TZDATA
107+
108+#ifdef HAVE_SYSTEM_TZDATA_PREFIX
109+#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
110+#else
111+#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
112+#endif
113+
114+/* System timezone database pointer. */
115+static const timelib_tzdb *timezonedb_system;
116+
117+/* Hash table entry for the cache of the zone.tab mapping table. */
118+struct location_info {
119+ char code[2]; /* two-character country code; not NUL-terminated */
120+ double latitude, longitude; /* parsed from the zone.tab coordinates column */
121+ char name[64]; /* timezone name; the key that is hashed and compared */
122+ char *comment; /* strdup()ed copy of the zone.tab comment column */
123+ struct location_info *next; /* next entry in the same hash bucket */
124+};
125+
126+/* Cache of zone.tab. */
127+static struct location_info **system_location_table;
128+
129+/* Size of the zone.tab hash table; a random-ish prime big enough to
130+ * prevent too many collisions. */
131+#define LOCINFO_HASH_SIZE (1021)
132+
133+/* Bucket index for str: a case-insensitive hash modulo LOCINFO_HASH_SIZE. */
134+static uint32_t tz_hash(const char *str)
135+{
136+ const unsigned char *cursor = (const unsigned char *)str;
137+ uint32_t acc = 5381;
138+ int ch;
139+
140+ for (ch = tolower(*cursor); ch != '\0'; ch = tolower(*++cursor)) {
141+ acc = (acc << 5) ^ acc ^ ch;
142+ }
143+
144+ return acc % LOCINFO_HASH_SIZE;
145+}
146+
147+/* Parse one signed ISO-6709 coordinate as used in zone.tab. Returns the
148+ * position just past the digits consumed, or NULL on parse error. On
149+ * success, writes the value in decimal degrees to *result. */
150+static char *parse_iso6709(char *p, double *result)
151+{
152+ double degrees, sign;
153+ char *stop;
154+ size_t ndigits;
155+
156+ if (*p != '+' && *p != '-')
157+ return NULL;
158+ sign = (*p == '-') ? -1.0 : 1.0;
159+ p++;
160+
161+ stop = p;
162+ while (*stop >= '0' && *stop <= '9')
163+ stop++;
164+
165+ /* zone.tab writes coordinates without a decimal point, so the
166+ * digit count selects the layout:
167+ *
168+ * 4 = DDMM
169+ * 5 = DDDMM
170+ * 6 = DDMMSS
171+ * 7 = DDDMMSS
172+ */
173+ ndigits = stop - p;
174+ if (ndigits < 4 || ndigits > 7) {
175+ return NULL;
176+ }
177+
178+ /* p => [D]DD */
179+ degrees = (p[0] - '0') * 10.0 + (p[1] - '0');
180+ p += 2;
181+ if (ndigits == 5 || ndigits == 7) {
182+ degrees = degrees * 10.0 + (*p++ - '0');
183+ }
184+
185+ /* p => MM[SS] */
186+ degrees += (10.0 * (p[0] - '0')
187+ + p[1] - '0') / 60.0;
188+ p += 2;
189+ /* p => [SS] */
190+ if (ndigits > 5) {
191+ degrees += (10.0 * (p[0] - '0')
192+ + p[1] - '0') / 3600.0;
193+ p += 2;
194+ }
195+
196+ /* Truncate (toward zero) to five decimal places, not because it's
197+ * a good idea, but because the builtin data is stored that way, so
198+ * match that. */
199+ *result = trunc(degrees * sign * 100000.0) / 100000.0;
200+
201+ return p;
202+}
203+
204+/* Parse ZONEINFO_PREFIX "/zone.tab" into a hash table mapping each
205+ * timezone name to its country code, coordinates and comment. The
206+ * table has LOCINFO_HASH_SIZE buckets indexed by tz_hash(timezone-name);
207+ * lines not shaped like "CC<TAB>coords<TAB>name[<TAB>comment]" are
208+ * skipped. Returns NULL if the table cannot be allocated or the file
209+ * cannot be opened; stops reading early if an entry cannot be allocated. */
210+static struct location_info **create_location_table(void)
211+{
212+ struct location_info **li, *i;
213+ char zone_tab[PATH_MAX];
214+ char line[512];
215+ FILE *fp;
216+
217+ snprintf(zone_tab, sizeof zone_tab, "%s", ZONEINFO_PREFIX "/zone.tab");
218+
219+ li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
220+ fp = li ? fopen(zone_tab, "r") : NULL;
221+ if (!fp) {
222+ free(li);
223+ return NULL;
224+ }
225+
226+ while (fgets(line, sizeof line, fp)) {
227+ char *p = line, *code, *name, *comment;
228+ uint32_t hash;
229+ double latitude, longitude;
230+
231+ while (isspace((unsigned char)*p))
232+ p++;
233+
234+ if (*p == '#' || *p == '\0' || *p == '\n')
235+ continue;
236+
237+ if (!isalpha((unsigned char)p[0]) || !isalpha((unsigned char)p[1]) || p[2] != '\t')
238+ continue;
239+
240+ /* code => AA */
241+ code = p;
242+ p[2] = 0;
243+ p += 3;
244+
245+ /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
246+ p = parse_iso6709(p, &latitude);
247+ if (p)
248+ p = parse_iso6709(p, &longitude);
249+ if (!p || *p != '\t') {
250+ continue;
251+ }
252+
253+ /* name = string */
254+ name = ++p;
255+ while (*p != '\t' && *p && *p != '\n')
256+ p++;
257+
258+ if (*p) /* never step past the line's terminating NUL */
259+ *p++ = '\0';
260+
261+ /* comment = string */
262+ comment = p;
263+ while (*p != '\t' && *p && *p != '\n')
264+ p++;
265+
266+ if (*p == '\n' || *p == '\t')
267+ *p = '\0';
268+
269+ hash = tz_hash(name);
270+ i = malloc(sizeof *i);
271+ if (!i)
272+ break;
273+ i->comment = strdup(comment);
274+ if (!i->comment) {
275+ free(i);
276+ break;
277+ }
278+ memcpy(i->code, code, 2);
279+ snprintf(i->name, sizeof i->name, "%s", name); /* always NUL-terminated */
280+ i->longitude = longitude;
281+ i->latitude = latitude;
282+ i->next = li[hash];
283+ li[hash] = i;
284+ }
285+
286+ fclose(fp);
287+
288+ return li;
289+}
290+
291+/* Look up name (case-insensitively) in the zone.tab hash table li.
292+ * Returns the matching entry, or NULL if li is NULL or name is absent. */
293+const struct location_info *find_zone_info(struct location_info **li,
294+ const char *name)
295+{
296+ const uint32_t bucket = tz_hash(name);
297+ const struct location_info *node;
298+
299+ if (li == NULL) {
300+ return NULL;
301+ }
302+
303+ node = li[bucket];
304+ while (node != NULL && timelib_strcasecmp(node->name, name) != 0) {
305+ node = node->next;
306+ }
307+
308+ return node;
309+}
310+
311+/* scandir() filter: reject ".", "..", the posix/right trees, posixrules
312+ * and any name containing ".list" or ".tab"; accept everything else. */
313+static int index_filter(const struct dirent *ent)
314+{
315+ const char *leaf = ent->d_name;
316+
317+ return !(strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0
318+ || strcmp(leaf, "posix") == 0
319+ || strcmp(leaf, "posixrules") == 0
320+ || strcmp(leaf, "right") == 0
321+ || strstr(leaf, ".list") != NULL || strstr(leaf, ".tab") != NULL);
322+}
323+
324+/* qsort()/bsearch() comparator: case-insensitive order of index ids. */
325+static int sysdbcmp(const void *first, const void *second)
326+{
327+ const timelib_tzdb_index_entry *lhs = first;
328+ const timelib_tzdb_index_entry *rhs = second;
329+ return timelib_strcasecmp(lhs->id, rhs->id);
330+}
331+
332+/* Create the zone identifier index by trawling the filesystem under ZONEINFO_PREFIX; the sorted result is stored in db->index / db->index_size. */
333+static void create_zone_index(timelib_tzdb *db)
334+{
335+ size_t dirstack_size, dirstack_top;
336+ size_t index_size, index_next;
337+ timelib_tzdb_index_entry *db_index;
338+ char **dirstack;
339+
340+ /* LIFO stack to hold directory entries to scan; each slot is a
341+ * strdup()ed directory name relative to the zoneinfo prefix. */
342+ dirstack_size = 32;
343+ dirstack = malloc(dirstack_size * sizeof *dirstack); /* NOTE(review): this and the later malloc/realloc/strdup results are used unchecked */
344+ dirstack_top = 1;
345+ dirstack[0] = strdup("");
346+
347+ /* Index array; grown by doubling when full. */
348+ index_size = 64;
349+ db_index = malloc(index_size * sizeof *db_index);
350+ index_next = 0;
351+
352+ do {
353+ struct dirent **ents;
354+ char name[PATH_MAX], *top;
355+ int count;
356+
357+ /* Pop the top stack entry, and iterate through its contents. */
358+ top = dirstack[--dirstack_top];
359+ snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
360+
361+ count = php_scandir(name, &ents, index_filter, php_alphasort);
362+
363+ while (count > 0) {
364+ struct stat st;
365+ const char *leaf = ents[count - 1]->d_name; /* entries are consumed from the end of ents */
366+
367+ snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s",
368+ top, leaf);
369+
370+ if (strlen(name) && stat(name, &st) == 0) {
371+ /* Name, relative to the zoneinfo prefix; name is reused for it. */
372+ const char *root = top;
373+
374+ if (root[0] == '/') root++;
375+
376+ snprintf(name, sizeof name, "%s%s%s", root,
377+ *root ? "/": "", leaf);
378+
379+ if (S_ISDIR(st.st_mode)) {
380+ if (dirstack_top == dirstack_size) {
381+ dirstack_size *= 2;
382+ dirstack = realloc(dirstack,
383+ dirstack_size * sizeof *dirstack);
384+ }
385+ dirstack[dirstack_top++] = strdup(name);
386+ }
387+ else {
388+ if (index_next == index_size) {
389+ index_size *= 2;
390+ db_index = realloc(db_index,
391+ index_size * sizeof *db_index);
392+ }
393+
394+ db_index[index_next++].id = strdup(name);
395+ }
396+ }
397+
398+ free(ents[--count]); /* release the entry just handled and step to the previous one */
399+ }
400+
401+ if (count != -1) free(ents); /* NOTE(review): presumably ents is not set when php_scandir() returns -1 - confirm */
402+ free(top);
403+ } while (dirstack_top);
404+
405+ qsort(db_index, index_next, sizeof *db_index, sysdbcmp); /* same comparator canonical_tzname() passes to bsearch() */
406+
407+ db->index = db_index;
408+ db->index_size = index_next;
409+
410+ free(dirstack);
411+}
412+
413+#define FAKE_HEADER "1234\0??\1??"
414+#define FAKE_UTC_POS (7 - 4)
415+
416+/* Create a fake data segment for database 'sysdb': FAKE_HEADER followed
417+ * by one 3-byte record (BC byte, country code) per entry found in 'info'. */
418+static void fake_data_segment(timelib_tzdb *sysdb,
419+ struct location_info **info)
420+{
421+ size_t n;
422+ char *data, *p;
423+
424+ data = malloc(3 * sysdb->index_size + sizeof(FAKE_HEADER) - 1); /* whole header + 3 bytes per entry */
425+ p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1);
426+
427+ for (n = 0; n < sysdb->index_size; n++) {
428+ const struct location_info *li;
429+ timelib_tzdb_index_entry *ent;
430+
431+ ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
432+
433+ /* UTC is special-cased: it points at the "\1??" record inside FAKE_HEADER. */
434+ if (strcmp(ent->id, "UTC") == 0) {
435+ ent->pos = FAKE_UTC_POS;
436+ continue;
437+ }
438+
439+ li = find_zone_info(info, ent->id);
440+ if (li) {
441+ /* If found, append the BC byte and the
442+ * country code; pos is the record's offset
443+ * counted from just after the 4-byte "1234". */
444+ ent->pos = (p - data) - 4;
445+ *p++ = '\1';
446+ *p++ = li->code[0];
447+ *p++ = li->code[1];
448+ }
449+ else {
450+ /* If not found, the timezone data can
451+ * point at the header's "\0??" record. */
452+ ent->pos = 0;
453+ }
454+ }
455+
456+ sysdb->data = (unsigned char *)data;
457+}
458+
459+/* Returns true if st describes a regular file larger than 20 bytes and,
460+ * when fd is non-zero, the first 4 bytes read from fd are "TZif". */
461+static int is_valid_tzfile(const struct stat *st, int fd)
462+{
463+ if (fd) {
464+ char buf[20];
465+ if (read(fd, buf, 20)!=20) {
466+ return 0;
467+ }
468+ lseek(fd, 0, SEEK_SET); /* rewind; lseek() takes (fd, offset, whence) */
469+ if (memcmp(buf, "TZif", 4)) {
470+ return 0;
471+ }
472+ }
473+ return S_ISREG(st->st_mode) && st->st_size > 20;
474+}
475+
476+/* Map timezone to the spelling stored in the system index (matched
477+ * case-insensitively); returns timezone itself if there is no match. */
478+static const char *canonical_tzname(const char *timezone)
479+{
480+ timelib_tzdb_index_entry key, *hit;
481+
482+ if (!timezonedb_system) {
483+ return timezone;
484+ }
485+
486+ /* bsearch() needs a whole entry as its key; sysdbcmp reads only id. */
487+ key.id = (char *)timezone;
488+ hit = bsearch(&key, timezonedb_system->index,
489+ timezonedb_system->index_size, sizeof key,
490+ sysdbcmp);
491+
492+ /* Fall back to the caller's spelling when the index has no entry. */
493+ return hit ? hit->id : timezone;
494+}
495+
496+/* mmap() the tzfile for timezone read-only and return it, or NULL if it
497+ * cannot be used. *length receives the file size once the file validates. */
498+static char *map_tzfile(const char *timezone, size_t *length)
499+{
500+ char path[PATH_MAX];
501+ struct stat sb;
502+ void *mapping;
503+ int fd;
504+
505+ if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
506+ return NULL;
507+ }
508+
509+ snprintf(path, sizeof path, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone));
510+ fd = open(path, O_RDONLY);
511+ if (fd == -1) {
512+ return NULL;
513+ }
514+ if (fstat(fd, &sb) != 0 || !is_valid_tzfile(&sb, fd)) {
515+ close(fd);
516+ return NULL;
517+ }
518+
519+ *length = sb.st_size;
520+ mapping = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
521+ close(fd);
522+
523+ return mapping != MAP_FAILED ? mapping : NULL;
524+}
525+
526+#endif
527+
528+static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
529 {
530 int left = 0, right = tzdb->index_size - 1;
531
532@@ -438,9 +878,48 @@ static int seek_to_tz_position(const uns
533 return 0;
534 }
535
536+/* Point *tzf at the data for timezone and return 1, or return 0 if it is
537+ * not found. For the system db the data is an mmap()ed file, also stored
538+ * in *map with its size in *maplen; otherwise the in-memory db is searched. */
539+static int seek_to_tz_position(const unsigned char **tzf, char *timezone,
540+ char **map, size_t *maplen,
541+ const timelib_tzdb *tzdb)
542+{
543+#ifdef HAVE_SYSTEM_TZDATA
544+ if (tzdb == timezonedb_system) {
545+ char *mapped = map_tzfile(timezone, maplen);
546+
547+ if (mapped == NULL) {
548+ return 0;
549+ }
550+
551+ *tzf = (unsigned char *)mapped;
552+ *map = mapped;
553+ return 1;
554+ }
555+#endif
556+
557+ return inmem_seek_to_tz_position(tzf, timezone, tzdb);
558+}
559+
560 const timelib_tzdb *timelib_builtin_db(void)
561 {
562+#ifdef HAVE_SYSTEM_TZDATA
563+ if (timezonedb_system == NULL) { /* first call: build the system db once and cache it */
564+ timelib_tzdb *tmp = malloc(sizeof *tmp); /* NOTE(review): result is not checked */
565+
566+ tmp->version = "0.system";
567+ tmp->data = NULL;
568+ create_zone_index(tmp); /* fills tmp->index and tmp->index_size */
569+ system_location_table = create_location_table(); /* may be NULL, e.g. when zone.tab cannot be opened */
570+ fake_data_segment(tmp, system_location_table); /* sets tmp->data and each index entry's pos */
571+ timezonedb_system = tmp;
572+ }
573+
574+ return timezonedb_system;
575+#else
576 return &timezonedb_builtin;
577+#endif
578 }
579
580 const timelib_tzdb_index_entry *timelib_timezone_identifiers_list(const timelib_tzdb *tzdb, int *count)
581@@ -452,7 +931,30 @@ const timelib_tzdb_index_entry *timelib_
582 int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
583 {
584 const unsigned char *tzf;
585- return (seek_to_tz_position(&tzf, timezone, tzdb));
586+
587+#ifdef HAVE_SYSTEM_TZDATA
588+ if (tzdb == timezonedb_system) {
589+ char fname[PATH_MAX];
590+ struct stat st;
591+
592+ if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) { /* reject empty names and any name containing ".." */
593+ return 0;
594+ }
595+
596+ if (system_location_table) {
597+ if (find_zone_info(system_location_table, timezone) != NULL) {
598+ /* listed in the zone.tab cache: accepted without checking the file */
599+ return 1;
600+ }
601+ }
602+
603+ snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", canonical_tzname(timezone));
604+
605+ return stat(fname, &st) == 0 && is_valid_tzfile(&st, 0); /* fd 0: only the stat data is checked, not the "TZif" magic */
606+ }
607+#endif
608+
609+ return (inmem_seek_to_tz_position(&tzf, timezone, tzdb));
610 }
611
612 static int skip_64bit_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
613@@ -494,12 +996,14 @@ static timelib_tzinfo* timelib_tzinfo_ct
614 timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb, int *error_code)
615 {
616 const unsigned char *tzf;
617+ char *memmap = NULL;
618+ size_t maplen;
619 timelib_tzinfo *tmp;
620 int version;
621 int transitions_result, types_result;
622 unsigned int type; /* TIMELIB_TZINFO_PHP or TIMELIB_TZINFO_ZONEINFO */
623
624- if (seek_to_tz_position(&tzf, timezone, tzdb)) {
625+ if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
626 tmp = timelib_tzinfo_ctor(timezone);
627
628 version = read_preamble(&tzf, tmp, &type);
629@@ -534,11 +1038,36 @@ timelib_tzinfo *timelib_parse_tzfile(cha
630 }
631 skip_posix_string(&tzf, tmp);
632
633+#ifdef HAVE_SYSTEM_TZDATA
634+ if (memmap) {
635+ const struct location_info *li;
636+
637+ /* TZif-style - grok the location info from the system database,
638+ * if possible. */
639+
640+ if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
641+ tmp->location.comments = timelib_strdup(li->comment);
642+ strncpy(tmp->location.country_code, li->code, 2);
643+ tmp->location.longitude = li->longitude;
644+ tmp->location.latitude = li->latitude;
645+ tmp->bc = 1;
646+ }
647+ else {
648+ set_default_location_and_comments(&tzf, tmp);
649+ }
650+
651+ /* Now done with the mmap segment - discard it. */
652+ munmap(memmap, maplen);
653+ } else {
654+#endif
655 if (type == TIMELIB_TZINFO_PHP) {
656 read_location(&tzf, tmp);
657 } else {
658 set_default_location_and_comments(&tzf, tmp);
659 }
660+#ifdef HAVE_SYSTEM_TZDATA
661+ }
662+#endif
663 } else {
664 *error_code = TIMELIB_ERROR_NO_SUCH_TIMEZONE;
665 tmp = NULL;