[PROPOSED 1/3] Tune and simplify addtype
* zic.c (addtype): Avoid the need to call strcmp each time through the loop checking for duplicate zone types. The strcmp is unnecessary if we already have checked for duplicate abbreviations and so can just look at the strings’ offsets. So, look for those duplicates first. --- zic.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/zic.c b/zic.c index 2cd8cc7..3e27a48 100644 --- a/zic.c +++ b/zic.c @@ -2876,16 +2876,28 @@ addtype(zic_t gmtoff, char const *abbr, bool isdst, bool ttisstd, bool ttisgmt) { register int i, j; - /* - ** See if there's already an entry for this zone type. - ** If so, just return its index. - */ - for (i = 0; i < typecnt; ++i) { + if (! (-1L - 2147483647L <= gmtoff && gmtoff <= 2147483647L)) { + error(_("UT offset out of range")); + exit(EXIT_FAILURE); + } + + for (j = 0; j < charcnt; ++j) + if (strcmp(&chars[j], abbr) == 0) + break; + if (j == charcnt) + newabbr(abbr); + else { + /* + ** See if there's already an entry for this zone type. + ** If so, just return its index. + */ + for (i = 0; i < typecnt; i++) { if (gmtoff == gmtoffs[i] && isdst == isdsts[i] && - strcmp(abbr, &chars[abbrinds[i]]) == 0 && + j == abbrinds[i] && ttisstd == ttisstds[i] && ttisgmt == ttisgmts[i]) return i; + } } /* ** There isn't one; add a new one, unless there are already too @@ -2895,22 +2907,12 @@ addtype(zic_t gmtoff, char const *abbr, bool isdst, bool ttisstd, bool ttisgmt) error(_("too many local time types")); exit(EXIT_FAILURE); } - if (! (-1L - 2147483647L <= gmtoff && gmtoff <= 2147483647L)) { - error(_("UT offset out of range")); - exit(EXIT_FAILURE); - } + i = typecnt++; gmtoffs[i] = gmtoff; isdsts[i] = isdst; ttisstds[i] = ttisstd; ttisgmts[i] = ttisgmt; - - for (j = 0; j < charcnt; ++j) - if (strcmp(&chars[j], abbr) == 0) - break; - if (j == charcnt) - newabbr(abbr); abbrinds[i] = j; - ++typecnt; return i; } -- 2.21.0
* NEWS: Mention this. * zic.c (writezone): When optimizing, discard a transition if it is marked as mergeable and if it changes neither the UT offset, nor the isdst flag, nor the time zone abbreviation. This removes a few useless transitions and types from the output when they are merely artifacts of how the input was written. For example, without this fix Europe/London generates a useless transition at the start of 1996, because the line "0:00 GB-Eire %s 1996" has an UNTIL field that specifies 1996-01-01 00:00:00 *local time*, unlike the previous transition specified at 1995-10-22 01:00:00 *UTC*, which means addtype will incorrectly consider the 1996 transition as being significant (as the two time types differ in their ttisgmt value even though they do not differ in UT offset, isdst flag, or time zone abbreviation). --- NEWS | 3 +++ zic.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index eb837ea..56bbc7f 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,9 @@ Unreleased, experimental changes longer needed for current tzdata, and caused problems with newlib when used with older tzdata (reported by David Gauchard). + zic no longer generates some artifact transitions. For example, + Europe/London no longer has a no-op transition in January 1996. + Changes to build procedure tzdata.zi now assumes zic 2017c or later. This shrinks tzdata.zi diff --git a/zic.c b/zic.c index 3e27a48..d43cc0d 100644 --- a/zic.c +++ b/zic.c @@ -1873,7 +1873,12 @@ writezone(const char *const name, const char *const string, char version, } if (toi == 0 || attypes[fromi].dontmerge - || attypes[toi - 1].type != attypes[fromi].type) + || (gmtoffs[attypes[toi - 1].type] + != gmtoffs[attypes[fromi].type]) + || (isdsts[attypes[toi - 1].type] + != isdsts[attypes[fromi].type]) + || (abbrinds[attypes[toi - 1].type] + != abbrinds[attypes[fromi].type])) attypes[toi++] = attypes[fromi]; } timecnt = toi; -- 2.21.0
* Makefile (ZFLAGS): Mention -b in comment. * NEWS: Mention this. * zic.c (usage): Mention -b. (bloat): New static var. (want_bloat): New static function. (main): Parse new -b option. (writezone): In slim output, suppress QTBUG-53071 workaround, pre-2011 bug workaround, all-zero isstdcnt and isutcnt indicators, and 32-bit data (except for header and the single type that RFC 8536 requires). (outzone): In slim output, suppress redundant transitions back to 1900 or forward through 2038, and suppress redundant transitions just before the TZ string takes over. * zic.8: Document -b. Sort options alphabetically. --- Makefile | 12 ++++++-- NEWS | 14 +++++++++ zic.8 | 47 ++++++++++++++++++++++++------ zic.c | 89 ++++++++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 133 insertions(+), 29 deletions(-) diff --git a/Makefile b/Makefile index 25f1d35..35db07d 100644 --- a/Makefile +++ b/Makefile @@ -362,9 +362,17 @@ LEAPSECONDS= zic= ./zic ZIC= $(zic) $(ZFLAGS) -# To shrink the size of installed TZif files, +# Append "-b fat" to install larger TZif files that work around +# incompatibilities and bugs in some TZif readers, notably readers that +# mishandle 64-bit data in TZif files. Append "-b slim" to install +# smaller TZif files that test for these year-2038 bugs. If no -b +# option is given, the current default is "-b fat", but this is +# intended to change as buggy readers often mishandle timestamps +# after 2038 anyway. +# +# To shrink the size of installed TZif files even further, # append "-r @N" to omit data before N-seconds-after-the-Epoch. -# See the zic man page for more about -r. +# See the zic man page for more about -b and -r. ZFLAGS= # How to use zic to install TZif files. diff --git a/NEWS b/NEWS index 56bbc7f..0e4c688 100644 --- a/NEWS +++ b/NEWS @@ -41,6 +41,20 @@ Unreleased, experimental changes Changes to code + zic's new -b option supports a way to control data bloat and to + test for year-2038 bugs in software that reads TZif files.
+ 'zic -b fat' and 'zic -b slim' generate larger and smaller output; + for example, changing from fat to slim shrinks the Europe/London + file from 3648 to 1625 bytes, saving about 55%. Fat and slim + files represent the same time data and use the same TZif format as + documented in tzfile(5) and in Internet RFC 8536. Fat format + attempts to work around bugs or incompatibilities on older + software that reads TZif files, notably software that mishandles + 64-bit TZif data. Slim format is more efficient and tests for + these bugs. Currently zic defaults to fat format, although this + is intended to change in future zic versions, as the buggy + software typically mishandles post-2038 timestamps anyway. + zic no longer treats a set of rules ending in 2037 specially. Previously, zic assumed that such a ruleset meant that future timestamps could not be predicted, and therefore omitted a diff --git a/zic.8 b/zic.8 index 89ea2d9..d02994f 100644 --- a/zic.8 +++ b/zic.8 @@ -45,6 +45,32 @@ Output version information and exit. .B \*-\*-help Output short usage message and exit. .TP +.BI "\*-b " bloat +Output backward-compatibility data as specified by +.IR bloat . +If +.I bloat +is +.BR fat , +generate additional data entries that work around potential bugs or +incompatibilities in older software, such as software that mishandles +the 64-bit generated data. +If +.I bloat +is +.BR slim , +keep the output files small; this can help check for the bugs +and incompatibilities. +Although the default is currently +.BR fat , +this is intended to change in future +.B zic +versions, as software that mishandles the 64-bit data typically +mishandles timestamps after the year 2038 anyway. +Also see the +.B \*-r +option for another way to shrink output size. +.TP .BI "\*-d " directory Create time conversion information files in the named directory rather than in the standard directory named below. 
@@ -59,6 +85,11 @@ will act as if the input contained a link line of the form .ti +.5i Link \fItimezone\fP localtime .TP +.BI "\*-L " leapsecondfilename +Read leap second information from the file with the given name. +If this option is not used, +no leap second information appears in output files. +.TP .BI "\*-p " timezone Use .IR timezone 's @@ -70,15 +101,6 @@ will act as if the input contained a link line of the form .ti +.5i Link \fItimezone\fP posixrules .TP -.BI "\*-t " file -When creating local time information, put the configuration link in -the named file rather than in the standard location. -.TP -.BI "\*-L " leapsecondfilename -Read leap second information from the file with the given name. -If this option is not used, -no leap second information appears in output files. -.TP .BR "\*-r " "[\fB@\fP\fIlo\fP][\fB/@\fP\fIhi\fP]" Reduce the size of output files by limiting their applicability to timestamps in the range from @@ -102,6 +124,13 @@ On platforms with GNU .BR date , .q "zic \-r @$(date +%s)" omits data intended for past timestamps. +Also see the +.B "\*-b slim" +option for another way to shrink output size. +.TP +.BI "\*-t " file +When creating local time information, put the configuration link in +the named file rather than in the standard location. .TP .B \*-v Be more verbose, and complain about the following situations: diff --git a/zic.c b/zic.c index d43cc0d..34228a5 100644 --- a/zic.c +++ b/zic.c @@ -574,8 +574,9 @@ usage(FILE *stream, int status) { fprintf(stream, _("%s: usage is %s [ --version ] [ --help ] [ -v ] \\\n" - "\t[ -l localtime ] [ -p posixrules ] [ -d directory ] \\\n" - "\t[ -t localtime-link ] [ -L leapseconds ] [ -r '[@lo][/@hi]' ] \\\n" + "\t[ -b {slim|fat} ] [ -d directory ] [ -l localtime ]" + " [ -L leapseconds ] \\\n" + "\t[ -p posixrules ] [ -r '[@lo][/@hi]' ] [ -t localtime-link ] \\\n" "\t[ filename ... 
]\n\n" "Report bugs to %s.\n"), progname, progname, REPORT_BUGS_TO); @@ -650,6 +651,17 @@ static const char * leapsec; static const char * tzdefault; static const char * yitcommand; +/* -1 if the TZif output file should be slim, 0 if default, 1 if the + output should be fat for backward compatibility. Currently the + default is fat, although this may change. */ +static int bloat; + +static bool +want_bloat(void) +{ + return 0 <= bloat; +} + int main(int argc, char **argv) { @@ -681,10 +693,22 @@ main(int argc, char **argv) } else if (strcmp(argv[k], "--help") == 0) { usage(stdout, EXIT_SUCCESS); } - while ((c = getopt(argc, argv, "d:l:L:p:r:st:vy:")) != EOF && c != -1) + while ((c = getopt(argc, argv, "b:d:l:L:p:r:st:vy:")) != EOF && c != -1) switch (c) { default: usage(stderr, EXIT_FAILURE); + case 'b': + if (strcmp(optarg, "slim") == 0) { + if (0 < bloat) + error(_("incompatible -b options")); + bloat = -1; + } else if (strcmp(optarg, "fat") == 0) { + if (bloat < 0) + error(_("incompatible -b options")); + bloat = 1; + } else + error(_("invalid option: -b '%s'"), optarg); + break; case 'd': if (directory == NULL) directory = optarg; @@ -1921,7 +1945,7 @@ writezone(const char *const name, const char *const string, char version, seconds, as the idea is to insert a transition just before 32-bit time_t rolls around, and this occurs at a slightly different moment if transitions are leap-second corrected. 
*/ - if (WORK_AROUND_QTBUG_53071 && timecnt != 0 + if (WORK_AROUND_QTBUG_53071 && timecnt != 0 && want_bloat() && ats[timecnt - 1] < y2038_boundary - 1 && strchr(string, '<')) { ats[timecnt] = y2038_boundary - 1; types[timecnt] = types[timecnt - 1]; @@ -1970,7 +1994,7 @@ writezone(const char *const name, const char *const string, char version, int old0; char omittype[TZ_MAX_TYPES]; int typemap[TZ_MAX_TYPES]; - register int thistypecnt; + int thistypecnt, stdcnt, utcnt; char thischars[TZ_MAX_CHARS]; int thischarcnt; bool toomanytimes; @@ -2053,7 +2077,7 @@ writezone(const char *const name, const char *const string, char version, ** (to help get global "altzone" and "timezone" variables ** set correctly). */ - { + if (want_bloat()) { register int mrudst, mrustd, hidst, histd, type; hidst = histd = mrudst = mrustd = -1; @@ -2100,12 +2124,16 @@ writezone(const char *const name, const char *const string, char version, for (i = 0; i < sizeof indmap / sizeof indmap[0]; ++i) indmap[i] = -1; - thischarcnt = 0; + thischarcnt = stdcnt = utcnt = 0; for (i = old0; i < typecnt; i++) { register char * thisabbr; if (omittype[i]) continue; + if (ttisstds[i]) + stdcnt = thistypecnt; + if (ttisgmts[i]) + utcnt = thistypecnt; if (indmap[abbrinds[i]] >= 0) continue; thisabbr = &chars[abbrinds[i]]; @@ -2118,12 +2146,18 @@ writezone(const char *const name, const char *const string, char version, } indmap[abbrinds[i]] = j; } + if (pass == 1 && !want_bloat()) { + utcnt = stdcnt = thisleapcnt = 0; + thistimecnt = - locut - hicut; + thistypecnt = thischarcnt = 1; + thistimelim = thistimei; + } #define DO(field) fwrite(tzh.field, sizeof tzh.field, 1, fp) tzh = tzh0; memcpy(tzh.tzh_magic, TZ_MAGIC, sizeof tzh.tzh_magic); tzh.tzh_version[0] = version; - convert(thistypecnt, tzh.tzh_ttisgmtcnt); - convert(thistypecnt, tzh.tzh_ttisstdcnt); + convert(utcnt, tzh.tzh_ttisgmtcnt); + convert(stdcnt, tzh.tzh_ttisstdcnt); convert(thisleapcnt, tzh.tzh_leapcnt); convert(locut + thistimecnt + hicut, 
tzh.tzh_timecnt); convert(thistypecnt, tzh.tzh_typecnt); @@ -2138,6 +2172,15 @@ writezone(const char *const name, const char *const string, char version, DO(tzh_typecnt); DO(tzh_charcnt); #undef DO + if (pass == 1 && !want_bloat()) { + /* Output a minimal data block with just one time type. */ + puttzcode(0, fp); /* utoff */ + putc(0, fp); /* dst */ + putc(0, fp); /* index of abbreviation */ + putc(0, fp); /* empty-string abbreviation */ + continue; + } + /* Output a LO_TIME transition if needed; see limitrange. But do not go below the minimum representable value for this pass. */ @@ -2193,10 +2236,12 @@ writezone(const char *const name, const char *const string, char version, puttzcodepass(todo, fp, pass); puttzcode(corr[i], fp); } - for (i = old0; i < typecnt; i++) + if (stdcnt != 0) + for (i = old0; i < typecnt; i++) if (!omittype[i]) putc(ttisstds[i], fp); - for (i = old0; i < typecnt; i++) + if (utcnt != 0) + for (i = old0; i < typecnt; i++) if (!omittype[i]) putc(ttisgmts[i], fp); swaptypes(old0, thisdefaulttype); @@ -2643,16 +2688,18 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount) max_year = min_year + years_of_observations; } } - /* - ** For the benefit of older systems, - ** generate data from 1900 through 2038. - */ - if (min_year > 1900) - min_year = 1900; max_year0 = max_year; - if (max_year < 2038) + if (want_bloat()) { + /* For the benefit of older systems, + generate data from 1900 through 2038. */ + if (min_year > 1900) + min_year = 1900; + if (max_year < 2038) max_year = 2038; + } + for (i = 0; i < zonecount; ++i) { + struct rule *prevrp = NULL; /* ** A guess that may well be corrected later. 
*/ @@ -2788,6 +2835,11 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount) doabbr(ab, zp, rp->r_abbrvar, rp->r_isdst, rp->r_stdoff, false); offset = oadd(zp->z_gmtoff, rp->r_stdoff); + if (!want_bloat() && !useuntil && !do_extend + && prevrp + && rp->r_hiyear == ZIC_MAX + && prevrp->r_hiyear == ZIC_MAX) + break; type = addtype(offset, ab, rp->r_isdst, rp->r_todisstd, rp->r_todisgmt); if (defaulttype < 0 && !rp->r_isdst) @@ -2797,6 +2849,7 @@ outzone(const struct zone *zpfirst, ptrdiff_t zonecount) && ktime < attypes[lastatmax].at)) lastatmax = timecnt; addtt(ktime, type); + prevrp = rp; } } if (usestart) { -- 2.21.0
participants (1)
-
Paul Eggert