POSIX extension check
It's probably worthwhile to check whether extended POSIX in zic-produced binaries will cause grief when those binaries are used with old versions of localtime.c. If so, one or more of documentation, a zic warning, or a version number bump may be in order. --ado
Arthur David Olson wrote:
It's probably worthwhile to check whether extended POSIX in zic-produced binaries will cause grief when those binaries are used with old versions of localtime.c. If so, one or more of documentation, a zic warning, or a version number bump may be in order.
Those suggestions sound good, and I'll soon propose a patch along those lines. I checked with the test script appended to the message, and it looks like we're pretty safe. The only problem I observed was with America/Godthab for dates past the start of 2038; this is because the new POSIX TZ string uses negative hours, which the old tz code does not grok. I doubt whether the Godthab issue is urgent, since we are talking only far-future dates here, and our predictions for them are quite possibly wrong anyway. export LC_ALL=C o=/tmp/tz-2013dd n=/tmp/tz omit_junk='s/^[^[:blank:]]*[[:blank:]]*//; s/ UTC / UT /' for zone in America/Los_Angeles Antarctica/Palmer Asia/Tehran Asia/Gaza Asia/Hebron Pacific/Fiji America/Godthab America/Argentina/San_Luis America/Santiago Pacific/Easter; do echo $zone report $o/etc/zdump -v $o/etc/zoneinfo-posix/$zone | sed "$omit_junk" >oo $o/etc/zdump -v $n/etc/zoneinfo-posix/$zone | sed "$omit_junk" >on $n/etc/zdump -v $o/etc/zoneinfo-posix/$zone | sed "$omit_junk" >no $n/etc/zdump -v $n/etc/zoneinfo-posix/$zone | sed "$omit_junk" >nn set oo on no nn for i in oo on no; do case $i in oo) js='on no nn';; on) js='no nn';; no) js='nn';; esac for j in $js; do diff -u $i $j done done done
Since we assume C89 now, it's safe to use vfprintf. (verror): New function, with the old implementation of 'error' but with a va_list API. (error, warning): Use it. (associate, gethms): Pass explicit %s to avoid GCC warning about possibly unsafe formats. (inzone, outzone, newabbr): Simplify by using the new error or warning functions, avoiding the need to allocate and free memory. * private.h (ATTRIBUTE_FORMAT): New macro. --- private.h | 2 ++ zic.c | 76 +++++++++++++++++++++++++++------------------------------------ 2 files changed, 35 insertions(+), 43 deletions(-) diff --git a/private.h b/private.h index 8c1cda7..3a19305 100644 --- a/private.h +++ b/private.h @@ -204,9 +204,11 @@ typedef unsigned long uintmax_t; #if 2 < __GNUC__ + (96 <= __GNUC_MINOR__) # define ATTRIBUTE_CONST __attribute__ ((const)) # define ATTRIBUTE_PURE __attribute__ ((__pure__)) +# define ATTRIBUTE_FORMAT(spec) __attribute__ ((__format__ spec)) #else # define ATTRIBUTE_CONST /* empty */ # define ATTRIBUTE_PURE /* empty */ +# define ATTRIBUTE_FORMAT(spec) /* empty */ #endif #if !defined _Noreturn && __STDC_VERSION__ < 201112 diff --git a/zic.c b/zic.c index 15a1f5f..55afb40 100644 --- a/zic.c +++ b/zic.c @@ -8,6 +8,8 @@ #include "locale.h" #include "tzfile.h" +#include <stdarg.h> + #define ZIC_VERSION '2' typedef int_fast64_t zic_t; @@ -389,16 +391,16 @@ eat(const char *const name, const int num) eats(name, num, NULL, -1); } -static void -error(const char *const string) +static void ATTRIBUTE_FORMAT((printf, 1, 0)) +verror(const char *const string, va_list args) { /* ** Match the format of "cc" to allow sh users to ** zic ... 2>&1 | error -t "*" -v ** on BSD systems. 
*/ - (void) fprintf(stderr, _("\"%s\", line %d: %s"), - filename, linenum, string); + fprintf(stderr, _("\"%s\", line %d: "), filename, linenum); + vfprintf(stderr, string, args); if (rfilename != NULL) (void) fprintf(stderr, _(" (rule from \"%s\", line %d)"), rfilename, rlinenum); @@ -406,15 +408,23 @@ error(const char *const string) ++errors; } -static void -warning(const char *const string) +static void ATTRIBUTE_FORMAT((printf, 1, 2)) +error(const char *const string, ...) { - char * cp; + va_list args; + va_start(args, string); + verror(string, args); + va_end(args); +} - cp = ecpyalloc(_("warning: ")); - cp = ecatalloc(cp, string); - error(cp); - free(cp); +static void ATTRIBUTE_FORMAT((printf, 1, 2)) +warning(const char *const string, ...) +{ + va_list args; + fprintf(stderr, _("warning: ")); + va_start(args, string); + verror(string, args); + va_end(args); --errors; } @@ -743,7 +753,7 @@ associate(void) ** a '%s' in the format is a bad thing. */ if (strchr(zp->z_format, '%') != 0) - error(_("%s in ruleless zone")); + error("%s", _("%s in ruleless zone")); } } if (errors) @@ -873,13 +883,13 @@ gethms(const char *string, const char *const errstring, const int signable) ss = 0; else if (sscanf(string, scheck(string, "%"SCNdZIC":%d:%d"), &hh, &mm, &ss) != 3) { - error(errstring); + error("%s", errstring); return 0; } if (hh < 0 || mm < 0 || mm >= MINSPERHOUR || ss < 0 || ss > SECSPERMIN) { - error(errstring); + error("%s", errstring); return 0; } if (ZIC_MAX / SECSPERHOUR < hh) { @@ -925,40 +935,31 @@ static int inzone(register char **const fields, const int nfields) { register int i; - static char * buf; if (nfields < ZONE_MINFIELDS || nfields > ZONE_MAXFIELDS) { error(_("wrong number of fields on Zone line")); return FALSE; } if (strcmp(fields[ZF_NAME], TZDEFAULT) == 0 && lcltime != NULL) { - buf = erealloc(buf, 132 + strlen(TZDEFAULT)); - (void) sprintf(buf, + error( _("\"Zone %s\" line and -l option are mutually exclusive"), TZDEFAULT); - error(buf); return 
FALSE; } if (strcmp(fields[ZF_NAME], TZDEFRULES) == 0 && psxrules != NULL) { - buf = erealloc(buf, 132 + strlen(TZDEFRULES)); - (void) sprintf(buf, + error( _("\"Zone %s\" line and -p option are mutually exclusive"), TZDEFRULES); - error(buf); return FALSE; } for (i = 0; i < nzones; ++i) if (zones[i].z_name != NULL && strcmp(zones[i].z_name, fields[ZF_NAME]) == 0) { - buf = erealloc(buf, - (132 + strlen(fields[ZF_NAME]) - + strlen(zones[i].z_filename))); - (void) sprintf(buf, + error( _("duplicate zone name %s (file \"%s\", line %d)"), fields[ZF_NAME], zones[i].z_filename, zones[i].z_linenum); - error(buf); return FALSE; } return inzsub(fields, nfields, FALSE); @@ -2032,15 +2033,10 @@ outzone(const struct zone * const zpfirst, const int zonecount) ** Generate lots of data if a rule can't cover all future times. */ stringzone(envvar, zpfirst, zonecount); - if (noise && envvar[0] == '\0') { - register char * wp; - -wp = ecpyalloc(_("no POSIX environment variable for zone")); - wp = ecatalloc(wp, " "); - wp = ecatalloc(wp, zpfirst->z_name); - warning(wp); - free(wp); - } + if (noise && envvar[0] == '\0') + warning("%s %s", + _("no POSIX environment variable for zone"), + zpfirst->z_name); if (envvar[0] == '\0') { if (min_year >= ZIC_MIN + YEARSPERREPEAT) min_year -= YEARSPERREPEAT; @@ -2622,14 +2618,8 @@ mp = _("time zone abbreviation has too many alphabetics"); } if (*cp != '\0') mp = _("time zone abbreviation differs from POSIX standard"); - if (mp != NULL) { - char *wp = ecpyalloc(mp); - wp = ecatalloc(wp, " ("); - wp = ecatalloc(wp, string); - wp = ecatalloc(wp, ")"); - warning(wp); - free(wp); - } + if (mp != NULL) + warning("%s (%s)", mp, string); } i = strlen(string) + 1; if (charcnt + i > TZ_MAX_CHARS) { -- 1.8.1.2
Also, improve the documentation and diagnostics in this area. Suggested by Arthur David Olson in <http://mm.icann.org/pipermail/tz/2013-September/020064.html>. * tzfile.5, tzfile.h: Bump tzfile format to version 3. * zic.8: Document -v better. * zic.c (ZIC_VERSION): Bump from '2' to '3'. (stringrule, stringzone, outzone): Report compatibility issues more carefully, mentioning client dates. --- tzfile.5 | 9 +++++---- tzfile.h | 9 ++++++++- zic.8 | 38 ++++++++++++++++++++++++++++++----- zic.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++------------------ 4 files changed, 97 insertions(+), 29 deletions(-) diff --git a/tzfile.5 b/tzfile.5 index b2d1a4d..ff1ec63 100644 --- a/tzfile.5 +++ b/tzfile.5 @@ -10,7 +10,7 @@ The time zone information files used by begin with the magic characters "TZif" to identify them as time zone information files, followed by a character identifying the version of the file's format -(as of 2005, either an ASCII NUL or a '2') +(as of 2013, either an ASCII NUL, or '2', or '3') followed by fifteen bytes containing zeroes reserved for future use, followed by six four-byte values of type .BR long , @@ -145,9 +145,10 @@ POSIX-TZ-environment-variable-style string for use in handling instants after the last transition time stored in the file (with nothing between the newlines if there is no POSIX representation for such instants). -As described in -.IR newtzset (3), -this string may use two minor extensions to the POSIX TZ format. +.PP +For version-3-format time zone files, the POSIX-TZ-style string may +use two minor extensions to the POSIX TZ format, as described in +.IR newtzset (3). First, the hours part of its transition times may be signed and range from \(mi167 through 167 instead of the POSIX-required unsigned values from 0 through 24. 
Second, DST is in effect all year if it starts diff --git a/tzfile.h b/tzfile.h index 0cf2943..63db98e 100644 --- a/tzfile.h +++ b/tzfile.h @@ -39,7 +39,7 @@ struct tzhead { char tzh_magic[4]; /* TZ_MAGIC */ - char tzh_version[1]; /* '\0' or '2' as of 2005 */ + char tzh_version[1]; /* '\0' or '2' or '3' as of 2013 */ char tzh_reserved[15]; /* reserved--must be zero */ char tzh_ttisgmtcnt[4]; /* coded number of trans. time flags */ char tzh_ttisstdcnt[4]; /* coded number of trans. time flags */ @@ -82,6 +82,13 @@ struct tzhead { ** instants after the last transition time stored in the file ** (with nothing between the newlines if there is no POSIX representation for ** such instants). +** +** If tz_version is '3' or greatar, the above is extended as follows. +** First, the POSIX TZ string's hour offset may range from -167 +** through 167 as compared to the POSIX-required 0 through 24. +** Second, its DST start time may be January 1 at 00:00 and its stop +** time December 31 at 24:00 plus the difference between DST and +** standard time, indicating DST all year. */ /* diff --git a/zic.8 b/zic.8 index 5c8b59c..602c3c9 100644 --- a/zic.8 +++ b/zic.8 @@ -77,14 +77,42 @@ If this option is not used, no leap second information appears in output files. .TP .B \-v -Complain if a year that appears in a data file is outside the range +Be more verbose, and complain about the following situations: +.RS +.PP +The input data specifies a link to a link. +.PP +A year that appears in a data file is outside the range of years representable by .IR time (2) values. -Also complain if a time of 24:00 -(which cannot be handled by pre-1998 versions of -.IR zic ) -appears in the input. +.PP +A time of 24:00 or more appears in the input. +Pre-1998 versions of +.I zic +prohibit 24:00, and pre-2007 versions prohibit times greater than 24:00. +.PP +A rule goes past the start or end of the month. +Pre-2004 versions of +.I zic +prohibit this. 
+.PP +The output file does not contain all the information about the +long-term future of a zone, because the future cannot be summarized as +an extended POSIX TZ string. For example, as of 2013 this problem +occurs for Iran's daylight-saving rules for the predicted future, as +these rules are based on the Iranian calendar, which cannot be +represented. +.PP +The output contains data that may not be handled properly by client +code designed for older +.I zic +output formats. These compatibility issues affect only time stamps +before 1970 or after the start of 2038. +.PP +A time zone abbreviation has fewer than 3 characters. +POSIX requires at least 3. +.RE .TP .B \-s Limit time values stored in output files to values that are the same diff --git a/zic.c b/zic.c index 55afb40..dcab3aa 100644 --- a/zic.c +++ b/zic.c @@ -10,7 +10,7 @@ #include <stdarg.h> -#define ZIC_VERSION '2' +#define ZIC_VERSION '3' typedef int_fast64_t zic_t; #define ZIC_MIN INT_FAST64_MIN @@ -1795,6 +1795,7 @@ stringrule(char *result, const struct rule *const rp, const zic_t dstoff, const zic_t gmtoff) { register zic_t tod = rp->r_tod; + register int compat = 0; result = end(result); if (rp->r_dycode == DC_DOM) { @@ -1817,6 +1818,8 @@ stringrule(char *result, const struct rule *const rp, const zic_t dstoff, if (rp->r_dycode == DC_DOWGEQ) { wdayoff = (rp->r_dayofmonth - 1) % DAYSPERWEEK; + if (wdayoff) + compat = 2013; wday -= wdayoff; tod += wdayoff * SECSPERDAY; week = 1 + (rp->r_dayofmonth - 1) / DAYSPERWEEK; @@ -1825,6 +1828,8 @@ stringrule(char *result, const struct rule *const rp, const zic_t dstoff, week = 5; else { wdayoff = rp->r_dayofmonth % DAYSPERWEEK; + if (wdayoff) + compat = 2013; wday -= wdayoff; tod += wdayoff * SECSPERDAY; week = rp->r_dayofmonth / DAYSPERWEEK; @@ -1843,8 +1848,15 @@ stringrule(char *result, const struct rule *const rp, const zic_t dstoff, (void) strcat(result, "/"); if (stringoffset(end(result), tod) != 0) return -1; + if (tod < 0) { + if (compat < 2013) + 
compat = 2013; + } else if (SECSPERDAY <= tod) { + if (compat < 1994) + compat = 1994; + } } - return 0; + return compat; } static int @@ -1861,7 +1873,7 @@ rule_cmp(struct rule const *a, struct rule const *b) return a->r_dayofmonth - b->r_dayofmonth; } -static void +static int stringzone(char *result, const struct zone *const zpfirst, const int zonecount) { register const struct zone * zp; @@ -1870,6 +1882,8 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount) register struct rule * dstrp; register int i; register const char * abbrvar; + register int compat = 0; + register int c; struct rule stdr, dstr; result[0] = '\0'; @@ -1884,11 +1898,11 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount) if (rp->r_stdoff == 0) { if (stdrp == NULL) stdrp = rp; - else return; + else return -1; } else { if (dstrp == NULL) dstrp = rp; - else return; + else return -1; } } if (stdrp == NULL && dstrp == NULL) { @@ -1911,7 +1925,7 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount) ** do not try to apply a rule to the zone. */ if (stdrp != NULL && stdrp->r_hiyear == 2037) - return; + return -1; if (stdrp != NULL && stdrp->r_stdoff != 0) { /* Perpetual DST. */ @@ -1935,32 +1949,39 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount) } } if (stdrp == NULL && (zp->z_nrules != 0 || zp->z_stdoff != 0)) - return; + return -1; abbrvar = (stdrp == NULL) ? 
"" : stdrp->r_abbrvar; doabbr(result, zp->z_format, abbrvar, FALSE, TRUE); if (stringoffset(end(result), -zp->z_gmtoff) != 0) { result[0] = '\0'; - return; + return -1; } if (dstrp == NULL) - return; + return compat; doabbr(end(result), zp->z_format, dstrp->r_abbrvar, TRUE, TRUE); if (dstrp->r_stdoff != SECSPERMIN * MINSPERHOUR) if (stringoffset(end(result), -(zp->z_gmtoff + dstrp->r_stdoff)) != 0) { result[0] = '\0'; - return; + return -1; } (void) strcat(result, ","); - if (stringrule(result, dstrp, dstrp->r_stdoff, zp->z_gmtoff) != 0) { + c = stringrule(result, dstrp, dstrp->r_stdoff, zp->z_gmtoff); + if (c < 0) { result[0] = '\0'; - return; + return -1; } + if (compat < c) + compat = c; (void) strcat(result, ","); - if (stringrule(result, stdrp, dstrp->r_stdoff, zp->z_gmtoff) != 0) { + c = stringrule(result, stdrp, dstrp->r_stdoff, zp->z_gmtoff); + if (c < 0) { result[0] = '\0'; - return; + return -1; } + if (compat < c) + compat = c; + return compat; } static void @@ -1984,6 +2005,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) register int max_abbr_len; register int max_envvar_len; register int prodstic; /* all rules are min to max */ + register int compat; max_abbr_len = 2 + max_format_len + max_abbrvar_len; max_envvar_len = 2 * max_abbr_len + 5 * 9; @@ -2032,11 +2054,21 @@ outzone(const struct zone * const zpfirst, const int zonecount) /* ** Generate lots of data if a rule can't cover all future times. */ - stringzone(envvar, zpfirst, zonecount); - if (noise && envvar[0] == '\0') - warning("%s %s", - _("no POSIX environment variable for zone"), - zpfirst->z_name); + compat = stringzone(envvar, zpfirst, zonecount); + if (noise && compat != 0) { + if (compat < 0) + warning("%s %s", + _("no POSIX environment variable for zone"), + zpfirst->z_name); + else { + /* Circa-COMPAT clients, and earlier clients, might + not work for this zone when given dates before + 1970 or after 2038. 
*/ + warning(_("%s: pre-%d clients may mishandle" + " distant timestamps"), + zpfirst->z_name, compat); + } + } if (envvar[0] == '\0') { if (min_year >= ZIC_MIN + YEARSPERREPEAT) min_year -= YEARSPERREPEAT; -- 1.8.1.2
I noticed a small typo in this patch: On Sun, Sep 08, 2013 at 23:17:58 -0700, Paul Eggert wrote:
diff --git a/tzfile.h b/tzfile.h index 0cf2943..63db98e 100644 --- a/tzfile.h +++ b/tzfile.h [...] @@ -82,6 +82,13 @@ struct tzhead { ** instants after the last transition time stored in the file ** (with nothing between the newlines if there is no POSIX representation for ** such instants). +** +** If tz_version is '3' or greatar, the above is extended as follows.
s/greatar/greater/ Nathan ---------------------------------------------------------------------------- Nathan Stratton Treadway - nathanst@ontko.com - Mid-Atlantic region Ray Ontko & Co. - Software consulting services - http://www.ontko.com/ GPG Key: http://www.ontko.com/~nathanst/gpg_key.txt ID: 1023D/ECFB6239 Key fingerprint = 6AD8 485E 20B9 5C71 231C 0C32 15F3 ADCD ECFB 6239
Nathan Stratton Treadway wrote:
I noticed a small typo in this patch:
Thanks, plus I forgot to document the version-3 stuff there. It is a pain that this stuff is documented redundantly in two places, but perhaps now's not the best time to change this. I pushed this:
From 8dada0cec8b5fbfa42748e66fc7bce06f32795aa Mon Sep 17 00:00:00 2001 From: Paul Eggert <eggert@cs.ucla.edu> Date: Tue, 10 Sep 2013 13:15:22 -0700 Subject: [PATCH] * tzfile.h: Fix tzfile.h comment.
Spelling problem reported by Nathan Stratton Treadway in <http://mm.icann.org/pipermail/tz/2013-September/020121.html>. --- tzfile.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tzfile.h b/tzfile.h index 63db98e..233563c 100644 --- a/tzfile.h +++ b/tzfile.h @@ -83,12 +83,18 @@ struct tzhead { ** (with nothing between the newlines if there is no POSIX representation for ** such instants). ** -** If tz_version is '3' or greatar, the above is extended as follows. +** If tz_version is '3' or greater, the above is extended as follows. ** First, the POSIX TZ string's hour offset may range from -167 ** through 167 as compared to the POSIX-required 0 through 24. ** Second, its DST start time may be January 1 at 00:00 and its stop ** time December 31 at 24:00 plus the difference between DST and ** standard time, indicating DST all year. +** Third, the newline-enclosed TZ string is preceded by a new section +** consisting of another copy of the string, followed by a four-byte +** integer size value, followed by zero or more NUL-terminated +** name=value byte strings, followed by an additional NUL. The size +** value gives the total size of the name=value byte strings, +** including their terminating NUL bytes, but excluding the additional NUL. */ /* -- 1.8.1.2
One other thing. It's been privately suggested to me, more than once, that a tz binary file should have a version number in it, identifying which version of the tz data it came from. Presumably this could be a new option to 'zic', used by Makefile, e.g., zic -V "$(VERSION)" ... would copy the value of VERSION into the zic output file. Do you think this is a good idea? If so, where would be a good place to put the version information -- a new "version=2013e" line after the POSIX TZ string? If we are bumping from version '2' to version '3' anyway, now may be a good time to add this -- if it's a good idea to add it, that is.
On Sun, 8 Sep 2013, Paul Eggert wrote:
would copy the value of VERSION into the zic output file.
Do you think this is a good idea?
From my experience from the last 24 hours, I think this would be a good idea.
The DST rules that existed in Israel a year ago put the end of DST as yesterday (September 8). Since then, the DST rules have been amended twice (November 2012 and July 2013). Hence, I have been getting e-mails from some people who have a mix of zic output files on their systems with three different change dates (Sep. 8, Oct. 6 and the correct one, Oct. 27). An embedded version string would simplify sorting out the various binaries (as opposed to having to check explicit dates using "zdump -v -c 2014 FILENAME"). _____________________________________ Ephraim Silverberg, CSE System Group, Hebrew University, Jerusalem, Israel. Phone/Fax number: +972-2-5494521
On Sun, 08 Sep 2013, Paul Eggert wrote:
One other thing. It's been privately suggested to me, more than once, that a tz binary file should have a version number in it, identifying which version of the tz data it came from. Presumably this could be a new option to 'zic', used by Makefile, e.g.,
zic -V "$(VERSION)" ...
would copy the value of VERSION into the zic output file.
I'd like that. It's useful to be able to tell somebody "run this command to check whether your system contains the new rules for your location", and to have the command be something simple like a variant of zdump that just prints the version. I'd also like the binary file to contain a string giving the zone name. This would permit a simple automated answer to the question of which binary file has been copied to /etc/localtime (or whatever file defines the default time zone on the local system). In the case of links, multiple files would share the same embedded name, but I think that's OK.
Do you think this is a good idea? If so, where would be a good place to put the version information -- a new "version=2013e" line after the POSIX TZ string?
I suggest a new list of variable=value strings, with version=.... and name=... being the first two such strings. If done carefully, additional strings could be added later without needing to bump {struct tzhead}.tzh_version again. We should probably also start thinking about what to do when tzh_version reaches '9'. --apb (Alan Barrett)
We started out life with 16 "reserved" bytes in binary files; one of those was appropriated for the compiler version number so we're now at... char tzh_version[1]; char tzh_reserved[15]; Adding something such as "2013e" wouldn't exhaust the reserved bytes; it would complicate regression testing (though not unduly). --ado On Mon, Sep 9, 2013 at 2:21 AM, Paul Eggert <eggert@cs.ucla.edu> wrote:
One other thing. It's been privately suggested to me, more than once, that a tz binary file should have a version number in it, identifying which version of the tz data it came from. Presumably this could be a new option to 'zic', used by Makefile, e.g.,
zic -V "$(VERSION)" ...
would copy the value of VERSION into the zic output file.
Do you think this is a good idea? If so, where would be a good place to put the version information -- a new "version=2013e" line after the POSIX TZ string? If we are bumping from version '2' to version '3' anyway, now may be a good time to add this -- if it's a good idea to add it, that is.
Arthur David Olson wrote:
Adding something such as "2013e" wouldn't exhaust the reserved bytes
Yes, but I anticipate that various distros will want to append their own information to the version string, and the reserved space is uncomfortably small for that. And I share Zefram's leeriness of making this version information so primary; it's really just auxiliary data that does not affect how timestamps are represented or interpreted, unlike the version byte. So how about this idea instead? As part of tzfile.v3 we allow name-value pairs at the end of the file. One of them can be the version, another the zone name, and perhaps we'll think of others. Here's a proposed patch to do this, which I've pushed into the experimental version.
From d7680ffd3d43c4da6d9ff21ffb93b6783703a301 Mon Sep 17 00:00:00 2001 From: Paul Eggert <eggert@cs.ucla.edu> Date: Mon, 9 Sep 2013 17:16:37 -0700 Subject: [PATCH] Add optional meta-information to version-3 format.
* Makefile (ZFLAGS): Add a comment about how to enable meta-info. * tzfile.5: Describe meta-information. * zic.8: Document new options -n and -o, which cause zic to generate meta-info. * zic.c: Include <stddef.h>, for ptrdiff_t. (genoption, genoptions, genname): New static vars. (usage): Summarize new options. (addgenoption, writevalue): New function. (main, writezone): Add support for new options. --- Makefile | 3 +++ tzfile.5 | 13 ++++++++++++ zic.8 | 14 +++++++++++++ zic.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 41b6ffc..eccb9da 100644 --- a/Makefile +++ b/Makefile @@ -250,6 +250,9 @@ LDFLAGS= $(LFLAGS) zic= ./zic ZIC= $(zic) $(ZFLAGS) +# Uncomment this to put name and version info into zic output files. +#ZFLAGS= -n -o version='$(VERSION)' + # The name of a Posix-compliant `awk' on your system. AWK= awk diff --git a/tzfile.5 b/tzfile.5 index ff1ec63..d609277 100644 --- a/tzfile.5 +++ b/tzfile.5 @@ -154,6 +154,19 @@ First, the hours part of its transition times may be signed and range from from 0 through 24. Second, DST is in effect all year if it starts January 1 at 00:00 and ends December 31 at 24:00 plus the difference between daylight saving and standard time. +.PP +Also, for version-3-format time zone files, the version-2 header and +data are optionally followed by a section containing auxiliary +meta-information that is not needed to process time stamps. This +section, if present, consists of the four magic bytes "=TZ\en" +followed by zero or more newline-terminated byte strings, each +containing a name-value pair separated by "=". Names consist of ASCII +letters, digits and underscores, and start with a letter; duplicate +names are not allowed. Two common names are "name", the Zone name for +the data, and "version", the version number. 
Values consist of any +bytes except NUL, newline, and backslash; however, newline and +backslash can represented via the two-byte strings "\en" and "\e\e" +respectively. .SH SEE ALSO newctime(3), newtzset(3) .\" This file is in the public domain, so clarified as of diff --git a/zic.8 b/zic.8 index 602c3c9..b1d3348 100644 --- a/zic.8 +++ b/zic.8 @@ -15,6 +15,11 @@ zic \- time zone compiler .B \-l .I localtime ] [ +.B \-n +] [ +.B \-o +.IB name = value +] [ .B \-p .I posixrules ] [ @@ -62,6 +67,15 @@ will act as if the input contained a link line of the form .ti +.5i Link \fItimezone\fP localtime .TP +.B "\-n" +Store each zone's name into its generated file, as meta-information +with the name "name" and value the zone's name. +.TP +.BI "\-o " name = value +Store the given name-value pair into the generated file, as +meta-information. This option can be repeated, once for each distinct +name. +.TP .BI "\-p " timezone Use the given time zone's rules when handling POSIX-format time zone environment variables. diff --git a/zic.c b/zic.c index 9939195..eefa1fb 100644 --- a/zic.c +++ b/zic.c @@ -9,6 +9,7 @@ #include "tzfile.h" #include <stdarg.h> +#include <stddef.h> #define ZIC_VERSION '3' @@ -140,6 +141,9 @@ static int yearistype(int year, const char * type); static int charcnt; static int errors; static const char * filename; +static const char ** genoption; +static int genoptions; +static int genname; static int leapcnt; static int leapseen; static zic_t leapminyear; @@ -432,7 +436,8 @@ static _Noreturn void usage(FILE *stream, int status) { (void) fprintf(stream, _("%s: usage is %s \ -[ --version ] [ --help ] [ -v ] [ -l localtime ] [ -p posixrules ] \\\n\ +[ --version ] [ --help ] [ -v ] [ -l localtime ]\\\n\ +\t[ -n ] [ -o name=value ]... [ -p posixrules ] \\\n\ \t[ -d directory ] [ -L leapseconds ] [ -y yearistype ] [ filename ... 
]\n\ \n\ Report bugs to %s.\n"), @@ -446,6 +451,31 @@ static const char * directory; static const char * leapsec; static const char * yitcommand; +static int +addgenoption(char const *option) +{ + register char const *o = option; + register ptrdiff_t namelen; + register int i; + if (! (isascii (*o) && isalpha(*o))) + return 0; + while (*++o != '=') + if (! (isascii (*o) && (isalnum(*o) || *o == '_'))) + return 0; + namelen = o - option; + if (INT_MAX < namelen) + return 0; /* fprintf won't work. */ + if (namelen == sizeof "name" - 1 + && memcmp(option, "name", namelen) == 0) + return 0; + for (i = 0; i < genoptions; i++) + if (strncmp(genoption[i], option, namelen + 1) == 0) + return 0; + genoption = erealloc(genoption, (genoptions + 1) * sizeof *genoption); + genoption[genoptions++] = option; + return 1; +} + int main(int argc, char **argv) { @@ -476,7 +506,7 @@ main(int argc, char **argv) } else if (strcmp(argv[i], "--help") == 0) { usage(stdout, EXIT_SUCCESS); } - while ((c = getopt(argc, argv, "d:l:p:L:vsy:")) != EOF && c != -1) + while ((c = getopt(argc, argv, "d:l:p:L:no:vsy:")) != EOF && c != -1) switch (c) { default: usage(stderr, EXIT_FAILURE); @@ -500,6 +530,17 @@ _("%s: More than one -l option specified\n"), exit(EXIT_FAILURE); } break; + case 'n': + genname = TRUE; + break; + case 'o': + if (!addgenoption(optarg)) { + fprintf(stderr, + _("%s: %s: invalid -o option\n"), + progname, optarg); + exit(EXIT_FAILURE); + } + break; case 'p': if (psxrules == NULL) psxrules = optarg; @@ -1386,6 +1427,22 @@ is32(const zic_t x) } static void +writevalue(FILE *fp, char const *v) +{ + fputc('=', fp); + + for (; *v; v++) + if (*v == '\n') + fprintf(fp, "\\n"); + else if (*v == '\\') + fprintf(fp, "\\\\"); + else + fputc(*v, fp); + + fputc('\n', fp); +} + +static void writezone(const char *const name, const char *const string) { register FILE * fp; @@ -1708,6 +1765,18 @@ writezone(const char *const name, const char *const string) (void) putc(ttisgmts[i], fp); } (void) 
fprintf(fp, "\n%s\n", string); + if (genname || genoptions) + fprintf(fp, "=TZ\n"); + if (genname) { + fprintf(fp, "name"); + writevalue(fp, name); + } + for (i = 0; i < genoptions; i++) { + register char const *v = genoption[i]; + register int namelen = strchr(v, '=') - v; + fprintf(fp, "%.*s", namelen, v); + writevalue(fp, v + namelen + 1); + } if (ferror(fp) || fclose(fp)) { (void) fprintf(stderr, _("%s: Error writing %s\n"), progname, fullname); -- 1.8.3.1
I'm guessing that a way to minimize breakage is to end the file with: (1) the POSIX string, (2) any name/value pairs, (3) a repeat of the POSIX string. That way folks who look for the POSIX string at the very end of the file will find it, and folks who look for it after the 64-bit data will too. --ado On Mon, Sep 9, 2013 at 9:04 PM, Paul Eggert <eggert@cs.ucla.edu> wrote:
Arthur David Olson wrote:
Adding something such as "2013e" wouldn't exhaust the reserved bytes
Yes, but I anticipate that various distros will want to append their own information to the version string, and the reserved space is uncomfortably small for that. And I share Zefram's leeriness of making this version information so primary; it's really just auxiliary data that does not affect how timestamps are represented or interpreted, unlike the version byte.
So how about this idea instead? As part of tzfile.v3 we allow name-value pairs at the end of the file. One of them can be the version, another the zone name, and perhaps we'll think of others. Here's a proposed patch to do this, which I've pushed into the experimental version.
From d7680ffd3d43c4da6d9ff21ffb93b6783703a301 Mon Sep 17 00:00:00 2001 From: Paul Eggert <eggert@cs.ucla.edu> Date: Mon, 9 Sep 2013 17:16:37 -0700 Subject: [PATCH] Add optional meta-information to version-3 format.
* Makefile (ZFLAGS): Add a comment about how to enable meta-info. * tzfile.5: Describe meta-information. * zic.8: Document new options -n and -o, which cause zic to generate meta-info. * zic.c: Include <stddef.h>, for ptrdiff_t. (genoption, genoptions, genname): New static vars. (usage): Summarize new options. (addgenoption, writevalue): New function. (main, writezone): Add support for new options. --- Makefile | 3 +++ tzfile.5 | 13 ++++++++++++ zic.8 | 14 +++++++++++++ zic.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 101 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile index 41b6ffc..eccb9da 100644 --- a/Makefile +++ b/Makefile @@ -250,6 +250,9 @@ LDFLAGS= $(LFLAGS) zic= ./zic ZIC= $(zic) $(ZFLAGS)
+# Uncomment this to put name and version info into zic output files. +#ZFLAGS= -n -o version='$(VERSION)' + # The name of a Posix-compliant `awk' on your system. AWK= awk
diff --git a/tzfile.5 b/tzfile.5 index ff1ec63..d609277 100644 --- a/tzfile.5 +++ b/tzfile.5 @@ -154,6 +154,19 @@ First, the hours part of its transition times may be signed and range from from 0 through 24. Second, DST is in effect all year if it starts January 1 at 00:00 and ends December 31 at 24:00 plus the difference between daylight saving and standard time. +.PP +Also, for version-3-format time zone files, the version-2 header and +data are optionally followed by a section containing auxiliary +meta-information that is not needed to process time stamps. This +section, if present, consists of the four magic bytes "=TZ\en" +followed by zero or more newline-terminated byte strings, each +containing a name-value pair separated by "=". Names consist of ASCII +letters, digits and underscores, and start with a letter; duplicate +names are not allowed. Two common names are "name", the Zone name for +the data, and "version", the version number. Values consist of any +bytes except NUL, newline, and backslash; however, newline and +backslash can represented via the two-byte strings "\en" and "\e\e" +respectively. .SH SEE ALSO newctime(3), newtzset(3) .\" This file is in the public domain, so clarified as of diff --git a/zic.8 b/zic.8 index 602c3c9..b1d3348 100644 --- a/zic.8 +++ b/zic.8 @@ -15,6 +15,11 @@ zic \- time zone compiler .B \-l .I localtime ] [ +.B \-n +] [ +.B \-o +.IB name = value +] [ .B \-p .I posixrules ] [ @@ -62,6 +67,15 @@ will act as if the input contained a link line of the form .ti +.5i Link \fItimezone\fP localtime .TP +.B "\-n" +Store each zone's name into its generated file, as meta-information +with the name "name" and value the zone's name. +.TP +.BI "\-o " name = value +Store the given name-value pair into the generated file, as +meta-information. This option can be repeated, once for each distinct +name. +.TP .BI "\-p " timezone Use the given time zone's rules when handling POSIX-format time zone environment variables. 
diff --git a/zic.c b/zic.c index 9939195..eefa1fb 100644 --- a/zic.c +++ b/zic.c @@ -9,6 +9,7 @@ #include "tzfile.h"
#include <stdarg.h> +#include <stddef.h>
#define ZIC_VERSION '3'
@@ -140,6 +141,9 @@ static int yearistype(int year, const char * type); static int charcnt; static int errors; static const char * filename; +static const char ** genoption; +static int genoptions; +static int genname; static int leapcnt; static int leapseen; static zic_t leapminyear; @@ -432,7 +436,8 @@ static _Noreturn void usage(FILE *stream, int status) { (void) fprintf(stream, _("%s: usage is %s \ -[ --version ] [ --help ] [ -v ] [ -l localtime ] [ -p posixrules ] \\\n\ +[ --version ] [ --help ] [ -v ] [ -l localtime ]\\\n\ +\t[ -n ] [ -o name=value ]... [ -p posixrules ] \\\n\ \t[ -d directory ] [ -L leapseconds ] [ -y yearistype ] [ filename ... ]\n\ \n\ Report bugs to %s.\n"), @@ -446,6 +451,31 @@ static const char * directory; static const char * leapsec; static const char * yitcommand;
+static int +addgenoption(char const *option) +{ + register char const *o = option; + register ptrdiff_t namelen; + register int i; + if (! (isascii (*o) && isalpha(*o))) + return 0; + while (*++o != '=') + if (! (isascii (*o) && (isalnum(*o) || *o == '_'))) + return 0; + namelen = o - option; + if (INT_MAX < namelen) + return 0; /* fprintf won't work. */ + if (namelen == sizeof "name" - 1 + && memcmp(option, "name", namelen) == 0) + return 0; + for (i = 0; i < genoptions; i++) + if (strncmp(genoption[i], option, namelen + 1) == 0) + return 0; + genoption = erealloc(genoption, (genoptions + 1) * sizeof *genoption); + genoption[genoptions++] = option; + return 1; +} + int main(int argc, char **argv) { @@ -476,7 +506,7 @@ main(int argc, char **argv) } else if (strcmp(argv[i], "--help") == 0) { usage(stdout, EXIT_SUCCESS); } - while ((c = getopt(argc, argv, "d:l:p:L:vsy:")) != EOF && c != -1) + while ((c = getopt(argc, argv, "d:l:p:L:no:vsy:")) != EOF && c != -1) switch (c) { default: usage(stderr, EXIT_FAILURE); @@ -500,6 +530,17 @@ _("%s: More than one -l option specified\n"), exit(EXIT_FAILURE); } break; + case 'n': + genname = TRUE; + break; + case 'o': + if (!addgenoption(optarg)) { + fprintf(stderr, + _("%s: %s: invalid -o option\n"), + progname, optarg); + exit(EXIT_FAILURE); + } + break; case 'p': if (psxrules == NULL) psxrules = optarg; @@ -1386,6 +1427,22 @@ is32(const zic_t x) }
static void +writevalue(FILE *fp, char const *v) +{ + fputc('=', fp); + + for (; *v; v++) + if (*v == '\n') + fprintf(fp, "\\n"); + else if (*v == '\\') + fprintf(fp, "\\\\"); + else + fputc(*v, fp); + + fputc('\n', fp); +} + +static void writezone(const char *const name, const char *const string) { register FILE * fp; @@ -1708,6 +1765,18 @@ writezone(const char *const name, const char *const string) (void) putc(ttisgmts[i], fp); } (void) fprintf(fp, "\n%s\n", string); + if (genname || genoptions) + fprintf(fp, "=TZ\n"); + if (genname) { + fprintf(fp, "name"); + writevalue(fp, name); + } + for (i = 0; i < genoptions; i++) { + register char const *v = genoption[i]; + register int namelen = strchr(v, '=') - v; + fprintf(fp, "%.*s", namelen, v); + writevalue(fp, v + namelen + 1); + } if (ferror(fp) || fclose(fp)) { (void) fprintf(stderr, _("%s: Error writing %s\n"), progname, fullname); -- 1.8.3.1
Arthur David Olson wrote:
That way folks who look for the POSIX string at the very end of the file will find it, and folks who look for it after the 64-bit data will too.
Thanks! Here's a patch to implement that; I've pushed this to the experimental repository.
From d74d5b95008e7a00da6fc66afc618ea8296fd43e Mon Sep 17 00:00:00 2001 From: Paul Eggert <eggert@cs.ucla.edu> Date: Mon, 9 Sep 2013 22:32:56 -0700 Subject: [PATCH] * tzfile.5: Repeat the TZ string after the name-value pairs.
Suggested by Arthur David Olson in <http://mm.icann.org/pipermail/tz/2013-September/020100.html>. * zic.c (writezone): Implement this. --- tzfile.5 | 21 ++++++++++++--------- zic.c | 22 ++++++++++++---------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/tzfile.5 b/tzfile.5 index d609277..d9477eb 100644 --- a/tzfile.5 +++ b/tzfile.5 @@ -158,15 +158,18 @@ between daylight saving and standard time. Also, for version-3-format time zone files, the version-2 header and data are optionally followed by a section containing auxiliary meta-information that is not needed to process time stamps. This -section, if present, consists of the four magic bytes "=TZ\en" -followed by zero or more newline-terminated byte strings, each -containing a name-value pair separated by "=". Names consist of ASCII -letters, digits and underscores, and start with a letter; duplicate -names are not allowed. Two common names are "name", the Zone name for -the data, and "version", the version number. Values consist of any -bytes except NUL, newline, and backslash; however, newline and -backslash can represented via the two-byte strings "\en" and "\e\e" -respectively. +section, if present, consists of the four magic bytes "=TZ\en", +followed by zero or more newline-terminated byte strings, followed by +another copy of the newline-enclosed POSIX-TZ-style string (this last +is for the benefit of any older clients that look for the TZ string at +the very end of the file). Each newline-terminated byte string +consists of a name-value pair separated by "=" and terminated by +newline. Names consist of ASCII letters, digits and underscores, and +start with a letter; duplicate names are not allowed. Two common +names are "name", the Zone name for the data, and "version", the +version number. Values consist of any bytes except NUL, newline, and +backslash; however, newline and backslash can represented via the +two-byte strings "\en" and "\e\e" respectively. 
.SH SEE ALSO newctime(3), newtzset(3) .\" This file is in the public domain, so clarified as of diff --git a/zic.c b/zic.c index eefa1fb..17b9e0e 100644 --- a/zic.c +++ b/zic.c @@ -1765,17 +1765,19 @@ writezone(const char *const name, const char *const string) (void) putc(ttisgmts[i], fp); } (void) fprintf(fp, "\n%s\n", string); - if (genname || genoptions) + if (genname || genoptions) { fprintf(fp, "=TZ\n"); - if (genname) { - fprintf(fp, "name"); - writevalue(fp, name); - } - for (i = 0; i < genoptions; i++) { - register char const *v = genoption[i]; - register int namelen = strchr(v, '=') - v; - fprintf(fp, "%.*s", namelen, v); - writevalue(fp, v + namelen + 1); + if (genname) { + fprintf(fp, "name"); + writevalue(fp, name); + } + for (i = 0; i < genoptions; i++) { + register char const *v = genoption[i]; + register int namelen = strchr(v, '=') - v; + fprintf(fp, "%.*s", namelen, v); + writevalue(fp, v + namelen + 1); + } + fprintf(fp, "\n%s\n", string); } if (ferror(fp) || fclose(fp)) { (void) fprintf(stderr, _("%s: Error writing %s\n"), -- 1.8.3.1
On Monday, September 9 2013, "Paul Eggert" wrote to "Arthur David Olson, tz@iana.org" saying:
+section, if present, consists of the four magic bytes "=TZ\en", +followed by zero or more newline-terminated byte strings, followed by +another copy of the newline-enclosed POSIX-TZ-style string (this last +is for the benefit of any older clients that look for the TZ string at +the very end of the file). Each newline-terminated byte string +consists of a name-value pair separated by "=" and terminated by +newline. Names consist of ASCII letters, digits and underscores, and +start with a letter; duplicate names are not allowed. Two common +names are "name", the Zone name for the data, and "version", the +version number. Values consist of any bytes except NUL, newline, and +backslash; however, newline and backslash can represented via the +two-byte strings "\en" and "\e\e" respectively.
I assume "\e" is supposed to be interpreted in the C sense, i.e. as ASCII 27 (ESC)? If we go with this format (and not Zefram's proposed variant, or something else) I suggest that this be documented explicitly. I also think that all the valid meta-data name/value pairs need to be explicitly documented (both syntactically and semantically), not just a few examples given. Otherwise it's unclear whether non-zic zonefile compilers are allowed to invent their own meta-data names, or what format they have to follow for the defined ones' values. I also suggest that we have a meta-data entry giving the name and version of the program that generated the zonefile. -- Jonathan Lennox lennox@cs.columbia.edu
Paul Eggert wrote:
+Also, for version-3-format time zone files, the version-2 header and +data are optionally followed by a section containing auxiliary +meta-information
Version-1 and version-2 tzfile formats have the somewhat-useful feature of being self-delimiting: you can read up to the end of the tzfile and no further without requiring lookahead. It would be wise for version 3 to retain this feature. So the aux-data section shouldn't be optional, and should have an explicit terminator.
+ newline-terminated byte strings,
Using newlines and an escaping system is unnecessarily baroque for a binary file format. I suggest NUL termination and no escaping. Taking both of these changes together, the documentation would be: For version-3 format tzfiles, the version-2 data is followed by a section containing auxiliary meta-information that is not needed to process time stamps. This section consists of zero or more name-value strings, followed by a final NUL. Each name-value string consists of a name, "=", value, and terminating NUL. Names consist of ASCII letters, digits and underscores, and start with a letter; duplicate names are not allowed. Two common names are "name", the Zone name for the data, and "version", the version number of the database from which the data came. Values consist of any bytes except NUL. -zefram
On Tue, 10 Sep 2013, Zefram wrote:
Paul Eggert wrote:
+Also, for version-3-format time zone files, the version-2 header and +data are optionally followed by a section containing auxiliary +meta-information
Version-1 and version-2 tzfile formats have the somewhat-useful feature of being self-delimiting: you can read up to the end of the tzfile and no further without requiring lookahead. It would be wise for version 3 to retain this feature. So the aux-data section shouldn't be optional, and should have an explicit terminator.
I agree, and I'd also suggest an explicit length, so that readers who want to skip over this information can know how much to skip, without needing to parse it all. It might also be useful to provide some guarantees of forward compatibility. For example, one could state that future extensions to the format may insert additional information between the new keyword=value list and the new second copy of the POSIX TZ string, but will retain the following overall structure for as long as possible, until a major change in format is required: header; 32-bit version of transition data; 64-bit version of transition data [for tzh_version >= '2']; POSIX TZ string [for tzh_version >= '2']; list of keyword=value pairs [for tzh_version >= '3']; [new data for tzh_version >= '4' may appear here]; second copy of POSIX TZ string [for tzh_version >= '3', guaranteed to be at the end of the file unless there is a major change to the format]; Now is the time to tell implementors: you may look for the newline-delimited POSIX TZ string either just after the 64-bit version of the transition data, or at the very end of the file, but don't look for it just after the keyword=value list, because tzh_version >= '4' will probably put something else there. --apb (Alan Barrett)
Thanks for those suggestions. I've pushed this patch to implement most of them. I didn't add documentation for future names as it's not clear to me what they'll be or what we'll need. And I didn't suggest putting the program-generator version into the data, as my experience is that this costs more regression hassles than it's worth. I dislike compilers that scribble their names into the object code. Of course people are free to add -o options to do that and if there's sufficient interest we should probably standardize it.
From fe2db73d5f454db17a1fa4e646a79a0e5e2d9756 Mon Sep 17 00:00:00 2001 From: Paul Eggert <eggert@cs.ucla.edu> Date: Tue, 10 Sep 2013 12:49:45 -0700 Subject: [PATCH] Improve tzfile format as suggested by Zefram and Alan Barrett.
* tzfile.5: Be a bit more careful about wording for integers. Use binary terminators for the byte strings, not newlines. Add a size field. Terminate by another NUL. Mention where future extensions are expected to go. * zic.c (writezone): Implement this. (addgenoption): Omit namelen check; no longer needed. (writevalue): Remove. --- tzfile.5 | 73 ++++++++++++++++++++++++++++++++-------------------------------- zic.c | 49 +++++++++++++++---------------------------- 2 files changed, 54 insertions(+), 68 deletions(-) diff --git a/tzfile.5 b/tzfile.5 index d9477eb..edfa475 100644 --- a/tzfile.5 +++ b/tzfile.5 @@ -12,9 +12,8 @@ time zone information files, followed by a character identifying the version of the file's format (as of 2013, either an ASCII NUL, or '2', or '3') followed by fifteen bytes containing zeroes reserved for future use, -followed by six four-byte values of type -.BR long , -written in a ``standard'' byte order +followed by six four-byte integer values +written in a "standard" byte order (the high-order byte of the value is written first). These values are, in order: @@ -42,18 +41,15 @@ stored in the file. .PP The above header is followed by .I tzh_timecnt -four-byte values of type -.BR long , -sorted in ascending order. -These values are written in ``standard'' byte order. +four-byte signed integer values sorted in ascending order. +These values are written in "standard" byte order. Each is used as a transition time (as returned by .IR time (2)) at which the rules for computing local time change. Next come .I tzh_timecnt -one-byte values of type -.BR "unsigned char" ; -each one tells which of the different types of ``local time'' types +one-byte unsigned integer values; +each one tells which of the different types of "local time" types described in the file is associated with the same-indexed transition time. 
These values serve as indices into an array of .I ttinfo @@ -64,19 +60,17 @@ these structures are defined as follows: .in +.5i .sp .nf -.ta .5i +\w'unsigned int\0\0'u +.ta .5i +\w'unsigned char\0\0'u struct ttinfo { - long tt_gmtoff; - int tt_isdst; - unsigned int tt_abbrind; + int32_t tt_gmtoff; + unsigned char tt_isdst; + unsigned char tt_abbrind; }; .in -.5i .fi .sp -Each structure is written as a four-byte value for -.I tt_gmtoff -of type -.BR long , +Each structure is written as a four-byte signed integer value for +.IR tt_gmtoff , in a standard byte order, followed by a one-byte value for .I tt_isdst and a one-byte value for @@ -140,36 +134,43 @@ For version-2-format time zone files, the above header and data are followed by a second header and data, identical in format except that eight bytes are used for each transition time or leap second time. -After the second header and data comes a newline-enclosed, +After the second header and data, +and just before the end of the file, comes a newline-enclosed, POSIX-TZ-environment-variable-style string for use in handling instants after the last transition time stored in the file (with nothing between the newlines if there is no POSIX representation for such instants). .PP -For version-3-format time zone files, the POSIX-TZ-style string may -use two minor extensions to the POSIX TZ format, as described in +Version-3-format time zone files have the following additions: +.IP +The POSIX-TZ-style string may use two minor extensions to the +POSIX TZ format, as described in .IR newtzset (3). First, the hours part of its transition times may be signed and range from \(mi167 through 167 instead of the POSIX-required unsigned values from 0 through 24. Second, DST is in effect all year if it starts January 1 at 00:00 and ends December 31 at 24:00 plus the difference between daylight saving and standard time. 
+.IP +The newline-enclosed POSIX-TZ-style string is preceded by a section +containing auxiliary meta-information that is not needed to process +time stamps. This section consists of another copy of the +newline-enclosed POSIX-TZ-style string (this is for the benefit of +version-2-only clients), followed by a four-byte integer size value, +followed by zero or more NUL-terminated byte strings, followed by an +additional NUL. The size value is the total number of bytes in all +the byte strings, including the trailing NULs at the end of the +strings, but not including the additional NUL. Each byte string +consists of a name-value pair separated by "=". Names consist of +ASCII letters, digits and underscores, and start with a letter; +duplicate names are not allowed. Two common names are "name", the +Zone name for the data, and "version", the data's version number. +Values can contain any bytes except NUL. .PP -Also, for version-3-format time zone files, the version-2 header and -data are optionally followed by a section containing auxiliary -meta-information that is not needed to process time stamps. This -section, if present, consists of the four magic bytes "=TZ\en", -followed by zero or more newline-terminated byte strings, followed by -another copy of the newline-enclosed POSIX-TZ-style string (this last -is for the benefit of any older clients that look for the TZ string at -the very end of the file). Each newline-terminated byte string -consists of a name-value pair separated by "=" and terminated by -newline. Names consist of ASCII letters, digits and underscores, and -start with a letter; duplicate names are not allowed. Two common -names are "name", the Zone name for the data, and "version", the -version number. Values consist of any bytes except NUL, newline, and -backslash; however, newline and backslash can represented via the -two-byte strings "\en" and "\e\e" respectively. 
+Future additions to the format may insert more data just before the +newline-enclosed POSIX-TZ-style string at the end of the file, so +clients should not assume that this string immediately follows +the auxiliary meta-information. .SH SEE ALSO newctime(3), newtzset(3) .\" This file is in the public domain, so clarified as of diff --git a/zic.c b/zic.c index 17b9e0e..e59a15f 100644 --- a/zic.c +++ b/zic.c @@ -463,13 +463,11 @@ addgenoption(char const *option) if (! (isascii (*o) && (isalnum(*o) || *o == '_'))) return 0; namelen = o - option; - if (INT_MAX < namelen) - return 0; /* fprintf won't work. */ if (namelen == sizeof "name" - 1 && memcmp(option, "name", namelen) == 0) return 0; for (i = 0; i < genoptions; i++) - if (strncmp(genoption[i], option, namelen + 1) == 0) + if (strncmp(genoption[i], option, namelen + 1) == 0) return 0; genoption = erealloc(genoption, (genoptions + 1) * sizeof *genoption); genoption[genoptions++] = option; @@ -1427,22 +1425,6 @@ is32(const zic_t x) } static void -writevalue(FILE *fp, char const *v) -{ - fputc('=', fp); - - for (; *v; v++) - if (*v == '\n') - fprintf(fp, "\\n"); - else if (*v == '\\') - fprintf(fp, "\\\\"); - else - fputc(*v, fp); - - fputc('\n', fp); -} - -static void writezone(const char *const name, const char *const string) { register FILE * fp; @@ -1450,6 +1432,7 @@ writezone(const char *const name, const char *const string) register int leapcnt32, leapi32; register int timecnt32, timei32; register int pass; + register int_fast32_t genlen; static char * fullname; static const struct tzhead tzh0; static struct tzhead tzh; @@ -1765,20 +1748,22 @@ writezone(const char *const name, const char *const string) (void) putc(ttisgmts[i], fp); } (void) fprintf(fp, "\n%s\n", string); - if (genname || genoptions) { - fprintf(fp, "=TZ\n"); - if (genname) { - fprintf(fp, "name"); - writevalue(fp, name); - } - for (i = 0; i < genoptions; i++) { - register char const *v = genoption[i]; - register int namelen = strchr(v, '=') - 
v; - fprintf(fp, "%.*s", namelen, v); - writevalue(fp, v + namelen + 1); - } - fprintf(fp, "\n%s\n", string); + + genlen = 0; + if (genname) + genlen += sizeof "name=" + strlen (name); + for (i = 0; i < genoptions; i++) + genlen += strlen (genoption[i]) + 1; + puttzcode(genlen, fp); + + if (genname) + fprintf(fp, "name=%s%c", name, 0); + for (i = 0; i < genoptions; i++) { + register char const *v = genoption[i]; + register int namelen = strchr(v, '=') - v; + fprintf(fp, "%s%c", v, 0); } + fprintf(fp, "%c\n%s\n", 0, string); if (ferror(fp) || fclose(fp)) { (void) fprintf(stderr, _("%s: Error writing %s\n"), progname, fullname); -- 1.8.1.2
Paul Eggert wrote:
Do you think this is a good idea?
I'm dubious about it. Firstly, as identification of the tzfile goes, the db version is more distantly bound than the zone name. If the version is to be included, the zone name should be there as well. You've already had that suggested. Secondly, I'm wary of the purposes to which this information might be put. It looks like you intend it for debugging/tracing purposes, which is OK. But people might end up using it in semantically significant ways, such as selecting a tzfile by searching for the one with a particular version/name tag. Such use should be discouraged. If you explicate in tzfile(5) that these tags do not affect the meaning of the file, that should be OK. -zefram
Zefram wrote:
Secondly, I'm wary of the purposes to which this information might be put. It looks like you intend it for debugging/tracing purposes, which is OK. But people might end up using it in semantically significant ways, such as selecting a tzfile by searching for the one with a particular version/name tag. Such use should be discouraged. If you explicate in tzfile(5) that these tags do not affect the meaning of the file, that should be OK.
How is the 'current' version of the binary file identified? If one assumes that the latest version is the only 'correct' one, the problem is ensuring that it IS what one is looking at. If a distribution has not updated this then the problems of old data arise. One of my other services 'dials home' and asks what the latest version number is and will warn if the local copy is older. That uses a timestamp in the data file. Managing the history of changes is a separate problem, so the embedded stamp can only define the 'latest' snapshot? -- Lester Caine - G8HFL ----------------------------- Contact - http://lsces.co.uk/wiki/?page=contact L.S.Caine Electronic Services - http://lsces.co.uk EnquirySolve - http://enquirysolve.com/ Model Engineers Digital Workshop - http://medw.co.uk Rainbow Digital Media - http://rainbowdigitalmedia.co.uk
participants (8)
-
Alan Barrett -
Arthur David Olson -
Ephraim Silverberg -
lennox@cs.columbia.edu -
Lester Caine -
Nathan Stratton Treadway -
Paul Eggert -
Zefram