[PROPOSED PATCH] Support %z in Zone formats
This extends the zic input format to add support for %z, which expands to a UTC offset in as-short-as-possible ISO 8601 format. It's intended to better support zones that do not have an established abbreviation already. This is a change only to zic; it does not affect the data, which can still be compiled with older zic versions. * NEWS, zic.8: Document this. * zic.c (struct zone): New member z_format_specifier. (PERCENT_Z_LEN_BOUND): New constant. (max_abbrvar_len): Initialize to it, rather than to 0. (associate, inzsub, doabbr): Add support for %z. (abbroffset): New function. (doabbr): 2nd arg is now struct zone *, not a char *. All callers changed. --- NEWS | 5 +++++ zic.8 | 13 +++++++++++ zic.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 85 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index b7593e7..9eff28c 100644 --- a/NEWS +++ b/NEWS @@ -25,6 +25,11 @@ Unreleased, experimental changes (Thanks to Jon Skeet and Arthur David Olson.) Constraints on simultaneity are now documented. + The two characters '%z' in a zone format now stand for the UTC + offset, e.g., '-07' for seven hours behind UTC and '+0530' for + five hours and thirty minutes ahead. This better supports time + zone abbreviations conforming to POSIX.1-2001 and later. + Changes affecting installed data files Data entries have been simplified for Atlantic/Canary, Europe/Simferopol, diff --git a/zic.8 b/zic.8 index dfb01db..94b6753 100644 --- a/zic.8 +++ b/zic.8 @@ -350,6 +350,19 @@ The pair of characters is used to show where the .q "variable part" of the time zone abbreviation goes. +Alternately, a format can use the pair of characters +.B %z +to stand for the UTC offset in the form +.RI \(+- hh , +.RI \(+- hhmm , +or +.RI \(+- hhmmss , +using the shortest form that does not lose information, where +.IR hh , +.IR mm , +and +.I ss +are the hours, minutes, and seconds east (+) or west (\(mi) of UTC. 
Alternately, a slash (/) separates standard and daylight abbreviations. diff --git a/zic.c b/zic.c index 6854033..3c4ae93 100644 --- a/zic.c +++ b/zic.c @@ -76,6 +76,7 @@ struct zone { zic_t z_gmtoff; const char * z_rule; const char * z_format; + char z_format_specifier; zic_t z_stdoff; @@ -130,6 +131,9 @@ static void rulesub(struct rule * rp, static zic_t tadd(zic_t t1, zic_t t2); static bool yearistype(int year, const char * type); +/* Bound on length of what %z can expand to. */ +enum { PERCENT_Z_LEN_BOUND = sizeof "+995959" - 1 }; + static int charcnt; static bool errors; static bool warnings; @@ -139,7 +143,7 @@ static bool leapseen; static zic_t leapminyear; static zic_t leapmaxyear; static int linenum; -static int max_abbrvar_len; +static int max_abbrvar_len = PERCENT_Z_LEN_BOUND; static int max_format_len; static zic_t max_year; static zic_t min_year; @@ -955,7 +959,7 @@ associate(void) ** Note, though, that if there's no rule, ** a '%s' in the format is a bad thing. */ - if (strchr(zp->z_format, '%') != 0) + if (zp->z_format_specifier == 's') error("%s", _("%s in ruleless zone")); } } @@ -1170,6 +1174,7 @@ static bool inzsub(char **fields, int nfields, bool iscont) { register char * cp; + char * cp1; static struct zone z; register int i_gmtoff, i_rule, i_format; register int i_untilyear, i_untilmonth; @@ -1201,13 +1206,21 @@ inzsub(char **fields, int nfields, bool iscont) z.z_linenum = linenum; z.z_gmtoff = gethms(fields[i_gmtoff], _("invalid UT offset"), true); if ((cp = strchr(fields[i_format], '%')) != 0) { - if (*++cp != 's' || strchr(cp, '%') != 0) { + if ((*++cp != 's' && *cp != 'z') || strchr(cp, '%') + || strchr(fields[i_format], '/')) { error(_("invalid abbreviation format")); return false; } } z.z_rule = ecpyalloc(fields[i_rule]); - z.z_format = ecpyalloc(fields[i_format]); + z.z_format = cp1 = ecpyalloc(fields[i_format]); + z.z_format_specifier = cp ? 
*cp : '\0'; + if (z.z_format_specifier == 'z') { + if (noise) + warning(_("format '%s' not handled by pre-2015 versions of zic"), + z.z_format); + cp1[cp - fields[i_format]] = 's'; + } if (max_format_len < strlen(z.z_format)) max_format_len = strlen(z.z_format); hasuntil = nfields > i_untilyear; @@ -1890,19 +1903,59 @@ writezone(const char *const name, const char *const string, char version) free(fullname); } +static char const * +abbroffset(char *buf, zic_t offset) +{ + char sign = '+'; + int seconds, minutes; + + if (offset < 0) { + offset = -offset; + sign = '-'; + } + + seconds = offset % SECSPERMIN; + offset /= SECSPERMIN; + minutes = offset % MINSPERHOUR; + offset /= MINSPERHOUR; + if (100 <= offset) { + error(_("%%z UTC offset magnitude exceeds 99:59:59")); + return "%z"; + } else { + char *p = buf; + *p++ = sign; + *p++ = '0' + offset / 10; + *p++ = '0' + offset % 10; + if (minutes | seconds) { + *p++ = '0' + minutes / 10; + *p++ = '0' + minutes % 10; + if (seconds) { + *p++ = '0' + seconds / 10; + *p++ = '0' + seconds % 10; + } + } + *p = '\0'; + return buf; + } +} + static size_t -doabbr(char *const abbr, const char *const format, const char *const letters, +doabbr(char *abbr, struct zone const *zp, char const *letters, bool isdst, bool doquotes) { register char * cp; register char * slashp; register size_t len; + char const *format = zp->z_format; slashp = strchr(format, '/'); if (slashp == NULL) { - if (letters == NULL) - strcpy(abbr, format); - else sprintf(abbr, format, letters); + char letterbuf[PERCENT_Z_LEN_BOUND + 1]; + if (zp->z_format_specifier == 'z') + letters = abbroffset(letterbuf, -zp->z_gmtoff); + else if (!letters) + letters = "%s"; + sprintf(abbr, format, letters); } else if (isdst) { strcpy(abbr, slashp + 1); } else { @@ -2127,7 +2180,7 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount) if (stdrp == NULL && (zp->z_nrules != 0 || zp->z_stdoff != 0)) return -1; abbrvar = (stdrp == NULL) ? 
"" : stdrp->r_abbrvar; - len = doabbr(result, zp->z_format, abbrvar, false, true); + len = doabbr(result, zp, abbrvar, false, true); offsetlen = stringoffset(result + len, -zp->z_gmtoff); if (! offsetlen) { result[0] = '\0'; @@ -2136,7 +2189,7 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount) len += offsetlen; if (dstrp == NULL) return compat; - len += doabbr(result + len, zp->z_format, dstrp->r_abbrvar, true, true); + len += doabbr(result + len, zp, dstrp->r_abbrvar, true, true); if (dstrp->r_stdoff != SECSPERMIN * MINSPERHOUR) { offsetlen = stringoffset(result + len, -(zp->z_gmtoff + dstrp->r_stdoff)); @@ -2307,8 +2360,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) startoff = zp->z_gmtoff; if (zp->z_nrules == 0) { stdoff = zp->z_stdoff; - doabbr(startbuf, zp->z_format, - NULL, stdoff != 0, false); + doabbr(startbuf, zp, NULL, stdoff != 0, false); type = addtype(oadd(zp->z_gmtoff, stdoff), startbuf, stdoff != 0, startttisstd, startttisgmt); @@ -2400,7 +2452,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) if (ktime < starttime) { startoff = oadd(zp->z_gmtoff, stdoff); - doabbr(startbuf, zp->z_format, + doabbr(startbuf, zp, rp->r_abbrvar, rp->r_stdoff != 0, false); @@ -2410,7 +2462,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) startoff == oadd(zp->z_gmtoff, stdoff)) { doabbr(startbuf, - zp->z_format, + zp, rp->r_abbrvar, rp->r_stdoff != false, @@ -2419,7 +2471,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) } eats(zp->z_filename, zp->z_linenum, rp->r_filename, rp->r_linenum); - doabbr(ab, zp->z_format, rp->r_abbrvar, + doabbr(ab, zp, rp->r_abbrvar, rp->r_stdoff != 0, false); offset = oadd(zp->z_gmtoff, rp->r_stdoff); type = addtype(offset, ab, rp->r_stdoff != 0, -- 2.1.4
I've implemented support for this in Noda Time (but it's easily removed if we decide not to go ahead with it). What's the normal lead time for a change like this? The commit message states:
> This is a change only to zic; it does not affect the data, which can still be compiled with older zic versions.
... but presumably that's just saying that you haven't changed the data *yet*. At some point, we're expecting the data to take advantage of this functionality, I assume? If so, do we have an expectation of when we'd first do so?

Jon

On 20 July 2015 at 04:23, Paul Eggert <eggert@cs.ucla.edu> wrote:
This extends the zic input format to add support for %z, which expands to a UTC offset in as-short-as-possible ISO 8601 format. It's intended to better support zones that do not have an established abbreviation already. This is a change only to zic; it does not affect the data, which can still be compiled with older zic versions. * NEWS, zic.8: Document this. * zic.c (struct zone): New member z_format_specifier. (PERCENT_Z_LEN_BOUND): New constant. (max_abbrvar_len): Initialize to it, rather than to 0. (associate, inzsub, doabbr): Add support for %z. (abbroffset): New function. (doabbr): 2nd arg is now struct zone *, not a char *. All callers changed. --- NEWS | 5 +++++ zic.8 | 13 +++++++++++ zic.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 85 insertions(+), 15 deletions(-)
diff --git a/NEWS b/NEWS index b7593e7..9eff28c 100644 --- a/NEWS +++ b/NEWS @@ -25,6 +25,11 @@ Unreleased, experimental changes (Thanks to Jon Skeet and Arthur David Olson.) Constraints on simultaneity are now documented.
+ The two characters '%z' in a zone format now stand for the UTC + offset, e.g., '-07' for seven hours behind UTC and '+0530' for + five hours and thirty minutes ahead. This better supports time + zone abbreviations conforming to POSIX.1-2001 and later. + Changes affecting installed data files
Data entries have been simplified for Atlantic/Canary, Europe/Simferopol, diff --git a/zic.8 b/zic.8 index dfb01db..94b6753 100644 --- a/zic.8 +++ b/zic.8 @@ -350,6 +350,19 @@ The pair of characters is used to show where the .q "variable part" of the time zone abbreviation goes. +Alternately, a format can use the pair of characters +.B %z +to stand for the UTC offset in the form +.RI \(+- hh , +.RI \(+- hhmm , +or +.RI \(+- hhmmss , +using the shortest form that does not lose information, where +.IR hh , +.IR mm , +and +.I ss +are the hours, minutes, and seconds east (+) or west (\(mi) of UTC. Alternately, a slash (/) separates standard and daylight abbreviations. diff --git a/zic.c b/zic.c index 6854033..3c4ae93 100644 --- a/zic.c +++ b/zic.c @@ -76,6 +76,7 @@ struct zone { zic_t z_gmtoff; const char * z_rule; const char * z_format; + char z_format_specifier;
zic_t z_stdoff;
@@ -130,6 +131,9 @@ static void rulesub(struct rule * rp, static zic_t tadd(zic_t t1, zic_t t2); static bool yearistype(int year, const char * type);
+/* Bound on length of what %z can expand to. */ +enum { PERCENT_Z_LEN_BOUND = sizeof "+995959" - 1 }; + static int charcnt; static bool errors; static bool warnings; @@ -139,7 +143,7 @@ static bool leapseen; static zic_t leapminyear; static zic_t leapmaxyear; static int linenum; -static int max_abbrvar_len; +static int max_abbrvar_len = PERCENT_Z_LEN_BOUND; static int max_format_len; static zic_t max_year; static zic_t min_year; @@ -955,7 +959,7 @@ associate(void) ** Note, though, that if there's no rule, ** a '%s' in the format is a bad thing. */ - if (strchr(zp->z_format, '%') != 0) + if (zp->z_format_specifier == 's') error("%s", _("%s in ruleless zone")); } } @@ -1170,6 +1174,7 @@ static bool inzsub(char **fields, int nfields, bool iscont) { register char * cp; + char * cp1; static struct zone z; register int i_gmtoff, i_rule, i_format; register int i_untilyear, i_untilmonth; @@ -1201,13 +1206,21 @@ inzsub(char **fields, int nfields, bool iscont) z.z_linenum = linenum; z.z_gmtoff = gethms(fields[i_gmtoff], _("invalid UT offset"), true); if ((cp = strchr(fields[i_format], '%')) != 0) { - if (*++cp != 's' || strchr(cp, '%') != 0) { + if ((*++cp != 's' && *cp != 'z') || strchr(cp, '%') + || strchr(fields[i_format], '/')) { error(_("invalid abbreviation format")); return false; } } z.z_rule = ecpyalloc(fields[i_rule]); - z.z_format = ecpyalloc(fields[i_format]); + z.z_format = cp1 = ecpyalloc(fields[i_format]); + z.z_format_specifier = cp ? *cp : '\0'; + if (z.z_format_specifier == 'z') { + if (noise) + warning(_("format '%s' not handled by pre-2015 versions of zic"), + z.z_format); + cp1[cp - fields[i_format]] = 's'; + } if (max_format_len < strlen(z.z_format)) max_format_len = strlen(z.z_format); hasuntil = nfields > i_untilyear; @@ -1890,19 +1903,59 @@ writezone(const char *const name, const char *const string, char version) free(fullname); }
+static char const * +abbroffset(char *buf, zic_t offset) +{ + char sign = '+'; + int seconds, minutes; + + if (offset < 0) { + offset = -offset; + sign = '-'; + } + + seconds = offset % SECSPERMIN; + offset /= SECSPERMIN; + minutes = offset % MINSPERHOUR; + offset /= MINSPERHOUR; + if (100 <= offset) { + error(_("%%z UTC offset magnitude exceeds 99:59:59")); + return "%z"; + } else { + char *p = buf; + *p++ = sign; + *p++ = '0' + offset / 10; + *p++ = '0' + offset % 10; + if (minutes | seconds) { + *p++ = '0' + minutes / 10; + *p++ = '0' + minutes % 10; + if (seconds) { + *p++ = '0' + seconds / 10; + *p++ = '0' + seconds % 10; + } + } + *p = '\0'; + return buf; + } +} + static size_t -doabbr(char *const abbr, const char *const format, const char *const letters, +doabbr(char *abbr, struct zone const *zp, char const *letters, bool isdst, bool doquotes) { register char * cp; register char * slashp; register size_t len; + char const *format = zp->z_format;
slashp = strchr(format, '/'); if (slashp == NULL) { - if (letters == NULL) - strcpy(abbr, format); - else sprintf(abbr, format, letters); + char letterbuf[PERCENT_Z_LEN_BOUND + 1]; + if (zp->z_format_specifier == 'z') + letters = abbroffset(letterbuf, -zp->z_gmtoff); + else if (!letters) + letters = "%s"; + sprintf(abbr, format, letters); } else if (isdst) { strcpy(abbr, slashp + 1); } else { @@ -2127,7 +2180,7 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount) if (stdrp == NULL && (zp->z_nrules != 0 || zp->z_stdoff != 0)) return -1; abbrvar = (stdrp == NULL) ? "" : stdrp->r_abbrvar; - len = doabbr(result, zp->z_format, abbrvar, false, true); + len = doabbr(result, zp, abbrvar, false, true); offsetlen = stringoffset(result + len, -zp->z_gmtoff); if (! offsetlen) { result[0] = '\0'; @@ -2136,7 +2189,7 @@ stringzone(char *result, const struct zone *const zpfirst, const int zonecount) len += offsetlen; if (dstrp == NULL) return compat; - len += doabbr(result + len, zp->z_format, dstrp->r_abbrvar, true, true); + len += doabbr(result + len, zp, dstrp->r_abbrvar, true, true); if (dstrp->r_stdoff != SECSPERMIN * MINSPERHOUR) { offsetlen = stringoffset(result + len, -(zp->z_gmtoff + dstrp->r_stdoff)); @@ -2307,8 +2360,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) startoff = zp->z_gmtoff; if (zp->z_nrules == 0) { stdoff = zp->z_stdoff; - doabbr(startbuf, zp->z_format, - NULL, stdoff != 0, false); + doabbr(startbuf, zp, NULL, stdoff != 0, false); type = addtype(oadd(zp->z_gmtoff, stdoff), startbuf, stdoff != 0, startttisstd, startttisgmt); @@ -2400,7 +2452,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) if (ktime < starttime) { startoff = oadd(zp->z_gmtoff, stdoff); - doabbr(startbuf, zp->z_format, + doabbr(startbuf, zp, rp->r_abbrvar, rp->r_stdoff != 0, false); @@ -2410,7 +2462,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) startoff == oadd(zp->z_gmtoff, stdoff)) { 
doabbr(startbuf, - zp->z_format, + zp,
rp->r_abbrvar,
rp->r_stdoff != false, @@ -2419,7 +2471,7 @@ outzone(const struct zone * const zpfirst, const int zonecount) } eats(zp->z_filename, zp->z_linenum, rp->r_filename, rp->r_linenum); - doabbr(ab, zp->z_format, rp->r_abbrvar, + doabbr(ab, zp, rp->r_abbrvar, rp->r_stdoff != 0, false); offset = oadd(zp->z_gmtoff, rp->r_stdoff); type = addtype(offset, ab, rp->r_stdoff != 0, -- 2.1.4
Jon Skeet wrote:
> At some point, we're expecting the data to take advantage of this functionality, I assume? If so, do we have an expectation of when we'd first do so?
I was thinking of it, yes. In the past we've sometimes waited years. It depends on how urgent the data change is. This one doesn't seem urgent now, but one never knows.
On 20/07/2015 04:23, Paul Eggert wrote:
> This extends the zic input format to add support for %z, which expands to a UTC offset in as-short-as-possible ISO 8601 format.
Is there a discrepancy between the documented and actual sign used for the %z format? With this change, the zic.8 man page now states under FORMAT: 'hh, mm, and ss are the hours, minutes, and seconds east (+) or west (−) of UTC.' However, the doabbr function negates zp->z_gmtoff before calling abbroffset:
+ if (zp->z_format_specifier == 'z') + letters = abbroffset(letterbuf, -zp->z_gmtoff);
abbroffset uses '+' as the sign for a positive offset and '-' for a negative offset:
+static char const * +abbroffset(char *buf, zic_t offset) +{ + char sign = '+'; + + if (offset < 0) { + offset = -offset; + sign = '-'; + }
Therefore, won't zic be generating + for west of UTC and − for east instead of the documented behaviour?

Phil
Phil Ross wrote:
> Therefore, won't zic be generating + for west of UTC and − for east instead of the documented behaviour?
Thanks, and you're right, it's backwards. Also, the DST offset is missing. I don't know how zic.c got it wrong -- I did test it, but apparently I installed something other than what I tested. Good thing tzdata isn't using this feature yet. Proposed further patch attached.
participants (3)
- Jon Skeet
- Paul Eggert
- Phil Ross