[PROPOSED] Generalize data format upgrade procedure
* Makefile (XDST): Remove, replacing with the more-general ... (DATAFORM): ... new macro. All uses changed. (fulldata.zi, pdstdata.zi): Remove, replacing with the more-general ... (vanguard.zi, main.zi, rearguard.zi): ... new targets. All uses changed. (check_zishrink): Fix bug that caused this test to fail when PACKRATDATA was nonempty and testing vanguard or rearguard data. * NEWS, europe: Mention changes. * ziguard.awk: Handle fractional seconds as well as negative DST offsets. Rename from zidst.awk, since it now handles issues other than just DST. All uses changed. --- Makefile | 69 ++++++++++++++++++----------------------------- NEWS | 64 ++++++++++++++++++++++++++------------------ europe | 29 +++++++++----------- zidst.awk | 50 ---------------------------------- ziguard.awk | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 166 insertions(+), 135 deletions(-) delete mode 100644 zidst.awk create mode 100644 ziguard.awk diff --git a/Makefile b/Makefile index 92ddb80..659a5b0 100644 --- a/Makefile +++ b/Makefile @@ -10,25 +10,14 @@ VERSION= unknown # Email address for bug reports. BUGEMAIL= tz@iana.org -# To install the full data, which can contain daylight saving time -# offsets that are negative (relative to standard time), use -# XDST= full -# To install data containing only positive daylight saving time -# offsets, but otherwise as close to the full data as practical, use -# XDST= pdst -XDST= pdst -# Parsers requiring DST offsets to be positive should use the file -# pdstdata.zi, which contains almost all the data of 'africa' etc., -# except with positive DST offsets. This works around a problem that -# was discovered in January 2018 with negative DST in tests for ICU -# and OpenJDK. See: -# https://mm.icann.org/pipermail/tz/2018-January/025825.html -# https://mm.icann.org/pipermail/tz/2018-January/025822.html -# Currently the 'africa' etc. 
files use pdst form if comments are -# ignored, to ease transition for parsers that do not support -# negative DST offsets. This is intended to change to full form at -# some point, so that full-featured zi parsers that use the 'africa' -# files will get the full data without changing anything. +# Choose source data features. To get new features right away, use: +# DATAFORM= vanguard +# To wait a while before using new features, to give downstream users +# time to upgrade zic (the default), use: +# DATAFORM= main +# To wait even longer for new features, use: +# DATAFORM= rearguard +DATAFORM= main # Change the line below for your time zone (after finding the zone you want in # the time zone files, or adding it to a time zone file). @@ -483,8 +472,8 @@ TDATA= $(YDATA) $(NDATA) $(BACKWARD) ZONETABLES= zone1970.tab zone.tab TABDATA= iso3166.tab $(TZDATA_TEXT) $(ZONETABLES) LEAP_DEPS= leapseconds.awk leap-seconds.list -TZDATA_ZI_DEPS= zidst.awk zishrink.awk version $(TDATA) $(PACKRATDATA) -DSTDATA_ZI_DEPS= zidst.awk $(TDATA) $(PACKRATDATA) +TZDATA_ZI_DEPS= ziguard.awk zishrink.awk version $(TDATA) $(PACKRATDATA) +DSTDATA_ZI_DEPS= ziguard.awk $(TDATA) $(PACKRATDATA) DATA= $(TDATA_TO_CHECK) backzone iso3166.tab leap-seconds.list \ leapseconds yearistype.sh $(ZONETABLES) AWK_SCRIPTS= checklinks.awk checktab.awk leapseconds.awk zishrink.awk @@ -522,7 +511,7 @@ VERSION_DEPS= \ SHELL= /bin/sh all: tzselect yearistype zic zdump libtz.a $(TABDATA) \ - fulldata.zi pdstdata.zi + vanguard.zi main.zi rearguard.zi ALL: all date $(ENCHILADA) @@ -558,14 +547,14 @@ version: $(VERSION_DEPS) mv $@.out $@ # These files can be tailored by setting BACKWARD, PACKRATDATA, etc. 
-fulldata.zi pdstdata.zi: $(DSTDATA_ZI_DEPS) - $(AWK) -v outfile='$@' -f zidst.awk $(TDATA) $(PACKRATDATA) \ +vanguard.zi main.zi rearguard.zi: $(DSTDATA_ZI_DEPS) + $(AWK) -v outfile='$@' -f ziguard.awk $(TDATA) $(PACKRATDATA) \ >$@.out mv $@.out $@ -tzdata.zi: $(XDST)data.zi version +tzdata.zi: $(DATAFORM).zi version version=`sed 1q version` && \ LC_ALL=C $(AWK) -v version="$$version" -f zishrink.awk \ - $(XDST)data.zi >$@.out + $(DATAFORM).zi >$@.out mv $@.out $@ version.h: version @@ -747,31 +736,25 @@ check_tzs: $(TZS) $(TZS_NEW) check_web: tz-how-to.html $(VALIDATE_ENV) $(VALIDATE) $(VALIDATE_FLAGS) tz-how-to.html -# The format of the source files, either full or pdst. -# Currently they are in pdst format, but this is expected to change. -SDST = pdst - -# Check that zishrink.awk does not alter the data, and that zidst.awk -# preserves $(SDST) data. +# Check that zishrink.awk does not alter the data, and that ziguard.awk +# preserves main-format data. check_zishrink: zic leapseconds $(PACKRATDATA) $(TDATA) \ - $(XDST)data.zi tzdata.zi + $(DATAFORM).zi tzdata.zi for type in posix right; do \ - mkdir -p time_t.dir/$$type time_t.dir/$$type-$(SDST) \ + mkdir -p time_t.dir/$$type time_t.dir/$$type-t \ time_t.dir/$$type-shrunk && \ case $$type in \ right) leap='-L leapseconds';; \ *) leap=;; \ esac && \ - $(ZIC) $$leap -d time_t.dir/$$type $(XDST)data.zi && \ - $(AWK) '/^Rule/' $(XDST)data.zi | \ - $(ZIC) $$leap -d time_t.dir/$$type - $(PACKRATDATA) && \ - case $(XDST) in \ - $(SDST)) \ - $(ZIC) $$leap -d time_t.dir/$$type-$(SDST) $(TDATA) && \ + $(ZIC) $$leap -d time_t.dir/$$type $(DATAFORM).zi && \ + case $(DATAFORM) in \ + main) \ + $(ZIC) $$leap -d time_t.dir/$$type-t $(TDATA) && \ $(AWK) '/^Rule/' $(TDATA) | \ - $(ZIC) $$leap -d time_t.dir/$$type-$(SDST) \ - $(XDST)data.zi && \ - diff -r time_t.dir/$$type time_t.dir/$$type-$(SDST);; \ + $(ZIC) $$leap -d time_t.dir/$$type-t - \ + $(PACKRATDATA) && \ + diff -r time_t.dir/$$type time_t.dir/$$type-t;; \ esac && \ 
$(ZIC) $$leap -d time_t.dir/$$type-shrunk tzdata.zi && \ diff -r time_t.dir/$$type time_t.dir/$$type-shrunk || exit; \ diff --git a/NEWS b/NEWS index adc9814..b13c356 100644 --- a/NEWS +++ b/NEWS @@ -3,8 +3,8 @@ News for the tz database Unreleased, experimental changes Briefly: - Support zi parsers that mishandle negative DST offsets - Add fractional seconds to source data format. + Add support for vanguard and rearguard data consumers. + Add fractional seconds to source data format and to vanguard data. Changes to past time stamps @@ -16,30 +16,41 @@ Unreleased, experimental changes Changes to build procedure - The new XDST macro in the Makefile lets the installer choose - XDST=full, which allows arbitrary DST offsets in the data, or - XDST=pdst, which allows only positive DST offsets. Choosing - XDST=full is arguably more correct for Ireland, which observes - Irish Standard Time (IST, UTC+01) in summer and GMT (UTC) in - winter. Choosing XDST=pdst is better for zoneinfo parsers that do - not work well with negative DST offsets, notably OpenJDK+CLDR. - On platforms using tzcode or similar APIs, XDST should not affect - any behavior other than that depending on the tm_isdst flag. - - For now this change does not affect client-visible behavior by - default, as the Makefile defaults to XDST=pdst and uncommented - parts of the data source files contain only pdst-format data. - After a bit of time for testing, XDST=full and full-format source - files are planned to become the default, so that parsers that - support negative DST offsets can get full data without changing - their build procedures. Parsers requiring positive DST offsets - should use the new file pdstdata.zi instead of tzdata.zi or the - source files 'africa' etc.: pdstdata.zi is pdst-compatible, it is - automatically built from the data source files, and it will - continue to be pdst-compatible regardless of XDST. 
To get - full-format data now, use the new file fulldata.zi, which will - continue to be full-format regardless of XDST. To get the format - selected by XDST, use tzdata.zi. + The new DATAFORM macro in the Makefile lets the installer choose + among three source data formats. The idea is to lessen downstream + disruption when data formats are improved. + + * DATAFORM=vanguard installs from the latest, bleeding-edge + format. DATAFORM=main (the default) installs from the format + used in the 'africa' etc. files. DATAFORM=rearguard installs + from a trailing-edge format. Eventually, elements of today's + vanguard format should move to the main format, and similarly + the main format's features should eventually move to the + rearguard format. + + * In the current version, the main and rearguard formats are + identical and match that of 2018c, so this change does not + affect default behavior. The vanguard format contains two + features not in the main format: fractional seconds and negative + DST offsets. Fractional seconds were added in this release, + where they affect only zic input (output is unaffected). + Negative DST offsets improve support for Ireland, which uses + Irish Standard Time (IST, UTC+01) in summer and GMT (UTC) in + winter. tzcode has supported negative DST offsets for decades, + and this feature should move to the main format soon. However, + it will not move to the rearguard format for quite some time + because some downstream parsers do not support it. + + * The build procedure constructs three files vanguard.zi, main.zi, + and rearguard.zi, one for each format. The files represent the + same data as closely as the formats allow. These three files + are intended for downstream data consumers and are not + installed. Zoneinfo parsers that require positive DST offsets + should start using rearguard.zi, so that they will be unaffected + when the negative-DST feature moves from vanguard to main. 
+ Bleeding-edge Zoneinfo parsers that support the new features + already can use vanguard.zi; in this respect, current tzcode is + bleeding-edge. Changes to code @@ -48,6 +59,7 @@ Unreleased, experimental changes zic currently rounds these fractions to the nearest integer (breaking ties to the even integer), the fractions may be useful to applications requiring more precision in historical timestamps. + This extension is currently used only in vanguard.zi. The code is a bit more portable to MS-Windows. (Thanks to Manuela Friedrich). diff --git a/europe b/europe index 76cbb5d..8aab26e 100644 --- a/europe +++ b/europe @@ -514,21 +514,19 @@ Link Europe/London Europe/Isle_of_Man # https://mm.icann.org/pipermail/tz/2018-January/025825.html # and with tests for OpenJDK: # https://mm.icann.org/pipermail/tz/2018-January/025822.html -# To work around this problem, zidst.awk translates the following data -# lines into two forms. First, fulldata.zi contains the full data, -# which includes negative DST offsets. Second, pdstdata.zi uses a -# traditional approximation for Irish time stamps after 1971-10-31 -# 02:00 UTC; although this approximation has tm_isdst flags that are -# the reverse of the full data, its UTC offsets are correct and this -# suffices for ICU and OpenJDK. Although this source file currently -# has pdstdata.zi lines active and fulldata.zi lines commented out, -# this is intended to change in the near future and downstream code -# should not rely on it. +# +# To work around this problem, the build procedure can translate the +# following data into two forms, one with negative DST offsets and the +# other form with a traditional approximation for Irish time stamps +# after 1971-10-31 02:00 UTC; although this approximation has tm_isdst +# flags that are reversed, its UTC offsets are correct and this often +# suffices. This source file currently uses only positive DST +# offsets, but this is intended to change and downstream code should +# not rely on it. 
# # The following is like GB-Eire and EU, except with standard time in -# summer and negative daylight saving time in winter. -# This rule set is active in fulldata.zi and is commented out in -# pdstdata.zi. +# summer and negative daylight saving time in winter. It is for when +# negative DST offsets are used. # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S #Rule Eire 1971 only - Oct 31 2:00u -1:00 GMT #Rule Eire 1972 1980 - Mar Sun>=16 2:00u 0 IST @@ -549,10 +547,9 @@ Zone Europe/Dublin -0:25:00 - LMT 1880 Aug 2 0:00 1:00 IST 1947 Nov 2 2:00s 0:00 - GMT 1948 Apr 18 2:00s 0:00 GB-Eire GMT/IST 1968 Oct 27 -# The next line is active in fulldata.zi and commented out in pdstdata.zi. +# The next line is for when negative DST offsets are used. # 1:00 Eire IST/GMT -# These three lines are active in pdstdata.zi and commented out in -# fulldata.zi. +# These three lines are for when positive DST offsets are used. 1:00 - IST 1971 Oct 31 2:00u 0:00 GB-Eire GMT/IST 1996 0:00 EU GMT/IST diff --git a/zidst.awk b/zidst.awk deleted file mode 100644 index 7885e9a..0000000 --- a/zidst.awk +++ /dev/null @@ -1,50 +0,0 @@ -# Convert tzdata source into full or positive-DST form - -# Contributed by Paul Eggert. This file is in the public domain. - -# This is not a general-purpose converter; it is designed for current tzdata. -# -# When converting to full form, the output can use negative DST offsets. -# -# When converting to positive-DST form, the output uses only positive -# DST offsets. The idea is for the output data to simulate the -# behavior of the input data as best it can within the constraints of -# positive DST offsets. -# -# In the input, lines requiring the full format are commented #[full] -# and the positive DST near-equivalents are commented #[pdst]. - -BEGIN { - dst_type["full"] = 1 - dst_type["pdst"] = 1 - - # The command line should set OUTFILE to the name of the output file, - # which should start with either "full" or "pdst". 
- todst = substr(outfile, 1, 4) - if (!dst_type[todst]) exit 1 -} - -/^Zone/ { zone = $2 } - -{ - in_comment = /^#/ - - # Test whether this line should differ between the full and the pdst versions. - Rule_Eire = /^#?Rule[\t ]+Eire[\t ]/ - Zone_Dublin_post_1968 \ - = (zone == "Europe/Dublin" && /^#?[\t ]+[01]:00[\t ]/ \ - && (!$(in_comment + 4) || 1968 < $(in_comment + 4))) - - # If so, uncomment the desired version and comment out the undesired one. - if (Rule_Eire || Zone_Dublin_post_1968) { - if ((Rule_Eire \ - || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \ - == (todst == "full")) { - sub(/^#/, "") - } else if (/^[^#]/) { - sub(/^/, "#") - } - } -} - -{ print } diff --git a/ziguard.awk b/ziguard.awk new file mode 100644 index 0000000..795b4ef --- /dev/null +++ b/ziguard.awk @@ -0,0 +1,89 @@ +# Convert tzdata source into vanguard or rearguard form. + +# Contributed by Paul Eggert. This file is in the public domain. + +# This is not a general-purpose converter; it is designed for current tzdata. +# +# When converting to vanguard form, the output can use fractional seconds +# and negative DST offsets. +# +# When converting to rearguard form, the output omits fractional +# seconds and uses only positive DST offsets. The idea is for the +# output data to simulate the behavior of the input data as best it +# can within the constraints of the rearguard format. + +BEGIN { + dst_type["vanguard.zi"] = 1 + dst_type["main.zi"] = 1 + dst_type["rearguard.zi"] = 1 + + # The command line should set OUTFILE to the name of the output file. + if (!dst_type[outfile]) exit 1 + vanguard = outfile == "vanguard.zi" + + # List non-integer standard times more accurately if known. + # This list does not attempt to record every UT offset that is + # not an integral multiple of 1 s; it merely records those that + # do not appear to be just LMT. 
+ frac["-5:36:13"] = "-5:36:13.3" # America/Costa_Rica before 1921 + frac["-5:07:10"] = "-5:07:10.41" # America/Jamaica before 1912 + frac["-4:16:48"] = "-4:16:48.25" # America/Cordoba etc. 1894-1920 + frac["-0:36:45"] = "-0:36:44.68" # Europe/Lisbon before 1912 + frac["-0:25:21"] = "-0:25:21.1" # Europe/Dublin 1880-1916 + frac["0:19:32"] = "0:19:32.13" # Europe/Amsterdam before 1937 + frac["1:39:49"] = "1:39:49.2" # Europe/Helsinki before 1921 + frac["2:05:09"] = "2:05:08.9" # Africa/Cairo before 1900 + frac["4:37:11"] = "4:37:10.8" # Asia/Tashkent before 1924 + frac["7:06:30"] = "7:06:30.1333" #... Asia/Ho_Chi_Minh 1906-1911 + frac["7:07:12"] = "7:07:12.5" # Asia/Jakarta before 1923 + frac["7:36:42"] = "7:36:41.7" # Asia/Hong_Kong before 1904 + frac["8:05:43"] = "8:05:43.2" # Asia/Shanghai before 1901 +} + +/^Zone/ { zone = $2 } + +outfile != "main.zi" { + in_comment = /^#/ + + # If this line should differ due to Ireland using negative DST offsets, + # uncomment the desired version and comment out the undesired one. + Rule_Eire = /^#?Rule[\t ]+Eire[\t ]/ + Zone_Dublin_post_1968 \ + = (zone == "Europe/Dublin" && /^#?[\t ]+[01]:00[\t ]/ \ + && (!$(in_comment + 4) || 1968 < $(in_comment + 4))) + if (Rule_Eire || Zone_Dublin_post_1968) { + if ((Rule_Eire \ + || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \ + == vanguard) { + sub(/^#/, "") + } else if (/^[^#]/) { + sub(/^/, "#") + } + } + + # Add or remove fractional seconds as needed. + f = $1 == "Zone" ? 3 : 1 + for (rounded in frac) { + original = frac[rounded] + if ($f == rounded || $f == original) { + $f = vanguard ? original : rounded + } + } +} + +# If a Link line is followed by a Zone line for the same data, comment +# out the Link line. This can happen if backzone overrides a Link +# with a Zone. +/^Link/ { + linkline[$3] = NR +} +/^Zone/ { + sub(/^Link/, "#Link", line[linkline[$2]]) +} + +{ line[NR] = $0 } + +END { + for (i = 1; i <= NR; i++) + print line[i] +} -- 2.14.3
Participants (1):
- Paul Eggert