This is intended to provide a way to support both clients that require data to have only positive DST offsets, and clients that do not have this restriction. * Makefile (XDST, SDST): New macros. (TZDATA_ZI_DEPS): Add zidst.awk. (DSTDATA_ZI_DEPS): New macro. (all): Depend on fulldata.zi and pdstdata.zi. (fulldata.zi pdstdata.zi): New rule. (tzdata.zi): Use $(XDST)data.zi instead of reading original source. (check_zishrink): Check zidst.awk, too. (clean): Remove all *.zi files, not just tzdata.zi. * NEWS, europe: Mention this. * zidst.awk: New file. --- Makefile | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++----------- NEWS | 30 ++++++++++++++++++++++++++++++ europe | 39 ++++++++++++++++++++++----------------- zidst.awk | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 28 deletions(-) create mode 100644 zidst.awk diff --git a/Makefile b/Makefile index 8c84cd9..92ddb80 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,26 @@ VERSION= unknown # Email address for bug reports. BUGEMAIL= tz@iana.org +# To install the full data, which can contain daylight saving time +# offsets that are negative (relative to standard time), use +# XDST= full +# To install data containing only positive daylight saving time +# offsets, but otherwise as close to the full data as practical, use +# XDST= pdst +XDST= pdst +# Parsers requiring DST offsets to be positive should use the file +# pdstdata.zi, which contains almost all the data of 'africa' etc., +# except with positive DST offsets. This works around a problem that +# was discovered in January 2018 with negative DST in tests for ICU +# and OpenJDK. See: +# https://mm.icann.org/pipermail/tz/2018-January/025825.html +# https://mm.icann.org/pipermail/tz/2018-January/025822.html +# Currently the 'africa' etc. files use pdst form if comments are +# ignored, to ease transition for parsers that do not support +# negative DST offsets. 
This is intended to change to full form at +# some point, so that full-featured zi parsers that use the 'africa' +# files will get the full data without changing anything. + # Change the line below for your time zone (after finding the zone you want in # the time zone files, or adding it to a time zone file). # Alternately, if you discover you've got the wrong time zone, you can just @@ -463,7 +483,8 @@ TDATA= $(YDATA) $(NDATA) $(BACKWARD) ZONETABLES= zone1970.tab zone.tab TABDATA= iso3166.tab $(TZDATA_TEXT) $(ZONETABLES) LEAP_DEPS= leapseconds.awk leap-seconds.list -TZDATA_ZI_DEPS= zishrink.awk version $(TDATA) $(PACKRATDATA) +TZDATA_ZI_DEPS= zidst.awk zishrink.awk version $(TDATA) $(PACKRATDATA) +DSTDATA_ZI_DEPS= zidst.awk $(TDATA) $(PACKRATDATA) DATA= $(TDATA_TO_CHECK) backzone iso3166.tab leap-seconds.list \ leapseconds yearistype.sh $(ZONETABLES) AWK_SCRIPTS= checklinks.awk checktab.awk leapseconds.awk zishrink.awk @@ -500,7 +521,8 @@ VERSION_DEPS= \ SHELL= /bin/sh -all: tzselect yearistype zic zdump libtz.a $(TABDATA) +all: tzselect yearistype zic zdump libtz.a $(TABDATA) \ + fulldata.zi pdstdata.zi ALL: all date $(ENCHILADA) @@ -535,11 +557,15 @@ version: $(VERSION_DEPS) printf '%s\n' "$$V" >$@.out mv $@.out $@ -# This file can be tailored by setting BACKWARD, PACKRATDATA, etc. -tzdata.zi: $(TZDATA_ZI_DEPS) +# These files can be tailored by setting BACKWARD, PACKRATDATA, etc. +fulldata.zi pdstdata.zi: $(DSTDATA_ZI_DEPS) + $(AWK) -v outfile='$@' -f zidst.awk $(TDATA) $(PACKRATDATA) \ + >$@.out + mv $@.out $@ +tzdata.zi: $(XDST)data.zi version zishrink.awk version=`sed 1q version` && \ LC_ALL=C $(AWK) -v version="$$version" -f zishrink.awk \ - $(TDATA) $(PACKRATDATA) >$@.out + $(XDST)data.zi >$@.out mv $@.out $@ version.h: version @@ -721,17 +747,32 @@ check_tzs: $(TZS) $(TZS_NEW) check_web: tz-how-to.html $(VALIDATE_ENV) $(VALIDATE) $(VALIDATE_FLAGS) tz-how-to.html -# Check that tzdata.zi generates the same binary data that its sources do. 
-check_zishrink: tzdata.zi zic leapseconds $(PACKRATDATA) $(TDATA) +# The format of the source files, either full or pdst. +# Currently they are in pdst format, but this is expected to change. +SDST = pdst + +# Check that zishrink.awk does not alter the data, and that zidst.awk +# preserves $(SDST) data. +check_zishrink: zic leapseconds $(PACKRATDATA) $(TDATA) \ + $(XDST)data.zi tzdata.zi for type in posix right; do \ - mkdir -p time_t.dir/$$type time_t.dir/$$type-shrunk && \ + mkdir -p time_t.dir/$$type time_t.dir/$$type-$(SDST) \ + time_t.dir/$$type-shrunk && \ case $$type in \ right) leap='-L leapseconds';; \ *) leap=;; \ esac && \ - $(ZIC) $$leap -d time_t.dir/$$type $(TDATA) && \ - $(AWK) '/^Rule/' $(TDATA) | \ + $(ZIC) $$leap -d time_t.dir/$$type $(XDST)data.zi && \ + $(AWK) '/^Rule/' $(XDST)data.zi | \ $(ZIC) $$leap -d time_t.dir/$$type - $(PACKRATDATA) && \ + case $(XDST) in \ + $(SDST)) \ + $(ZIC) $$leap -d time_t.dir/$$type-$(SDST) $(TDATA) && \ + $(AWK) '/^Rule/' $(TDATA) | \ + $(ZIC) $$leap -d time_t.dir/$$type-$(SDST) \ + - $(PACKRATDATA) && \ + diff -r time_t.dir/$$type time_t.dir/$$type-$(SDST);; \ + esac && \ $(ZIC) $$leap -d time_t.dir/$$type-shrunk tzdata.zi && \ diff -r time_t.dir/$$type time_t.dir/$$type-shrunk || exit; \ done @@ -741,7 +782,7 @@ clean_misc: rm -f core *.o *.out \ date tzselect version.h zdump zic yearistype libtz.a clean: clean_misc - rm -fr *.dir tzdata.zi tzdb-*/ $(TZS_NEW) + rm -fr *.dir *.zi tzdb-*/ $(TZS_NEW) maintainer-clean: clean @echo 'This command is intended for maintainers to use; it' diff --git a/NEWS b/NEWS index 4f763c0..c455f3c 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,36 @@ News for the tz database Unreleased, experimental changes + Briefly: + Support zi parsers that mishandle negative DST offsets + + Changes to build procedure + + The new XDST macro in the Makefile lets the installer choose + XDST=full, which allows arbitrary DST offsets in the data, or + XDST=pdst, which allows only positive DST offsets. 
Choosing + XDST=full is arguably more correct for Ireland, which observes + Irish Standard Time (IST, UTC+01) in summer and GMT (UTC) in + winter. Choosing XDST=pdst is better for zoneinfo parsers that do + not work well with negative DST offsets, notably OpenJDK+CLDR. + On platforms using tzcode or similar APIs, XDST should not affect + any behavior other than that depending on the tm_isdst flag. + + For now this change does not affect client-visible behavior by + default, as the Makefile defaults to XDST=pdst and uncommented + parts of the data source files contain only pdst-format data. + After a bit of time for testing, XDST=full and full-format source + files are planned to become the default, so that parsers that + support negative DST offsets can get full data without changing + their build procedures. Parsers requiring positive DST offsets + should use the new file pdstdata.zi instead of tzdata.zi or the + source files 'africa' etc.: pdstdata.zi is pdst-compatible, it is + automatically built from the data source files, and it will + continue to be pdst-compatible regardless of XDST. To get + full-format data now, use the new file fulldata.zi, which will + continue to be full-format regardless of XDST. To get the format + selected by XDST, use tzdata.zi. + Changes to code The code is a bit more portable to MS-Windows. 
(Thanks to Manuela diff --git a/europe b/europe index 6c1ccbe..5aeda33 100644 --- a/europe +++ b/europe @@ -508,11 +508,27 @@ Link Europe/London Europe/Jersey Link Europe/London Europe/Guernsey Link Europe/London Europe/Isle_of_Man -# From Paul Eggert (2018-01-19): +# From Paul Eggert (2018-01-30): +# In January 2018 we discovered that the negative DST offsets in the +# Eire rules cause problems with tests for ICU: +# https://mm.icann.org/pipermail/tz/2018-January/025825.html +# and with tests for OpenJDK: +# https://mm.icann.org/pipermail/tz/2018-January/025822.html +# To work around this problem, zidst.awk translates the following data +# lines into two forms. First, fulldata.zi contains the full data, +# which includes negative DST offsets. Second, pdstdata.zi uses a +# traditional approximation for Irish time stamps after 1971-10-31 +# 02:00 UTC; although this approximation has tm_isdst flags that are +# the reverse of the full data, its UTC offsets are correct and this +# suffices for ICU and OpenJDK. Although this source file currently +# has pdstdata.zi lines active and fulldata.zi lines commented out, +# this is intended to change in the near future and downstream code +# should not rely on it. +# # The following is like GB-Eire and EU, except with standard time in # summer and negative daylight saving time in winter. -# Although currently commented out, this will need to become uncommented -# once the ICU/OpenJDK workaround is removed; see below. +# This rule set is active in fulldata.zi and is commented out in +# pdstdata.zi. 
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S #Rule Eire 1971 only - Oct 31 2:00u -1:00 GMT #Rule Eire 1972 1980 - Mar Sun>=16 2:00u 0 IST @@ -533,24 +549,13 @@ Zone Europe/Dublin -0:25:00 - LMT 1880 Aug 2 0:00 1:00 IST 1947 Nov 2 2:00s 0:00 - GMT 1948 Apr 18 2:00s 0:00 GB-Eire GMT/IST 1968 Oct 27 -# From Paul Eggert (2018-01-18): -# The next line should look like this: +# The next line is active in fulldata.zi and commented out in pdstdata.zi. # 1:00 Eire IST/GMT -# However, in January 2018 we discovered that the Eire rules cause -# problems with tests for ICU: -# https://mm.icann.org/pipermail/tz/2018-January/025825.html -# and with tests for OpenJDK: -# https://mm.icann.org/pipermail/tz/2018-January/025822.html -# To work around this problem, use a traditional approximation for -# time stamps after 1971-10-31 02:00 UTC, to give ICU and OpenJDK -# developers breathing room to fix bugs. This approximation has -# correct UTC offsets, but results in tm_isdst flags are the reverse -# of what they should be. This workaround is temporary and should be -# removed reasonably soon. +# These three lines are active in pdstdata.zi and commented out in +# fulldata.zi. 1:00 - IST 1971 Oct 31 2:00u 0:00 GB-Eire GMT/IST 1996 0:00 EU GMT/IST -# End of workaround for ICU and OpenJDK bugs. ############################################################################### diff --git a/zidst.awk b/zidst.awk new file mode 100644 index 0000000..7885e9a --- /dev/null +++ b/zidst.awk @@ -0,0 +1,50 @@ +# Convert tzdata source into full or positive-DST form + +# Contributed by Paul Eggert. This file is in the public domain. + +# This is not a general-purpose converter; it is designed for current tzdata. +# +# When converting to full form, the output can use negative DST offsets. +# +# When converting to positive-DST form, the output uses only positive +# DST offsets. 
The idea is for the output data to simulate the +# behavior of the input data as best it can within the constraints of +# positive DST offsets. +# +# In the input, lines requiring the full format are commented #[full] +# and the positive DST near-equivalents are commented #[pdst]; NOTE(review): the code below keys on line content, not on these markers. + +BEGIN { + dst_type["full"] = 1 + dst_type["pdst"] = 1 + + # The command line should set outfile (e.g. -v outfile=NAME) to the name of the output file, + # which should start with either "full" or "pdst". + todst = substr(outfile, 1, 4) + if (!dst_type[todst]) exit 1 +} + +/^Zone/ { zone = $2 } # remember which zone the following lines belong to + +{ + in_comment = /^#/ # 1 if the line starts with '#'; used below as a field-number offset + + # Test whether this line should differ between the full and the pdst versions. + Rule_Eire = /^#?Rule[\t ]+Eire[\t ]/ + Zone_Dublin_post_1968 \ + = (zone == "Europe/Dublin" && /^#?[\t ]+[01]:00[\t ]/ \ + && (!$(in_comment + 4) || 1968 < $(in_comment + 4))) + + # If so, uncomment the desired version and comment out the undesired one. + if (Rule_Eire || Zone_Dublin_post_1968) { + if ((Rule_Eire \ + || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \ + == (todst == "full")) { + sub(/^#/, "") + } else if (/^[^#]/) { + sub(/^/, "#") + } + } +} + +{ print } -- 2.14.3