New subject: winnow patches

Aug. 30, 2013

This lets us preserve information about pre-1970 time stamps when
we change a Zone to a Link to another zone whose time stamps agree
after 1970.  This should address concerns about some recent
changes that removed this information.  This implementation is a
stripped-down version of a suggestion by Andrew Main (Zefram) in
<http://mm.icann.org/pipermail/tz/2013-August/019615.html> and
<http://mm.icann.org/pipermail/tz/2013-August/019639.html> to
allow filtering tz data by date range.  Unlike Zefram's
suggestion, this implementation supports only two date ranges,
namely 1970 on, using 'make BACKWARD=backward'; and all dates,
using 'make BACKWARD="pre1970 back-pre1970"'.  At some point I'd
like to improve it to support arbitrary date ranges, but at least
we've now restored the data whose loss was of some concern.
* .gitignore: Add back-pre1970.
* Makefile (BACKWARD): New macro.
(YDATA): Use it instead of 'backward'.
(AWK_SCRIPTS): New macro, with additional script back-pre1970.awk.
(MISC): Use it.
(back-pre1970): New rule.
(clean_misc): Clean back-pre1970.  Also clean time.tab, while we're
at it.
(check_public): Don't require pre1970 to stand alone.
* pre1970, back-pre1970.awk: New files.
---
 .gitignore       |   1 +
 Makefile         |  32 +++++-
 back-pre1970.awk |  18 ++++
 pre1970          | 291 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 337 insertions(+), 5 deletions(-)
 create mode 100644 back-pre1970.awk
 create mode 100644 pre1970

diff --git a/.gitignore b/.gitignore
index 18dbbcc..28b1bc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 *.txt
 *~
 ChangeLog
+back-pre1970
 date
 leapseconds
 time.tab
diff --git a/Makefile b/Makefile
index a74d1a7..ffddb08 100644
--- a/Makefile
+++ b/Makefile
@@ -49,6 +49,22 @@ POSIXRULES=	America/New_York
 
 ZONETABTYPE=	zone
 
+# How to support obsolescent time zones in a backward-compatible way.
+# This variable affects only pre-1970 time stamps, on hosts that support them.
+# It has two possible values, 'backward' and 'pre1970 back-pre1970'.
+#
+# 'backward' is the traditional approach, and is simpler and more efficient;
+# it is designed to generate one zone for each region where clocks have agreed
+# since 1970.
+#
+# 'pre1970 back-pre1970' can generate more than one zone in that situation,
+# which means it can preserve a bit of pre-1970 data that 'backward' does not;
+# almost all pre-1970 data is missing, though, so don't get your hopes up.
+#
+# Sometimes 'backward' is more compatible with earlier versions of this database,
+# and sometimes 'pre1970 back-pre1970' is; it depends on the situation.
+BACKWARD=	backward
+
 # Also see TZDEFRULESTRING below, which takes effect only
 # if the time zone files cannot be accessed.
 
@@ -322,7 +338,7 @@ COMMON=		Makefile
 DOCS=		README Theory $(MANS) date.1
 PRIMARY_YDATA=	africa antarctica asia australasia \
 		europe northamerica southamerica
-YDATA=		$(PRIMARY_YDATA) pacificnew etcetera backward
+YDATA=		$(PRIMARY_YDATA) pacificnew etcetera $(BACKWARD)
 NDATA=		systemv factory
 SDATA=		solar87 solar88 solar89
 TDATA=		$(YDATA) $(NDATA) $(SDATA)
@@ -330,9 +346,10 @@ TABDATA=	iso3166.tab time.tab zone.tab
 DATA=		$(YDATA) $(NDATA) $(SDATA) $(TABDATA) \
 			leap-seconds.list yearistype.sh
 WEB_PAGES=	tz-art.htm tz-link.htm
+AWK_SCRIPTS=	back-pre1970.awk checktab.awk leapseconds.awk zone-time.awk
 MISC=		usno1988 usno1989 usno1989a usno1995 usno1997 usno1998 \
-			$(WEB_PAGES) checktab.awk leapseconds.awk workman.sh \
-			zoneinfo2tdf.pl
+			$(WEB_PAGES) $(AWK_SCRIPTS) \
+			workman.sh zoneinfo2tdf.pl
 ENCHILADA=	$(COMMON) $(DOCS) $(SOURCES) $(DATA) $(MISC)
 
 # And for the benefit of csh users on systems that assume the user
@@ -423,6 +440,9 @@ zones:		$(REDO)
 time.tab:	$(YDATA) zone.tab zone-time.awk
 		$(AWK) -f zone-time.awk $(YDATA) >$@
 
+back-pre1970:	pre1970 backward back-pre1970.awk # also rebuild when the filter script changes
+		$(AWK) -v pre1970=pre1970 -f $@.awk backward >$@
+
 $(TZLIB):	$(LIBOBJS)
 		-mkdir $(TOPDIR) $(LIBDIR)
 		ar ru $@ $(LIBOBJS)
@@ -457,6 +477,7 @@ check_web:	$(WEB_PAGES)
 
 clean_misc:
 		rm -f core *.o *.out \
+		  back-pre1970 time.tab \
 		  date leapseconds tzselect version.h zdump zic yearistype
 clean:		clean_misc
 		rm -f -r tzpublic
@@ -488,7 +509,7 @@ set-timestamps:
 		  $$cmd || exit; \
 		done
 
-# The zics below ensure that each data file can stand on its own.
+# The zics below ensure that each non-pre1970 data file can stand on its own.
 # We also do an all-files run to catch links to links.
 
 check_public:	$(ENCHILADA)
@@ -496,7 +517,8 @@ check_public:	$(ENCHILADA)
 		make "CFLAGS=$(GCC_DEBUG_FLAGS)"
 		mkdir tzpublic
 		for i in $(TDATA) ; do \
-		  $(zic) -v -d tzpublic $$i 2>&1 || exit; \
+		  test $$i = pre1970 || $(zic) -v -d tzpublic $$i 2>&1 \
+		    || exit; \
 		done
 		$(zic) -v -d tzpublic $(TDATA)
 		rm -f -r tzpublic
diff --git a/back-pre1970.awk b/back-pre1970.awk
new file mode 100644
index 0000000..f7c54fc
--- /dev/null
+++ b/back-pre1970.awk
@@ -0,0 +1,18 @@
+# Generate 'back-pre1970' from the two input files 'pre1970' and 'backward'.
+# The output is every line of 'backward' except Link lines whose link name
+# (third field) is defined as a Zone in 'pre1970': 'backward' minus 'pre1970'.
+
+# The 'backward' file is the input.
+# The awk variable 'pre1970' contains the name of the pre1970 file.
+
+# This file is in the public domain.
+
+# Contributed by Paul Eggert.
+
+BEGIN {
+    # Remember the name of every Zone that the pre1970 file defines.
+    while ((getline <pre1970) == 1)
+	if ($1 == "Zone") pre1970_zone[$2] = 1
+}
+
+!/^Link/ || !pre1970_zone[$3] { print }
diff --git a/pre1970 b/pre1970
new file mode 100644
index 0000000..d8b8f34
--- /dev/null
+++ b/pre1970
@@ -0,0 +1,291 @@
+# Pre-1970 data
+
+# This file is in the public domain.
+
+# This file contains zones that were formerly in other source files,
+# but were later removed or replaced by backward-compatibility links
+# as they differ from other zones only in pre-1970 time stamps.
+
+# Although the tz database focuses on post-1970 time stamps, these
+# entries are retained here as they may be of some use to people
+# interested in pre-1970 time stamps, even though they cover only a
+# tiny sliver of pre-1970 data and are unreliable for that data.
+# Also, these entries can help with backward compatibility with some
+# old versions of the tz database.  They are incompatible with other
+# old versions of the database, though; it depends on which old
+# version you're interested in.
+
+# Entries are sorted by Zone name.  Each entry is preceded by the name
+# of the country that the entry is in, along with any other commentary
+# and rules associated with the entry.  Some rules, e.g., 'Canada',
+# are defined by other source files; this file is not intended to be
+# used without those other files.
+
+# Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
+
+# Mali
+# no longer different from Bamako, but too famous to omit
+Zone	Africa/Timbuktu	-0:12:04 -	LMT	1912
+			 0:00	-	GMT
+
+# Anguilla
+Zone America/Anguilla	-4:12:16 -	LMT	1912 Mar 2
+			-4:00	-	AST
+
+# Antigua and Barbuda
+Zone	America/Antigua	-4:07:12 -	LMT	1912 Mar 2
+			-5:00	-	EST	1951
+			-4:00	-	AST
+
+# Argentina
+# Chubut (CH)
+# The name "Comodoro Rivadavia" exceeds the 14-byte POSIX limit.
+Zone America/Argentina/ComodRivadavia -4:30:00 - LMT	1894 Oct 31
+			-4:16:48 -	CMT	1920 May
+			-4:00	-	ART	1930 Dec
+			-4:00	Arg	AR%sT	1969 Oct  5
+			-3:00	Arg	AR%sT	1991 Mar  3
+			-4:00	-	WART	1991 Oct 20
+			-3:00	Arg	AR%sT	1999 Oct  3
+			-4:00	Arg	AR%sT	2000 Mar  3
+			-3:00	-	ART	2004 Jun  1
+			-4:00	-	WART	2004 Jun 20
+			-3:00	-	ART
+
+# Aruba
+Zone	America/Aruba	-4:40:24 -	LMT	1912 Feb 12	# Oranjestad
+			-4:30	-	ANT	1965 # Netherlands Antilles Time
+			-4:00	-	AST
+
+# Canada
+
+Zone America/Atikokan	-6:06:28 -	LMT	1895
+			-6:00	Canada	C%sT	1940 Sep 29
+			-6:00	1:00	CDT	1942 Feb  9 2:00s
+			-6:00	Canada	C%sT	1945 Sep 30 2:00
+			-5:00	-	EST
+
+Zone America/Blanc-Sablon -3:48:28 -	LMT	1884
+			-4:00	Canada	A%sT	1970
+			-4:00	-	AST
+
+# Cayman Is
+Zone	America/Cayman	-5:25:32 -	LMT	1890		# Georgetown
+			-5:07:12 -	KMT	1912 Feb    # Kingston Mean Time
+			-5:00	-	EST
+
+# Canada
+Zone America/Coral_Harbour -5:32:40 -	LMT	1884
+			-5:00	NT_YK	E%sT	1946
+			-5:00	-	EST
+
+# Curacao
+Zone	America/Curacao	-4:35:47 -	LMT	1912 Feb 12	# Willemstad
+			-4:30	-	ANT	1965 # Netherlands Antilles Time
+			-4:00	-	AST
+
+# Dominica
+Zone America/Dominica	-4:05:36 -	LMT	1911 Jul 1 0:01		# Roseau
+			-4:00	-	AST
+
+# Mexico
+Zone America/Ensenada	-7:46:28 -	LMT	1922 Jan  1  0:13:32
+			-8:00	-	PST	1927 Jun 10 23:00
+			-7:00	-	MST	1930 Nov 16
+			-8:00	-	PST	1942 Apr
+			-7:00	-	MST	1949 Jan 14
+			-8:00	-	PST	1996
+			-8:00	Mexico	P%sT
+
+# US
+Zone America/Fort_Wayne -5:00	US	E%sT	1946
+			-5:00	-	EST	# Always EST as of 1986
+
+# Grenada
+Zone	America/Grenada	-4:07:00 -	LMT	1911 Jul	# St George's
+			-4:00	-	AST
+
+# Guadeloupe
+Zone America/Guadeloupe	-4:06:08 -	LMT	1911 Jun 8	# Pointe a Pitre
+			-4:00	-	AST
+
+# Canada
+# Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER/S
+Rule	Mont	1917	only	-	Mar	25	2:00	1:00	D
+Rule	Mont	1917	only	-	Apr	24	0:00	0	S
+Rule	Mont	1919	only	-	Mar	31	2:30	1:00	D
+Rule	Mont	1919	only	-	Oct	25	2:30	0	S
+Rule	Mont	1920	only	-	May	 2	2:30	1:00	D
+Rule	Mont	1920	1922	-	Oct	Sun>=1	2:30	0	S
+Rule	Mont	1921	only	-	May	 1	2:00	1:00	D
+Rule	Mont	1922	only	-	Apr	30	2:00	1:00	D
+Rule	Mont	1924	only	-	May	17	2:00	1:00	D
+Rule	Mont	1924	1926	-	Sep	lastSun	2:30	0	S
+Rule	Mont	1925	1926	-	May	Sun>=1	2:00	1:00	D
+# The 1927-to-1937 rules can be expressed more simply as
+# Rule	Mont	1927	1937	-	Apr	lastSat	24:00	1:00	D
+# Rule	Mont	1927	1937	-	Sep	lastSat	24:00	0	S
+# The rules below avoid use of 24:00
+# (which pre-1998 versions of zic cannot handle).
+Rule	Mont	1927	only	-	May	1	0:00	1:00	D
+Rule	Mont	1927	1932	-	Sep	lastSun	0:00	0	S
+Rule	Mont	1928	1931	-	Apr	lastSun	0:00	1:00	D
+Rule	Mont	1932	only	-	May	1	0:00	1:00	D
+Rule	Mont	1933	1940	-	Apr	lastSun	0:00	1:00	D
+Rule	Mont	1933	only	-	Oct	1	0:00	0	S
+Rule	Mont	1934	1939	-	Sep	lastSun	0:00	0	S
+Rule	Mont	1946	1973	-	Apr	lastSun	2:00	1:00	D
+Rule	Mont	1945	1948	-	Sep	lastSun	2:00	0	S
+Rule	Mont	1949	1950	-	Oct	lastSun	2:00	0	S
+Rule	Mont	1951	1956	-	Sep	lastSun	2:00	0	S
+Rule	Mont	1957	1973	-	Oct	lastSun	2:00	0	S
+# Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
+Zone America/Montreal	-4:54:16 -	LMT	1884
+			-5:00	Mont	E%sT	1918
+			-5:00	Canada	E%sT	1919
+			-5:00	Mont	E%sT	1942 Feb  9 2:00s
+			-5:00	Canada	E%sT	1946
+			-5:00	Mont	E%sT	1974
+			-5:00	Canada	E%sT
+
+# Montserrat
+Zone America/Montserrat	-4:08:52 -	LMT	1911 Jul 1 0:01   # Cork Hill
+			-4:00	-	AST
+
+# Bahamas
+# Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER/S
+Rule	Bahamas	1964	1975	-	Oct	lastSun	2:00	0	S
+Rule	Bahamas	1964	1975	-	Apr	lastSun	2:00	1:00	D
+# Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
+Zone	America/Nassau	-5:09:30 -	LMT	1912 Mar 2
+			-5:00	Bahamas	E%sT	1976
+			-5:00	US	E%sT
+
+# Trinidad and Tobago
+Zone America/Port_of_Spain -4:06:04 -	LMT	1912 Mar 2
+			-4:00	-	AST
+
+# Brazil
+# Rio_Branco is too ambiguous, since there's a Rio Branco in Uruguay too.
+Zone America/Porto_Acre	-4:31:12 -	LMT	1914
+			-5:00	Brazil	AC%sT	1988 Sep 12
+			-5:00	-	ACT
+
+# Argentina
+# Santa Fe (SF), Entre Rios (ER), Corrientes (CN), Misiones (MN), Chaco (CC),
+# Formosa (FM), La Pampa (LP), Chubut (CH)
+Zone America/Rosario	-4:02:40 -	LMT	1894 Nov
+			-4:16:44 -	CMT	1920 May
+			-4:00	-	ART	1930 Dec
+			-4:00	Arg	AR%sT	1969 Oct  5
+			-3:00	Arg	AR%sT	1991 Jul
+			-3:00	-	ART	1999 Oct  3 0:00
+			-4:00	Arg	AR%sT	2000 Mar  3 0:00
+			-3:00	-	ART
+
+# St Kitts-Nevis
+Zone America/St_Kitts	-4:10:52 -	LMT	1912 Mar 2	# Basseterre
+			-4:00	-	AST
+
+# St Lucia
+Zone America/St_Lucia	-4:04:00 -	LMT	1890		# Castries
+			-4:04:00 -	CMT	1912	    # Castries Mean Time
+			-4:00	-	AST
+
+# Virgin Is
+Zone America/St_Thomas	-4:19:44 -	LMT	1911 Jul    # Charlotte Amalie
+			-4:00	-	AST
+
+# St Vincent and the Grenadines
+Zone America/St_Vincent	-4:04:56 -	LMT	1890		# Kingstown
+			-4:04:56 -	KMT	1912	   # Kingstown Mean Time
+			-4:00	-	AST
+
+# British Virgin Is
+Zone America/Tortola	-4:18:28 -	LMT	1911 Jul    # Road Town
+			-4:00	-	AST
+
+# McMurdo, Ross Island, since 1955-12
+Zone Antarctica/McMurdo	0	-	zzz	1956
+			12:00	NZAQ	NZ%sT
+
+# Japan
+Zone	Asia/Ishigaki	8:16:36	-	LMT	1896
+			8:00	-	CST
+
+# Israel
+Zone	Asia/Tel_Aviv	2:19:04 -	LMT	1880
+			2:21	-	JMT	1918
+			2:00	Zion	I%sT
+
+# Russia
+Zone Asia/Tomsk		 5:39:52 -	LMT	1924 May  2
+			 6:00	-	TSK	1957 Mar
+			 7:00	Russia	TS%s	1991 Mar 31 2:00s
+			 6:00	1:00	TSD	1991 Sep 29 2:00s
+			 6:00	-	TSK	1992 Jan 19 2:00s
+			 7:00	Russia	TS%s
+
+# Svalbard & Jan Mayen
+Zone Atlantic/Jan_Mayen	-1:00	-	EGT
+
+# Australia
+Zone Australia/Canberra	 9:56:32 -	LMT	1895 Feb
+			10:00	-	EST	1917 Jan  1 0:01
+			10:00	Aus	EST	1971 Oct 31 2:00
+			10:00	AN	EST	1981 Oct 25 2:00
+			10:00	1:00	EST	1982 Apr  4 3:00
+			10:00	AN	EST
+
+# UK
+Zone	Europe/Belfast	-0:23:40 -	LMT	1880 Aug  2
+			-0:25:21 -	DMT	1916 May 21 2:00 # Dublin/Dunsink MT
+			-0:25:21 1:00	IST	1916 Oct  1 2:00s   # Irish Summer Time
+			 0:00	GB-Eire	%s	1968 Oct 27
+			 1:00	-	BST	1971 Oct 31 2:00u
+			 0:00	GB-Eire	%s	1996
+			 0:00	EU	GMT/BST
+
+# Slovenia
+Zone Europe/Ljubljana	0:58:04	-	LMT	1884
+			1:00	-	CET	1941 Apr 18 23:00
+			1:00	C-Eur	CE%sT	1945 May  8  2:00s
+			1:00	1:00	CEST	1945 Sep 16  2:00s
+			1:00	-	CET	1982 Nov 27
+			1:00	EU	CE%sT
+
+# Bosnia and Herzegovina
+Zone	Europe/Sarajevo	1:13:40	-	LMT	1884
+			1:00	-	CET	1941 Apr 18 23:00
+			1:00	C-Eur	CE%sT	1945 May  8  2:00s
+			1:00	1:00	CEST	1945 Sep 16  2:00s
+			1:00	-	CET	1982 Nov 27
+			1:00	EU	CE%sT
+
+# Macedonia
+Zone	Europe/Skopje	1:25:44	-	LMT	1884
+			1:00	-	CET	1941 Apr 18 23:00
+			1:00	C-Eur	CE%sT	1945 May  8  2:00s
+			1:00	1:00	CEST	1945 Sep 16  2:00s
+			1:00	-	CET	1982 Nov 27
+			1:00	EU	CE%sT
+
+# Moldova
+Zone	Europe/Tiraspol	1:58:32	-	LMT	1880
+			1:55	-	CMT	1918 Feb 15 # Chisinau MT
+			1:44:24	-	BMT	1931 Jul 24 # Bucharest MT
+			2:00	Romania	EE%sT	1940 Aug 15
+			2:00	1:00	EEST	1941 Jul 17
+			1:00	C-Eur	CE%sT	1944 Aug 24
+			3:00	Russia	MSK/MSD	1991 Mar 31 2:00
+			2:00	Russia	EE%sT	1992 Jan 19 2:00
+			3:00	Russia	MSK/MSD
+
+# Croatia
+# Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
+Zone	Europe/Zagreb	1:03:52	-	LMT	1884
+			1:00	-	CET	1941 Apr 18 23:00
+			1:00	C-Eur	CE%sT	1945 May  8  2:00s
+			1:00	1:00	CEST	1945 Sep 16  2:00s
+			1:00	-	CET	1982 Nov 27
+			1:00	EU	CE%sT
-- 
1.8.1.2

    

[PATCH] New file 'pre1970' for zones that differ only in pre-1970 time stamps.

Paul Eggert

gunther vermeir

Marc Lehmann

Zefram

Paul Eggert

Stephen Colebourne

Zefram

Zefram

random832＠fastmail.us

Zefram

Paul Eggert

Zefram

Alan Barrett

Zefram

Bennett Todd

Alan Barrett

Paul Eggert

Stephen Colebourne

Lester Caine

Lester Caine

Steffen Daode Nurpmeso

Andrew Paprocki

enh

Gwillim Law

tags

participants (13)