From e743ce970fe7f5d53d79fd523f923f5a9307aab0 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 26 Jul 2022 18:05:00 -0700
Subject: [PROPOSED] Add PACKRATLIST build-time option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows downstream users to select parts of PACKRATDATA.
For example, "make PACKRATDATA=backzone PACKRATLIST=zone.tab"
generates TZif files identical to those of global-tz.
PACKRATLIST has no effect unless PACKRATDATA is set.
The default PACKRATLIST is empty,
which has the same behavior as before.
* Makefile (PACKRATLIST): New macro.
(TZDATA_ZI_DEPS, DSTDATA_ZI_DEPS, vanguard.zi, main.zi)
(rearguard.zi, PACKRATDATA, check_zishrink_posix)
(check_zishrink_right, check_public): Generate results
that depend on PACKRATLIST.
* backzone: Add "#PACKRATLIST zone.tab" comment directives
for cases where the zone.tab-only backzone needs a link
that plain backzone doesn’t.
* ziguard.awk (PACKRATLIST, packratlist): New vars.
Handle #PACKRATLIST comments.  If PACKRATLIST is specified,
limit PACKRATDATA to the zones specified by PACKRATLIST.
---
 Makefile    | 36 +++++++++++++++++++++++++++---------
 NEWS        |  6 ++++++
 backzone    |  7 +++++++
 ziguard.awk | 18 ++++++++++++++++++
 4 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
index e3e5c0d..0d2fbab 100644
--- a/Makefile
+++ b/Makefile
@@ -176,12 +176,19 @@ TZDATA_TEXT=	leapseconds tzdata.zi
 
 BACKWARD=	backward
 
-# If you want out-of-scope and often-wrong data from the file 'backzone', use
+# If you want out-of-scope and often-wrong data from the file 'backzone',
+# but only for entries listed in the backward-compatibility file zone.tab, use
 #	PACKRATDATA=	backzone
+#	PACKRATLIST=	zone.tab
+# If you want all the 'backzone' data, use
+#	PACKRATDATA=	backzone
+#	PACKRATLIST=
 # To omit this data, use
 #	PACKRATDATA=
+#	PACKRATLIST=
 
 PACKRATDATA=
+PACKRATLIST=
 
 # The name of a locale using the UTF-8 encoding, used during self-tests.
 # The tests are skipped if the name does not appear to work on this system.
@@ -524,8 +531,9 @@ TDATA=		$(YDATA) $(NDATA) $(BACKWARD)
 ZONETABLES=	zone1970.tab zone.tab
 TABDATA=	iso3166.tab $(TZDATA_TEXT) $(ZONETABLES)
 LEAP_DEPS=	leapseconds.awk leap-seconds.list
-TZDATA_ZI_DEPS=	ziguard.awk zishrink.awk version $(TDATA) $(PACKRATDATA)
-DSTDATA_ZI_DEPS= ziguard.awk $(TDATA) $(PACKRATDATA)
+TZDATA_ZI_DEPS=	ziguard.awk zishrink.awk version $(TDATA) \
+		  $(PACKRATDATA) $(PACKRATLIST)
+DSTDATA_ZI_DEPS= ziguard.awk $(TDATA) $(PACKRATDATA) $(PACKRATLIST)
 DATA=		$(TDATA_TO_CHECK) backzone iso3166.tab leap-seconds.list \
 			leapseconds $(ZONETABLES)
 AWK_SCRIPTS=	checklinks.awk checktab.awk leapseconds.awk \
@@ -611,13 +619,17 @@ version:	$(VERSION_DEPS)
 		printf '%s\n' "$$V" >$@.out
 		mv $@.out $@
 
-# These files can be tailored by setting BACKWARD and PACKRATDATA.
+# These files can be tailored by setting BACKWARD, PACKRATDATA, PACKRATLIST.
 vanguard.zi main.zi rearguard.zi: $(DSTDATA_ZI_DEPS)
-		$(AWK) -v DATAFORM=`expr $@ : '\(.*\).zi'` -f ziguard.awk \
+		$(AWK) \
+		  -v DATAFORM=`expr $@ : '\(.*\).zi'` \
+		  -v PACKRATDATA='$(PACKRATDATA)' \
+		  -v PACKRATLIST='$(PACKRATLIST)' \
+		  -f ziguard.awk \
 		  $(TDATA) $(PACKRATDATA) >$@.out
 		mv $@.out $@
 # This file has a version comment that attempts to capture any tailoring
-# via BACKWARD, DATAFORM, PACKRATDATA, and REDO.
+# via BACKWARD, DATAFORM, PACKRATDATA, PACKRATLIST, and REDO.
 tzdata.zi:	$(DATAFORM).zi version zishrink.awk
 		version=`sed 1q version` && \
 		  LC_ALL=C $(AWK) \
@@ -655,6 +667,7 @@ INSTALLARGS = \
  DESTDIR='$(DESTDIR)' \
  LEAPSECONDS='$(LEAPSECONDS)' \
  PACKRATDATA='$(PACKRATDATA)' \
+ PACKRATLIST='$(PACKRATLIST)' \
  TZDEFAULT='$(TZDEFAULT)' \
  TZDIR='$(TZDIR)' \
  ZIC='$(ZIC)'
@@ -843,7 +856,8 @@ check_theory.html check_tz-art.html check_tz-how-to.html check_tz-link.html:
 # preserves main-format data.
 check_zishrink: check_zishrink_posix check_zishrink_right
 check_zishrink_posix check_zishrink_right: \
-  zic leapseconds $(PACKRATDATA) $(TDATA) $(DATAFORM).zi tzdata.zi
+  zic leapseconds $(PACKRATDATA) $(PACKRATLIST) \
+  $(TDATA) $(DATAFORM).zi tzdata.zi
 		rm -fr $@.dir $@-t.dir $@-shrunk.dir
 		mkdir $@.dir $@-t.dir $@-shrunk.dir
 		case $@ in \
@@ -852,8 +866,8 @@ check_zishrink_posix check_zishrink_right: \
 		esac && \
 		  $(ZIC) $$leap -d $@.dir $(DATAFORM).zi && \
 		  $(ZIC) $$leap -d $@-shrunk.dir tzdata.zi && \
-		  case $(DATAFORM) in \
-		    main) \
+		  case $(DATAFORM),$(PACKRATLIST) in \
+		    main,) \
 		      $(ZIC) $$leap -d $@-t.dir $(TDATA) && \
 		      $(AWK) '/^Rule/' $(TDATA) | \
 			$(ZIC) $$leap -d $@-t.dir - $(PACKRATDATA) && \
@@ -974,6 +988,10 @@ check_public: $(VERSION_DEPS)
 		rm public.dir/main.zi
 		cd public.dir && $(MAKE) PACKRATDATA=backzone main.zi
 		public.dir/zic -d public.dir/zoneinfo main.zi
+		rm public.dir/main.zi
+		cd public.dir && \
+		  $(MAKE) PACKRATDATA=backzone PACKRATLIST=zone.tab main.zi
+		public.dir/zic -d public.dir/zoneinfo main.zi
 		:
 		rm -fr public.dir
 		touch $@
diff --git a/NEWS b/NEWS
index 34621ed..35f87b6 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ Unreleased, experimental changes
     Rename Europe/Kiev to Europe/Kyiv.
     New zic -R option
     Vanguard form now uses %z.
+    New build option PACKRATLIST
     New tailored_tarballs target, replacing rearguard_tarballs
 
   Changes to future timestamps
@@ -71,6 +72,11 @@ Unreleased, experimental changes
     to use %z eventually; in the meantime maintainers of zi parsers
     are encouraged to test the parsers on vanguard.zi.
 
+    The Makefile has a new PACKRATLIST option to select a subset of
+    'backzone'.  For example, 'make PACKRATDATA=backzone
+    PACKRATLIST=zone.tab' now generates TZif files identical to those
+    of the global-tz project.
+
     The Makefile has a new tailored_tarballs target for generating
     special-purpose tarballs.  It generalizes and replaces the
     rearguard_tarballs target and related targets and macros, which
diff --git a/backzone b/backzone
index 84269ad..f384d5d 100644
--- a/backzone
+++ b/backzone
@@ -65,6 +65,11 @@
 # Zones are sorted by zone name.  Each zone is preceded by the
 # name of the country that the zone is in, along with any other
 # commentary and rules associated with the entry.
+# If the zone overrides links in the main data, it
+# is followed by the corresponding Link lines.
+# If the zone overrides main-data links only when building with
+# PACKRATLIST=zone.tab, it is followed by a commented-out Link line
+# that starts with "#PACKRATLIST zone.tab".
 #
 # As explained in the zic man page, the zone columns are:
 # Zone	NAME		STDOFF	RULES	FORMAT	[UNTIL]
@@ -181,6 +186,7 @@ Zone	Africa/Bamako	-0:32:00 -	LMT	1912
 			 0:00	-	GMT	1934 Feb 26
 			-1:00	-	-01	1960 Jun 20
 			 0:00	-	GMT
+#PACKRATLIST zone.tab Link Africa/Bamako Africa/Timbuktu
 
 # Central African Republic
 Zone	Africa/Bangui	1:14:20	-	LMT	1912
@@ -498,6 +504,7 @@ Zone America/Atikokan	-6:06:28 -	LMT	1895
 			-6:00	1:00	CDT	1942 Feb  9  2:00s
 			-6:00	Canada	C%sT	1945 Sep 30  2:00
 			-5:00	-	EST
+#PACKRATLIST zone.tab Link America/Atikokan America/Coral_Harbour
 
 # Quebec east of Natashquan
 
diff --git a/ziguard.awk b/ziguard.awk
index 91f6896..0faacbc 100644
--- a/ziguard.awk
+++ b/ziguard.awk
@@ -55,10 +55,21 @@ BEGIN {
   dataform_type["main"] = 1
   dataform_type["rearguard"] = 1
 
+  if (PACKRATLIST) {  # Record the names listed in column 3 of PACKRATLIST.
+    while ((packratlist_status = (getline <PACKRATLIST)) > 0) {  # 0 means EOF
+      if ($0 !~ /^#/) packratlist[$3] = 1  # skip comment lines
+    }
+    if (packratlist_status < 0) exit 1  # unreadable list: fail, do not loop
+  }
+
   # The command line should set DATAFORM.
   if (!dataform_type[DATAFORM]) exit 1
 }
 
+$1 == "#PACKRATLIST" && $2 == PACKRATLIST {  # directive for the active list
+  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")  # keep only what follows it
+}
+
 /^Zone/ { zone = $2 }
 
 DATAFORM != "main" {
@@ -208,6 +219,13 @@ DATAFORM != "main" {
   }
 }
 
+/^Zone/ {  # true for a PACKRATDATA zone that a nonempty PACKRATLIST omits
+  packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
+}
+packrat_ignored && !/^Rule/ {  # Rule lines are never commented out here
+  sub(/^/, "#")
+}
+
 # If a Link line is followed by a Link or Zone line for the same data, comment
 # out the Link line.  This can happen if backzone overrides a Link
 # with a Zone or a different Link.
-- 
2.37.1

