>From 4a6a2d26b519cedc4af6782dbddcd84c2879c6db Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 8 Mar 2019 21:29:27 -0800
Subject: [PROPOSED] zic: new -r option

(Inspired by a feature request from Christopher Wong.)
* Makefile (ZFLAGS), NEWS, zic.8, zic.c (usage): Mention it.
* zic.8: Remove documentation for old -s option, which
no longer was supported anyway.
* zic.c (lo_time, hi_time): New static vars.
(timerange_option): New function.
(main): Use it to parse -r.
(struct timerange): New type.
(limitrange): New function.
(writezone): Use it to support -r.
(stringzone): Do not output a TZ string if the output
is truncated on the high end; see Internet RFC
---
 Makefile |   3 +
 NEWS     |  10 +++
 zic.8    |  29 ++++++--
 zic.c    | 200 ++++++++++++++++++++++++++++++++++++++-----------------
 4 files changed, 177 insertions(+), 65 deletions(-)

diff --git a/Makefile b/Makefile
index f88c913..4bc72a9 100644
--- a/Makefile
+++ b/Makefile
@@ -362,6 +362,9 @@ LEAPSECONDS=
 zic=		./zic
 ZIC=		$(zic) $(ZFLAGS)
 
+# To shrink the size of installed TZif files,
+# append "-r @N" to omit data before N-seconds-after-the-Epoch.
+# See the zic man page for more about -r.
 ZFLAGS=
 
 # How to use zic to install TZif files.
diff --git a/NEWS b/NEWS
index d288c6d..9ad46b0 100644
--- a/NEWS
+++ b/NEWS
@@ -24,6 +24,16 @@ Unreleased, experimental changes
     which nowadays is typically a typo.  (Problem reported by Isiah
     Meadows.)
 
+  Changes to code
+
+    zic now has an -r option to limit the time range of output data.
+    For example, 'zic -r @1000000000' limits the output data to
+    timestamps starting 1000000000 seconds after the Epoch.
+    This helps shrink output size and can be useful for applications
+    not needing the full timestamp history, such as TZDIST truncation;
+    see Internet RFC 8536 section 5.1.  (Inspired by a feature request
+    from Christopher Wong.)
+
   Changes to documentation
 
     Mention Internet RFC 8536 (February 2019), which documents TZif.
diff --git a/zic.8 b/zic.8
index 590e88a..5ca7079 100644
--- a/zic.8
+++ b/zic.8
@@ -79,6 +79,30 @@ Read leap second information from the file with the given name.
 If this option is not used,
 no leap second information appears in output files.
 .TP
+.BR "\*-r " "[\fB@\fP\fIlo\fP][\fB/@\fP\fIhi\fP]"
+Reduce the size of output files by limiting their applicability
+to timestamps in the range from
+.I lo
+through
+.IR hi ,
+where
+.I lo
+and
+.I hi
+are possibly-signed decimal counts of seconds since the Epoch
+(1970-01-01 00:00:00 UTC).
+Omitted counts default to extreme values.
+For example,
+.q "zic \*-r @0"
+omits data intended for negative timestamps (i.e., before the Epoch), and
+.q "zic \*-r @0/@2147483647"
+outputs data intended only for nonnegative timestamps that fit into
+31-bit signed integers.
+On platforms with GNU
+.BR date ,
+.q "zic \-r @$(date +%s)"
+omits data intended for past timestamps.
+.TP
 .B \*-v
 Be more verbose, and complain about the following situations:
 .RS
@@ -125,11 +149,6 @@ or it contains a file name component that contains more than 14 bytes
 or that starts with
 .q "\*-" .
 .RE
-.TP
-.B \*-s
-Limit time values stored in output files to values that are the same
-whether they're taken to be signed or unsigned.
-You can use this option to generate SVVS-compatible files.
 .SH FILES
 Input files use the format described in this section; output files use
 .IR tzfile (5)
diff --git a/zic.c b/zic.c
index 2ebc66a..e6b4bd2 100644
--- a/zic.c
+++ b/zic.c
@@ -575,7 +575,8 @@ usage(FILE *stream, int status)
   fprintf(stream,
 	  _("%s: usage is %s [ --version ] [ --help ] [ -v ] \\\n"
 	    "\t[ -l localtime ] [ -p posixrules ] [ -d directory ] \\\n"
-	    "\t[ -t localtime-link ] [ -L leapseconds ] [ filename ... ]\n\n"
+	    "\t[ -t localtime-link ] [ -L leapseconds ] [ -r '[@lo][/@hi]' ] \\\n"
+	    "\t[ filename ... ]\n\n"
 	    "Report bugs to %s.\n"),
 	  progname, progname, REPORT_BUGS_TO);
   if (status == EXIT_SUCCESS)
@@ -603,6 +604,38 @@ change_directory (char const *dir)
   }
 }
 
+#define TIME_T_BITS_IN_FILE 64
+static zic_t const min_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE);
+static zic_t const max_time = MAXVAL(zic_t, TIME_T_BITS_IN_FILE);
+static zic_t lo_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE);
+static zic_t hi_time = MAXVAL(zic_t, TIME_T_BITS_IN_FILE);
+
+/* Set the time range of the output to TIMERANGE.
+   Return true if successful.  */
+static bool
+timerange_option(char *timerange)
+{
+  intmax_t lo = min_time, hi = max_time;
+  char *lo_end = timerange, *hi_end;
+  bool badrange = false;
+  if (*timerange == '@') {
+    lo = strtoimax (timerange + 1, &lo_end, 10);
+    if (lo_end == timerange + 1)
+      badrange = true;
+  }
+  hi_end = lo_end;
+  if (lo_end[0] == '/' && lo_end[1] == '@') {
+    hi = strtoimax (lo_end + 2, &hi_end, 10);
+    if (hi_end == lo_end + 2)
+      badrange = true;
+  }
+  if (badrange || *hi_end || hi < lo)
+    return false;
+  lo_time = lo < min_time ? min_time : lo <= max_time ? lo : max_time;
+  hi_time = hi < min_time ? min_time : hi <= max_time ? hi : max_time;
+  return true;
+}
+
 static const char *	psxrules;
 static const char *	lcltime;
 static const char *	directory;
@@ -615,6 +648,7 @@ main(int argc, char **argv)
 {
 	register int c, k;
 	register ptrdiff_t i, j;
+	bool timerange_given = false;
 
 #ifdef S_IWGRP
 	umask(umask(S_IWGRP | S_IWOTH) | (S_IWGRP | S_IWOTH));
@@ -640,7 +674,7 @@ main(int argc, char **argv)
 		} else if (strcmp(argv[k], "--help") == 0) {
 			usage(stdout, EXIT_SUCCESS);
 		}
-	while ((c = getopt(argc, argv, "d:l:L:p:st:vy:")) != EOF && c != -1)
+	while ((c = getopt(argc, argv, "d:l:L:p:r:st:vy:")) != EOF && c != -1)
 		switch (c) {
 			default:
 				usage(stderr, EXIT_FAILURE);
@@ -708,6 +742,21 @@ _("%s: More than one -L option specified\n"),
 			case 'v':
 				noise = true;
 				break;
+			case 'r':
+				if (timerange_given) {
+				  fprintf(stderr,
+_("%s: More than one -r option specified\n"),
+					  progname);
+				  return EXIT_FAILURE;
+				}
+				if (! timerange_option(optarg)) {
+				  fprintf(stderr,
+_("%s: invalid time range: %s\n"),
+					  progname, optarg);
+				  return EXIT_FAILURE;
+				}
+				timerange_given = true;
+				break;
 			case 's':
 				warning(_("-s ignored"));
 				break;
@@ -961,11 +1010,6 @@ dolink(char const *fromfield, char const *tofield, bool staysymlink)
 	}
 }
 
-#define TIME_T_BITS_IN_FILE	64
-
-static zic_t const min_time = MINVAL(zic_t, TIME_T_BITS_IN_FILE);
-static zic_t const max_time = MAXVAL(zic_t, TIME_T_BITS_IN_FILE);
-
 /* Return true if NAME is a directory.  */
 static bool
 itsdir(char const *name)
@@ -1742,14 +1786,43 @@ swaptypes(int i, int j)
   { bool t = ttisgmts[i]; ttisgmts[i] = ttisgmts[j]; ttisgmts[j] = t; }
 }
 
+struct timerange {
+  int defaulttype;
+  ptrdiff_t base, count;
+  int leapbase, leapcount;
+};
+
+static struct timerange
+limitrange(struct timerange r, zic_t lo, zic_t hi,
+	   zic_t const *ats, unsigned char const *types)
+{
+  while (0 < r.count && hi < ats[r.base + r.count - 1])
+    r.count--;
+  while (1 < r.count && ats[r.base] < lo && ats[r.base + 1] <= lo) {
+    /* Discard too-low transitions, except keep any last too-low
+       transition if no transition is exactly at LO.  The kept
+       transition will be output as a LO "transition" appropriate for
+       buggy clients that do not use time type 0 for timestamps before
+       the first transition; see "Output a LO_TIME transition" below.  */
+    r.count--;
+    r.base++;
+    r.defaulttype = types[r.base - (ats[r.base] == lo)];
+  }
+  while (0 < r.leapcount && hi < trans[r.leapbase + r.leapcount - 1])
+    r.leapcount--;
+  while (0 < r.leapcount && trans[r.leapbase] < lo) {
+    r.leapcount--;
+    r.leapbase++;
+  }
+  return r;
+}
+
 static void
 writezone(const char *const name, const char *const string, char version,
 	  int defaulttype)
 {
 	register FILE *			fp;
 	register ptrdiff_t		i, j;
-	register int			leapcnt32, leapi32;
-	register ptrdiff_t		timecnt32, timei32;
 	register int			pass;
 	static const struct tzhead	tzh0;
 	static struct tzhead		tzh;
@@ -1764,6 +1837,7 @@ writezone(const char *const name, const char *const string, char version,
 				      _Alignof(zic_t)));
 	void *typesptr = ats + nats;
 	unsigned char *types = typesptr;
+	struct timerange rangeall, range32, range64;
 
 	/*
 	** Sort.
@@ -1839,32 +1913,13 @@ writezone(const char *const name, const char *const string, char version,
 	  timecnt++;
 	}
 
-	/*
-	** Figure out 32-bit-limited starts and counts.
-	*/
-	timecnt32 = timecnt;
-	timei32 = 0;
-	leapcnt32 = leapcnt;
-	leapi32 = 0;
-	while (0 < timecnt32 && INT32_MAX < ats[timecnt32 - 1])
-		--timecnt32;
-	while (1 < timecnt32 && ats[timei32] < INT32_MIN
-	       && ats[timei32 + 1] <= INT32_MIN) {
-		/* Discard too-low transitions, except keep any last too-low
-		   transition if no transition is exactly at INT32_MIN.
-		   The kept transition will be output as an INT32_MIN
-		   "transition" appropriate for buggy 32-bit clients that do
-		   not use time type 0 for timestamps before the first
-		   transition; see below.  */
-		--timecnt32;
-		++timei32;
-	}
-	while (0 < leapcnt32 && INT32_MAX < trans[leapcnt32 - 1])
-		--leapcnt32;
-	while (0 < leapcnt32 && trans[leapi32] < INT32_MIN) {
-		--leapcnt32;
-		++leapi32;
-	}
+	rangeall.defaulttype = defaulttype;
+	rangeall.base = rangeall.leapbase = 0;
+	rangeall.count = timecnt;
+	rangeall.leapcount = leapcnt;
+	range64 = limitrange(rangeall, lo_time, hi_time, ats, types);
+	range32 = limitrange(range64, INT32_MIN, INT32_MAX, ats, types);
+
 	/*
 	** Remove old file, if any, to snap links.
 	*/
@@ -1894,6 +1949,8 @@ writezone(const char *const name, const char *const string, char version,
 	for (pass = 1; pass <= 2; ++pass) {
 		register ptrdiff_t thistimei, thistimecnt, thistimelim;
 		register int	thisleapi, thisleapcnt, thisleaplim;
+		int thisdefaulttype;
+		zic_t lo;
 		int old0;
 		char		omittype[TZ_MAX_TYPES];
 		int		typemap[TZ_MAX_TYPES];
@@ -1904,33 +1961,47 @@ writezone(const char *const name, const char *const string, char version,
 		int		indmap[TZ_MAX_CHARS];
 
 		if (pass == 1) {
-			thistimei = timei32;
-			thistimecnt = timecnt32;
+			/* Arguably the default time type in the 32-bit data
+			   should be range32.defaulttype, which is suited for
+			   timestamps just before INT32_MIN.  However, zic
+			   traditionally used the time type of the indefinite
+			   past instead.  Internet RFC 8532 says readers should
+			   ignore 32-bit data, so this discrepancy matters only
+			   to obsolete readers where the traditional type might
+			   be more appropriate even if it's "wrong".  So, use
+			   the historical zic value, unless -r specifies a low
+			   cutoff that excludes some 32-bit timestamps.  */
+			thisdefaulttype = (lo_time <= INT32_MIN
+					   ? range64.defaulttype
+					   : range32.defaulttype);
+
+			thistimei = range32.base;
+			thistimecnt = range32.count;
 			toomanytimes = thistimecnt >> 31 >> 1 != 0;
-			thisleapi = leapi32;
-			thisleapcnt = leapcnt32;
+			thisleapi = range32.leapbase;
+			thisleapcnt = range32.leapcount;
 		} else {
-			thistimei = 0;
-			thistimecnt = timecnt;
+			thistimei = range64.base;
+			thistimecnt = range64.count;
 			toomanytimes = thistimecnt >> 31 >> 31 >> 2 != 0;
-			thisleapi = 0;
-			thisleapcnt = leapcnt;
+			thisleapi = range64.leapbase;
+			thisleapcnt = range64.leapcount;
 		}
 		if (toomanytimes)
 		  error(_("too many transition times"));
 		thistimelim = thistimei + thistimecnt;
 		thisleaplim = thisleapi + thisleapcnt;
 		memset(omittype, true, typecnt);
-		omittype[defaulttype] = false;
+		omittype[thisdefaulttype] = false;
 		for (i = thistimei; i < thistimelim; i++)
 		  omittype[types[i]] = false;
 
-		/* Reorder types to make DEFAULTTYPE type 0.
-		   Use TYPEMAP to swap OLD0 and DEFAULTTYPE so that
-		   DEFAULTTYPE appears as type 0 in the output instead
+		/* Reorder types to make THISDEFAULTTYPE type 0.
+		   Use TYPEMAP to swap OLD0 and THISDEFAULTTYPE so that
+		   THISDEFAULTTYPE appears as type 0 in the output instead
 		   of OLD0.  TYPEMAP also omits unused types.  */
 		old0 = strlen(omittype);
-		swaptypes(old0, defaulttype);
+		swaptypes(old0, thisdefaulttype);
 
 #ifndef LEAVE_SOME_PRE_2011_SYSTEMS_IN_THE_LURCH
 		/*
@@ -1982,8 +2053,8 @@ writezone(const char *const name, const char *const string, char version,
 		thistypecnt = 0;
 		for (i = old0; i < typecnt; i++)
 		  if (!omittype[i])
-		    typemap[i == old0 ? defaulttype
-			    : i == defaulttype ? old0 : i]
+		    typemap[i == old0 ? thisdefaulttype
+			    : i == thisdefaulttype ? old0 : i]
 		      = thistypecnt++;
 
 		for (i = 0; i < sizeof indmap / sizeof indmap[0]; ++i)
@@ -2026,15 +2097,18 @@ writezone(const char *const name, const char *const string, char version,
 		DO(tzh_typecnt);
 		DO(tzh_charcnt);
 #undef DO
-		for (i = thistimei; i < thistimelim; ++i)
-			if (pass == 1)
-				/*
-				** Output an INT32_MIN "transition"
-				** if appropriate; see above.
-				*/
-				puttzcode(((ats[i] < INT32_MIN) ?
-					INT32_MIN : ats[i]), fp);
-			else	puttzcode64(ats[i], fp);
+		/* Output a LO_TIME transition if needed; see limitrange.
+		   But do not go below the minimum representable value
+		   for this pass.  */
+		lo = pass == 1 && lo_time < INT32_MIN ? INT32_MIN : lo_time;
+
+		for (i = thistimei; i < thistimelim; ++i) {
+		  zic_t at = ats[i] < lo ? lo : ats[i];
+		  if (pass == 1)
+		    puttzcode(at, fp);
+		  else
+		    puttzcode64(at, fp);
+		}
 		for (i = thistimei; i < thistimelim; ++i) {
 			unsigned char	uc;
 
@@ -2081,7 +2155,7 @@ writezone(const char *const name, const char *const string, char version,
 		for (i = old0; i < typecnt; i++)
 			if (!omittype[i])
 				putc(ttisgmts[i], fp);
-		swaptypes(old0, defaulttype);
+		swaptypes(old0, thisdefaulttype);
 	}
 	fprintf(fp, "\n%s\n", string);
 	close_file(fp, directory, name);
@@ -2301,6 +2375,12 @@ stringzone(char *result, struct zone const *zpfirst, ptrdiff_t zonecount)
 	struct rule			stdr, dstr;
 
 	result[0] = '\0';
+
+	/* Internet RFC 8536 section 5.1 says to use an empty TZ string if
+	   future timestamps are truncated.  */
+	if (hi_time < max_time)
+	  return -1;
+
 	zp = zpfirst + zonecount - 1;
 	stdrp = dstrp = NULL;
 	for (i = 0; i < zp->z_nrules; ++i) {
-- 
2.17.1

