From 081c50f30308b589e7e296f485135d03cf046cb1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 4 Aug 2016 18:43:51 -0700
Subject: [PROPOSED PATCH 1/2] Fix bug with Casablanca in late 2037

Problem reported by Alexander Belopolsky in:
http://mm.icann.org/pipermail/tz/2016-July/023895.html
Way forward suggested by him in:
http://mm.icann.org/pipermail/tz/2016-July/023899.html
* NEWS: Document this.
* tzfile.5 (DESCRIPTION): Document that the last transition should
agree with POSIX TZ string.
* zic.c (struct attype): New member dontmerge.
(writezone): Don't merge adjacent duplicates if the latter is
marked with dontmerge.
(outzone): Mark last 'max' transition with dontmerge.
Simplify addition of dummy transition, by using dontmerge.
(addtt): Initialize dontmerge.
---
 NEWS     | 11 +++++++++++
 tzfile.5 | 14 ++++++++++++++
 zic.c    | 28 ++++++++++++----------------
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/NEWS b/NEWS
index 023ee1d..037f779 100644
--- a/NEWS
+++ b/NEWS
@@ -7,8 +7,19 @@ Unreleased, experimental changes
     New leap second 2016-12-31 23:59:60 UTC as per IERS Bulletin C 52.
     (Thanks to Tim Parenti.)
 
+  Changes affecting code
+
+    zic no longer generates binary files containing POSIX TZ-like
+    strings that disagree with the local time type after the last
+    explicit transition in the data.  This fixes a bug with
+    Africa/Casablanca and Africa/El_Aaiun in some year-2037 time
+    stamps on the reference platform.
+
   Changes affecting documentation and commentary
 
+    tzfile.5 now documents the new restriction on POSIX TZ-like
+    strings that is now implemented by zic.
+
     tz-link.htm mentions Time4J (thanks to Meno Hochschild) and
     ThreeTen-Extra, and its description of Java 8 has been brought up
     to date (thanks to Stephen Colebourne).
diff --git a/tzfile.5 b/tzfile.5
index b7e6d45..0c907bc 100644
--- a/tzfile.5
+++ b/tzfile.5
@@ -2,6 +2,13 @@
 .SH NAME
 tzfile \- time zone information
 .SH DESCRIPTION
+.ie '\(lq'' .ds lq \&"\"
+.el .ds lq \(lq\"
+.ie '\(rq'' .ds rq \&"\"
+.el .ds rq \(rq\"
+.de q
+\\$3\*(lq\\$1\*(rq\\$2
+..
 The time zone information files used by
 .IR tzset (3)
 begin with the magic characters "TZif" to identify them as
@@ -138,6 +145,13 @@ POSIX-TZ-environment-variable-style string for use in handling instants
 after the last transition time stored in the file
 (with nothing between the newlines if there is no POSIX representation for
 such instants).
+The POSIX-style string must agree with the local time type after
+both data's last transition times; for example, given the string
+.q "WET0WEST,M3.5.0,M10.5.0/3"
+then if a last transition time is in July, the transition's local time
+type must specify a daylight-saving time abbreviated
+.q "WEST"
+that is one hour east of UT.
 .PP
 For version-3-format time zone files, the POSIX-TZ-style string may
 use two minor extensions to the POSIX TZ format, as described in
diff --git a/zic.c b/zic.c
index 3eb65d3..2b0193b 100644
--- a/zic.c
+++ b/zic.c
@@ -355,6 +355,7 @@ static const int	len_years[2] = {
 
 static struct attype {
 	zic_t		at;
+	bool		dontmerge;
 	unsigned char	type;
 } *			attypes;
 static zic_t		gmtoffs[TZ_MAX_TYPES];
@@ -1664,8 +1665,9 @@ writezone(const char *const name, const char *const string, char version)
 						attypes[fromi].type;
 					continue;
 			}
-			if (toi == 0 ||
-				attypes[toi - 1].type != attypes[fromi].type)
+			if (toi == 0
+			    || attypes[fromi].dontmerge
+			    || attypes[toi - 1].type != attypes[fromi].type)
 					attypes[toi++] = attypes[fromi];
 		}
 		timecnt = toi;
@@ -2288,6 +2290,7 @@ outzone(const struct zone *zpfirst, int zonecount)
 	register int			compat;
 	register bool			do_extend;
 	register char			version;
+	int lastatmax = -1;	/* index into attypes, or -1 if none */
 
 	max_abbr_len = 2 + max_format_len + max_abbrvar_len;
 	max_envvar_len = 2 * max_abbr_len + 5 * 9;
@@ -2520,6 +2523,9 @@ outzone(const struct zone *zpfirst, int zonecount)
 				offset = oadd(zp->z_gmtoff, rp->r_stdoff);
 				type = addtype(offset, ab, rp->r_stdoff != 0,
 					rp->r_todisstd, rp->r_todisgmt);
+				if (rp->r_hiyear == ZIC_MAX
+				    && ! (0 <= lastatmax && ktime < attypes[lastatmax].at))
+				  lastatmax = timecnt;
 				addtt(ktime, type);
 			}
 		}
@@ -2551,6 +2557,8 @@ error(_("can't determine time zone abbreviation to use just after until time"));
 				starttime = tadd(starttime, -gmtoff);
 		}
 	}
+	if (0 <= lastatmax)
+	  attypes[lastatmax].dontmerge = true;
 	if (do_extend) {
 		/*
 		** If we're extending the explicitly listed observations
@@ -2572,21 +2580,8 @@ error(_("can't determine time zone abbreviation to use just after until time"));
 			if (attypes[i].at > lastat->at)
 				lastat = &attypes[i];
 		if (lastat->at < rpytime(&xr, max_year - 1)) {
-			/*
-			** Create new type code for the redundant entry,
-			** to prevent it being optimized away.
-			*/
-			if (typecnt >= TZ_MAX_TYPES) {
-				error(_("too many local time types"));
-				exit(EXIT_FAILURE);
-			}
-			gmtoffs[typecnt] = gmtoffs[lastat->type];
-			isdsts[typecnt] = isdsts[lastat->type];
-			ttisstds[typecnt] = ttisstds[lastat->type];
-			ttisgmts[typecnt] = ttisgmts[lastat->type];
-			abbrinds[typecnt] = abbrinds[lastat->type];
-			++typecnt;
 			addtt(rpytime(&xr, max_year + 1), typecnt-1);
+			attypes[timecnt - 1].dontmerge = true;
 		}
 	}
 	writezone(zpfirst->z_name, envvar, version);
@@ -2614,6 +2609,7 @@ addtt(zic_t starttime, int type)
 	}
 	attypes = growalloc(attypes, sizeof *attypes, timecnt, &timecnt_alloc);
 	attypes[timecnt].at = starttime;
+	attypes[timecnt].dontmerge = false;
 	attypes[timecnt].type = type;
 	++timecnt;
 }
-- 
2.7.4

