From e5f99199ff4efe0808a5076c2484d5b86193124d Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 12 Aug 2022 02:00:34 -0700
Subject: [PROPOSED] Improve tzselect behavior for Turkey etc.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix ‘tzselect’ so that you can choose Istanbul regardless of
whether you choose Asia or Europe at the top level, and
similarly for other Zones that straddle continents/oceans.
Inspired by a problem reported by Peter Krefting in:
https://mm.icann.org/pipermail/tz/2022-August/031795.html
* tzselect.ksh (quoted_continents, countries): Parse new #@ lines
and use them to deduce continents and countries better.
* zone1970.tab: Add #@ comments for Zones whose continent
or ocean is not easily deducible.
---
 NEWS         |  5 +++++
 tzselect.ksh | 54 +++++++++++++++++++++++++++++++++++++++++-----------
 zone1970.tab | 24 +++++++++++++++++++++++
 3 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index af99e2b7..9042cd8e 100644
--- a/NEWS
+++ b/NEWS
@@ -7,9 +7,14 @@ Unreleased, experimental changes
     Work around a bug in macOS 'awk' that broke commands like 'make
     traditional_tarballs'.  (Problem reported by Deborah Goldsmith.)
 
+    Add structured comments to zone1970.tab that clarify whether Zones
+    like Europe/Istanbul cross continent or ocean boundaries.
+    (Inspired by a problem reported by Peter Krefting.)
+
     Fix bug with 'zic -d /a/b/c' when /a is unwritable but the
     directory /a/b already exists.
 
+
 Release 2022b - 2022-08-10 15:38:32 -0700
 
   Briefly:
diff --git a/tzselect.ksh b/tzselect.ksh
index 7b6b7895..5741b6ff 100644
--- a/tzselect.ksh
+++ b/tzselect.ksh
@@ -303,15 +303,24 @@ while
 
         quoted_continents=`
 	  $AWK '
+	    function handle_entry(entry) {
+	      entry = substr(entry, 1, index(entry, "/") - 1)
+	      if (entry == "America")
+	       entry = entry "s"
+	      if (entry ~ /^(Arctic|Atlantic|Indian|Pacific)$/)
+	       entry = entry " Ocean"
+	      printf "'\''%s'\''\n", entry
+	    }
 	    BEGIN { FS = "\t" }
 	    /^[^#]/ {
-              entry = substr($3, 1, index($3, "/") - 1)
-              if (entry == "America")
-		entry = entry "s"
-              if (entry ~ /^(Arctic|Atlantic|Indian|Pacific)$/)
-		entry = entry " Ocean"
-              printf "'\''%s'\''\n", entry
+              handle_entry($3)
             }
+	    /^#@/ {
+	      ncont = split($2, cont, /,/)
+	      for (ci = 1; ci <= ncont; ci++) {
+	        handle_entry(cont[ci])
+	      }
+	    }
           ' <"$TZ_ZONE_TABLE" |
 	  sort -u |
 	  tr '\n' ' '
@@ -395,15 +404,37 @@ while
 		*)
 		# Get list of names of countries in the continent or ocean.
 		countries=`$AWK \
-			-v continent="$continent" \
+			-v continent_re="^$continent/" \
 			-v TZ_COUNTRY_TABLE="$TZ_COUNTRY_TABLE" \
 		'
 			BEGIN { FS = "\t" }
-			/^#/ { next }
-			$3 ~ ("^" continent "/") {
-			    ncc = split($1, cc, /,/)
-			    for (i = 1; i <= ncc; i++)
+			/^#$/ { next }
+			/^#[^@]/ { next }
+			{
+			  commentary = $0 ~ /^#@/
+			  if (commentary) {
+			    col1ccs = substr($1, 3)
+			    conts = $2
+			  } else {
+			    col1ccs = $1
+			    conts = $3
+			  }
+			  ncc = split(col1ccs, cc, /,/)
+			  ncont = split(conts, cont, /,/)
+			  for (i = 1; i <= ncc; i++) {
+			    elsewhere = commentary
+			    for (ci = 1; ci <= ncont; ci++) {
+			      if (cont[ci] ~ continent_re) {
 				if (!cc_seen[cc[i]]++) cc_list[++ccs] = cc[i]
+				elsewhere = 0
+			      }
+			    }
+			    if (elsewhere) {
+			      for (i = 1; i <= ncc; i++) {
+			        cc_elsewhere[cc[i]] = 1
+			      }
+			    }
+			  }
 			}
 			END {
 				while (getline <TZ_COUNTRY_TABLE) {
@@ -411,6 +442,7 @@ while
 				}
 				for (i = 1; i <= ccs; i++) {
 					country = cc_list[i]
+					if (cc_elsewhere[country]) continue
 					if (cc_name[country]) {
 					  country = cc_name[country]
 					}
diff --git a/zone1970.tab b/zone1970.tab
index 3e1948cb..19515f7a 100644
--- a/zone1970.tab
+++ b/zone1970.tab
@@ -351,3 +351,27 @@ VN	+1045+10640	Asia/Ho_Chi_Minh	Vietnam (south)
 VU	-1740+16825	Pacific/Efate
 WS	-1350-17144	Pacific/Apia
 ZA,LS,SZ	-2615+02800	Africa/Johannesburg
+#
+# The next section contains experimental tab-separated comments for
+# use by user agents like tzselect that identify continents and oceans.
+#
+# For example, the comment "#@AQ<tab>Antarctica/" means the country code
+# AQ is in the continent Antarctica regardless of the Zone name,
+# so Pacific/Auckland should be listed under Antarctica as well as
+# under the Pacific because its line's country codes include AQ.
+#
+# If more than one country code is affected, each is listed separated
+# by commas, e.g., "#@IS,SH<tab>Atlantic/".  If a country code is in
+# more than one continent or ocean, each is listed separated by
+# commas, e.g., the second column of "#@CY,TR<tab>Asia/,Europe/".
+#
+# These experimental comments are present only for country codes where
+# the continent or ocean is not already obvious from the Zone name.
+# For example, there is no such comment for RU since it already
+# corresponds to Zone names starting with both "Europe/" and "Asia/".
+#
+#@AQ	Antarctica/
+#@IS,SH	Atlantic/
+#@CY,TR	Asia/,Europe/
+#@SJ	Arctic/
+#@CC,CX,KM,MG,YT	Indian/
-- 
2.34.1

