While using the tz source to test some other program I found an unwanted hair space (U+200A) in the commentary. Fix this, and adjust Makefile to catch this sort of thing in the future. * Makefile (SAFE_CHARSET): Exclude ] and -, as they're now the invoker's responsibility. Invoker changed. (NONSYM_CHAR): Remove, replacing with ... (OK_CHAR): ... this new macro. All uses changed. (NONSYM_LINE, VALID_LINE): Remove, replacing with ... (OK_LINE): ... this new macro. All uses changed. (check_character_set): Simplify test, and report all non-ASCII non-letters. Remove the exception for Makefile, as it no longer needs to contain non-OK characters. * southamerica: Replace an inadvertent hair space with a space. --- Makefile | 30 ++++++++++++++---------------- southamerica | 2 +- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index b398727..c3b23c8 100644 --- a/Makefile +++ b/Makefile @@ -292,23 +292,24 @@ TAB_CHAR= ' ' SAFE_CHARSET1= $(TAB_CHAR)' !\"'$$sharp'$$%&'\''()*+,./0123456789:;<=>?@' SAFE_CHARSET2= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\^_`' SAFE_CHARSET3= 'abcdefghijklmnopqrstuvwxyz{|}~' -SAFE_CHARSET= ]$(SAFE_CHARSET1)$(SAFE_CHARSET2)$(SAFE_CHARSET3)- -SAFE_CHAR= '['$(SAFE_CHARSET)']' -# NONSYM_CHAR is a regular expression that matches any character -# except for a small number of symbols, where we prefer to stick with +SAFE_CHARSET= $(SAFE_CHARSET1)$(SAFE_CHARSET2)$(SAFE_CHARSET3) +SAFE_CHAR= '[]'$(SAFE_CHARSET)'-]' + +# OK_CHAR matches any character allowed in the distributed files. +# This is the same as SAFE_CHAR, except that multibyte letters are +# also allowed so that commentary can contain people's names and quote +# non-English sources. For non-letters the sources are limited to # ASCII renderings for the convenience of maintainers whose text editors # mishandle UTF-8 by default (e.g., XEmacs 21.4.22). -NONSYM_CHAR= '[^–—°′″≈≠≤≥±−×÷∞←→↔·•§¶«»‘’‚‛“”„‟‹›「」『』〝〞〟]' +OK_CHAR= '[][:alpha:]'$(SAFE_CHARSET)'-]' # SAFE_LINE matches a line of safe characters. -# SAFE_SHARP_LINE is similar, except any character can follow '#'; +# SAFE_SHARP_LINE is similar, except any OK character can follow '#'; # this is so that comments can contain non-ASCII characters. -# NONSYM_LINE matches a line of non-symbols. -# VALID_LINE matches a line of any validly-encoded characters. +# OK_LINE matches a line of OK characters. SAFE_LINE= '^'$(SAFE_CHAR)'*$$' -SAFE_SHARP_LINE='^'$(SAFE_CHAR)'*('$$sharp$(NONSYM_CHAR)'*)?$$' -NONSYM_LINE= '^'$(NONSYM_CHAR)'*$$' -VALID_LINE= '^.*$$' +SAFE_SHARP_LINE='^'$(SAFE_CHAR)'*('$$sharp$(OK_CHAR)'*)?$$' +OK_LINE= '^'$(OK_CHAR)'*$$' # Flags to give 'tar' when making a distribution. # Try to use flags appropriate for GNU tar. @@ -475,14 +476,11 @@ check: check_character_set check_white_space check_links check_sorted \ check_character_set: $(ENCHILADA) LC_ALL=en_US.utf8 && export LC_ALL && \ sharp='#' && \ - ! grep -Env $(SAFE_LINE) $(MANS) date.1 $(MANTXTS) \ + ! grep -Env $(SAFE_LINE) Makefile $(MANS) date.1 $(MANTXTS) \ $(MISC) $(SOURCES) $(WEB_PAGES) && \ ! grep -Env $(SAFE_SHARP_LINE) $(TDATA) backzone \ iso3166.tab leapseconds yearistype.sh zone.tab && \ - test $$(grep -Ecv $(SAFE_SHARP_LINE) Makefile) -eq 1 && \ - ! grep -Env $(NONSYM_LINE) CONTRIBUTING NEWS README Theory \ - $(MANS) date.1 zone1970.tab && \ - ! grep -Env $(VALID_LINE) $(ENCHILADA) + ! grep -Env $(OK_LINE) $(ENCHILADA) check_white_space: $(ENCHILADA) ! grep -En ' '$(TAB_CHAR)"|$$(printf '[\f\r\v]')" $(ENCHILADA) diff --git a/southamerica b/southamerica index be63a88..6bbc2c8 100644 --- a/southamerica +++ b/southamerica @@ -30,7 +30,7 @@ # I suggest the use of _Summer time_ instead of the more cumbersome # _daylight-saving time_. _Summer time_ seems to be in general use # in Europe and South America. -# -- E O Cutler, _New York Times_ (1937-02-14), quoted in +# -- E O Cutler, _New York Times_ (1937-02-14), quoted in # H L Mencken, _The American Language: Supplement I_ (1960), p 466 # # Earlier editions of these tables also used the North American style -- 2.1.4