diff --git a/NEWS b/NEWS index 1ee5813..5e5ba8e 100644 --- a/NEWS +++ b/NEWS @@ -15,10 +15,17 @@ Unreleased, experimental changes Error diagnostics of 'zic' and 'yearistype' have been reworded so that they no longer use ASCII '-' as if it were a dash. + 'zic -v' now warns about output file names that do not follow POSIX rules, + or that contain a digit or a file name component of '.' or '..'. + (Thanks to Arthur David Olson for starting the ball rolling on this.) + Some lint has been removed when using GCC_DEBUG_FLAGS with GCC 4.9.0. Changes affecting documentation and commentary + The 'Theory' file documents the longstanding exceptions to the + POSIX file name rules that are in 'etcetera' and 'backward'. + Documentation and commentary now prefer UTF-8 to US-ASCII, allowing the use of proper accents in foreign words and names. Code and data have not changed because of this. diff --git a/Theory b/Theory index ce43b60..c31731a 100644 --- a/Theory +++ b/Theory @@ -405,7 +405,8 @@ in decreasing order of importance: digits, as that might create an ambiguity with POSIX TZ strings. A file name component must not exceed 14 characters or start with '-'. E.g., prefer 'Brunei' - to 'Bandar_Seri_Begawan'. + to 'Bandar_Seri_Begawan'. Exceptions: see the discussion + of the 'etcetera' file below. A name must not be empty, or contain '//', or start or end with '/'. Do not use names that differ only in case. Although the reference implementation is case-sensitive, some other implementations @@ -464,11 +465,21 @@ longitude, this relationship is not exact. Older versions of this package used a different naming scheme, and these older names are still supported. See the file 'backward' for most of these older names -(e.g. 'US/Eastern' instead of 'America/New_York'); -excluding 'backward' should not affect the other data. +(e.g., 'US/Eastern' instead of 'America/New_York'). The other old-fashioned names still supported are 'WET', 'CET', 'MET', and 'EET' (see the file 'europe'). 
+Older versions of this package defined names that were +incompatible with POSIX. These older names are still supported, +even though they do not conform to the first rule of location names. +These incompatible names are mostly defined in the file 'etcetera'. +Also, the file 'backward' defines the incompatible names 'GMT0', +'GMT-0', 'GMT+0', and 'Canada/East-Saskatchewan'. + +Excluding 'backward' should not affect the other data. If +'backward' is excluded, excluding 'etcetera' should not affect the +remaining data. + ----- Time zone abbreviations ----- diff --git a/zic.8 b/zic.8 index cfe0ad4..95dd038 100644 --- a/zic.8 +++ b/zic.8 @@ -112,6 +112,20 @@ before 1970 or after the start of 2038. .PP A time zone abbreviation has fewer than 3 characters. POSIX requires at least 3. +.PP +An output file name contains a byte that is not an ASCII letter, +.q "-" , +.q "." , +.q "/" , +or +.q "_" ; +or it contains a file name component that contains more than 14 bytes +or that starts with +.q "-" +or is +.q "." +or +.q ".." . .RE .TP .B \-s diff --git a/zic.c b/zic.c index 64d6781..62c5fd5 100644 --- a/zic.c +++ b/zic.c @@ -622,11 +622,58 @@ _("%s: More than one -L option specified\n"), } static void +componentcheck(char const *name, char const *component, + char const *component_end) +{ + enum { component_len_max = 14 }; + size_t component_len = component_end - component; + if (0 < component_len && component[0] == '-') + warning(_("file name '%s' component contains leading '-'"), + name); + if (0 < component_len && component_len <= 2 + && component[0] == '.' 
&& component_end[-1] == '.') + warning(_("file name '%s' contains '%.*s' component"), + name, (int) component_len, component); + if (component_len_max < component_len) + warning(_("file name '%s' contains overlength component" + " '%.*s...'"), + name, component_len_max, component); +} + +static void +namecheck(const char *name) +{ + register char const *cp; + static char const benign[] = ("-./_" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + register char const *component = name; + if (!noise) + return; + for (cp = name; *cp; cp++) { + unsigned char c = *cp; + if (!strchr(benign, c)) { + warning((isascii(c) && isprint(c) + ? _("file name '%s' contains byte '%c'") + : _("file name '%s' contains byte '\\%o'")), + name, c); + return; + } + if (c == '/') { + componentcheck(name, component, cp); + component = cp + 1; + } + } + componentcheck(name, component, cp); +} + +static void dolink(const char *const fromfield, const char *const tofield) { register char * fromname; register char * toname; + namecheck(tofield); if (fromfield[0] == '/') fromname = ecpyalloc(fromfield); else { @@ -1495,6 +1542,7 @@ writezone(const char *const name, const char *const string, char version) void *typesptr = ats + timecnt; unsigned char *types = typesptr; + namecheck(name); /* ** Sort. */