Re: Strftime's %C and %y formats versus wide-ranging tm_year values
OK, here's a proposed patch to the tz code. It is relative to the latest published code on elsie. It incorporates most of Arthur's patch circulated yesterday, except it addresses the issues I mentioned with C99IPMOD, with floating point, and with integer overflow checking; the resulting code uses int arithmetic instead of assuming that long and double are wider than int. =================================================================== RCS file: RCS/tzfile.h,v retrieving revision 1997.9 retrieving revision 1997.9.0.3 diff -pu -r1997.9 -r1997.9.0.3 --- tzfile.h 1997/12/29 14:31:51 1997.9 +++ tzfile.h 2004/10/06 22:25:15 1997.9.0.3 @@ -21,7 +21,7 @@ #ifndef lint #ifndef NOID -static char tzfilehid[] = "@(#)tzfile.h 7.14"; +static char tzfilehid[] = "@(#)tzfile.h 7.15"; #endif /* !defined NOID */ #endif /* !defined lint */ @@ -156,12 +156,21 @@ struct tzhead { #define EPOCH_YEAR 1970 #define EPOCH_WDAY TM_THURSDAY +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) + /* -** Accurate only for the past couple of centuries; -** that will probably do. +** Since everything in isleap is modulo 400 (or a factor of 400), we know that +** isleap(y) == isleap(y % 400) +** and so +** isleap(a + b) == isleap((a + b) % 400) +** or +** isleap(a + b) == isleap(a % 400 + b % 400) +** This is true even if % means modulo rather than Fortran remainder +** (which is allowed by C89 but not C99). +** We use this to avoid addition overflow problems. */ -#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) +#define isleap_sum(a, b) isleap((a) % 400 + (b) % 400) #ifndef USG =================================================================== RCS file: RCS/asctime.c,v retrieving revision 2004.3 retrieving revision 2004.3.0.2 diff -pu -r2004.3 -r2004.3.0.2 --- asctime.c 2004/08/11 15:59:06 2004.3 +++ asctime.c 2004/10/06 23:03:13 2004.3.0.2 @@ -5,7 +5,7 @@ #ifndef lint #ifndef NOID -static char elsieid[] = "@(#)asctime.c 7.22"; +static char elsieid[] = "@(#)asctime.c 7.23"; #endif /* !defined NOID */ #endif /* !defined lint */ @@ -15,8 +15,8 @@ static char elsieid[] = "@(#)asctime.c 7 #include "tzfile.h" #if STRICTLY_STANDARD_ASCTIME -#define ASCTIME_FMT "%.3s %.3s%3d %.2d:%.2d:%.2d %ld\n" -#define ASCTIME_FMT_B ASCTIME_FMT +#define ASCTIME_FMT "%.3s %.3s%3d %.2d:%.2d:%.2d %d\n" +#define ASCTIME_FMT_B "%.3s %.3s%3d %.2d:%.2d:%.2d %d%d\n" #else /* !STRICTLY_STANDARD_ASCTIME */ /* ** Some systems only handle "%.2d"; others only handle "%02d"; @@ -31,14 +31,14 @@ static char elsieid[] = "@(#)asctime.c 7 ** For years that are less than four digits, we pad the output with ** spaces before the newline to get the newline in the traditional place. */ -#define ASCTIME_FMT "%.3s %.3s%3d %02.2d:%02.2d:%02.2d %-4ld\n" +#define ASCTIME_FMT "%.3s %.3s%3d %02.2d:%02.2d:%02.2d %-4d\n" /* ** For years that are more than four digits we put extra spaces before the year ** so that code trying to overwrite the newline won't end up overwriting ** a digit within a year and truncating the year (operating on the assumption ** that no output is better than wrong output). */ -#define ASCTIME_FMT_B "%.3s %.3s%3d %02.2d:%02.2d:%02.2d %ld\n" +#define ASCTIME_FMT_B "%.3s %.3s%3d %02.2d:%02.2d:%02.2d %d%d\n" #endif /* !STRICTLY_STANDARD_ASCTIME */ #define STD_ASCTIME_BUF_SIZE 26 @@ -74,7 +74,8 @@ char * buf; }; register const char * wn; register const char * mn; - long year; + int decade; + int y; char result[MAX_ASCTIME_BUF_SIZE]; if (timeptr->tm_wday < 0 || timeptr->tm_wday >= DAYSPERWEEK) @@ -83,16 +84,34 @@ char * buf; if (timeptr->tm_mon < 0 || timeptr->tm_mon >= MONSPERYEAR) mn = "???"; else mn = mon_name[timeptr->tm_mon]; - year = timeptr->tm_year + (long) TM_YEAR_BASE; /* ** We avoid using snprintf since it's not available on all systems. */ - (void) sprintf(result, - ((year >= -999 && year <= 9999) ? ASCTIME_FMT : ASCTIME_FMT_B), - wn, mn, - timeptr->tm_mday, timeptr->tm_hour, - timeptr->tm_min, timeptr->tm_sec, - year); + if (-999 - TM_YEAR_BASE <= timeptr->tm_year + && timeptr->tm_year <= 9999 - TM_YEAR_BASE) { + (void) sprintf(result, + ASCTIME_FMT, + wn, mn, + timeptr->tm_mday, timeptr->tm_hour, + timeptr->tm_min, timeptr->tm_sec, + timeptr->tm_year + TM_YEAR_BASE); + } else { + decade = timeptr->tm_year / 10 + TM_YEAR_BASE / 10; + y = timeptr->tm_year % 10; + if (y < 0 && 0 < decade) { + y += 10; + decade--; + } else if (decade < 0 && 0 < y) { + y -= 10; + decade++; + } + (void) sprintf(result, + ASCTIME_FMT_B, + wn, mn, + timeptr->tm_mday, timeptr->tm_hour, + timeptr->tm_min, timeptr->tm_sec, + decade, y < 0 ? -y : y); + } if (strlen(result) < STD_ASCTIME_BUF_SIZE || buf == buf_asctime) { (void) strcpy(buf, result); return buf; =================================================================== RCS file: RCS/strftime.c,v retrieving revision 2004.4 retrieving revision 2004.4.0.4 diff -pu -r2004.4 -r2004.4.0.4 --- strftime.c 2004/09/09 15:48:53 2004.4 +++ strftime.c 2004/10/06 23:03:13 2004.4.0.4 @@ -1,12 +1,6 @@ -/* -** XXX To do: figure out correct (as distinct from standard-mandated) -** output for "two digits of year" and "century" formats when -** the year is negative or less than 100. --ado, 2004-09-09 -*/ - #ifndef lint #ifndef NOID -static char elsieid[] = "@(#)strftime.c 7.67"; +static char elsieid[] = "@(#)strftime.c 7.70"; /* ** Based on the UCB version with the ID appearing below. ** This is ANSIish only when "multibyte character == plain character". @@ -114,10 +108,8 @@ static const struct lc_time_T C_time_loc static char * _add P((const char *, char *, const char *)); static char * _conv P((int, const char *, char *, const char *)); -static char * _lconv P((long, const char *, char *, const char *)); static char * _fmt P((const char *, const struct tm *, char *, const char *, int *)); - -size_t strftime P((char *, size_t, const char *, const struct tm *)); +static char * _yconv P((int, int, int, int, char *, const char *)); extern char * tzname[]; @@ -125,7 +117,6 @@ extern char * tzname[]; #define YEAR_2000_NAME "CHECK_STRFTIME_FORMATS_FOR_TWO_DIGIT_YEARS" #endif /* !defined YEAR_2000_NAME */ - #define IN_NONE 0 #define IN_SOME 1 #define IN_THIS 2 @@ -217,9 +208,8 @@ label: ** something completely different. ** (ado, 1993-05-24) */ - pt = _conv((int) ((t->tm_year + - (long) TM_YEAR_BASE) / 100), - "%02d", pt, ptlim); + pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 0, + pt, ptlim); continue; case 'c': { @@ -387,13 +377,14 @@ label: ** (ado, 1996-01-02) */ { - long year; + int year; + int base; int yday; int wday; int w; year = t->tm_year; - year += TM_YEAR_BASE; + base = TM_YEAR_BASE; yday = t->tm_yday; wday = t->tm_wday; for ( ; ; ) { @@ -401,7 +392,7 @@ label: int bot; int top; - len = isleap(year) ? + len = isleap_sum(year, base) ? DAYSPERLYEAR : DAYSPERNYEAR; /* @@ -420,7 +411,7 @@ label: top += DAYSPERWEEK; top += len; if (yday >= top) { - ++year; + ++base; w = 1; break; } @@ -429,8 +420,8 @@ label: DAYSPERWEEK); break; } - --year; - yday += isleap(year) ? + --base; + yday += isleap_sum(year, base) ? DAYSPERLYEAR : DAYSPERNYEAR; } @@ -444,12 +435,12 @@ label: if (*format == 'V') pt = _conv(w, "%02d", pt, ptlim); - else if (*format == 'g') { - *warnp = IN_ALL; - pt = _conv(int(year % 100), - "%02d", pt, ptlim); - } else pt = _lconv(year, "%04ld", - pt, ptlim); + else if (*format == 'g') { + *warnp = IN_ALL; + pt = _yconv(year, base, 0, 1, + pt, ptlim); + } else pt = _yconv(year, base, 1, 1, + pt, ptlim); } continue; case 'v': @@ -484,15 +475,14 @@ label: *warnp = warn2; } continue; - case 'y': - *warnp = IN_ALL; - pt = _conv((int) ((t->tm_year + - (long) TM_YEAR_BASE) % 100), - "%02d", pt, ptlim); - continue; - case 'Y': - pt = _lconv(t->tm_year + (long) TM_YEAR_BASE, - "%04ld", pt, ptlim); + case 'y': + *warnp = IN_ALL; + pt = _yconv(t->tm_year, TM_YEAR_BASE, 0, 1, + pt, ptlim); + continue; + case 'Y': + pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 1, + pt, ptlim); continue; case 'Z': #ifdef TM_ZONE @@ -556,9 +546,10 @@ label: diff = -diff; } else sign = "+"; pt = _add(sign, pt, ptlim); - diff /= 60; - pt = _conv((diff/60)*100 + diff%60, - "%04d", pt, ptlim); + diff /= SECSPERMIN; + diff = (diff / MINSPERHOUR) * 100 + + (diff % MINSPERHOUR); + pt = _conv(diff, "%04d", pt, ptlim); } continue; case '+': @@ -596,26 +587,59 @@ const char * const ptlim; } static char * -_lconv(n, format, pt, ptlim) -const long n; -const char * const format; -char * const pt; +_add(str, pt, ptlim) +const char * str; +char * pt; const char * const ptlim; { - char buf[INT_STRLEN_MAXIMUM(long) + 1]; - - (void) sprintf(buf, format, n); - return _add(buf, pt, ptlim); + while (pt < ptlim && (*pt = *str++) != '\0') + ++pt; + return pt; } static char * -_add(str, pt, ptlim) -const char * str; +_yconv(a, b, convert_top, convert_yy, pt, ptlim) +const int a; +const int b; +const int convert_top; +const int convert_yy; char * pt; const char * const ptlim; { - while (pt < ptlim && (*pt = *str++) != '\0') - ++pt; + /* + ** POSIX and the C Standard are unclear or inconsistent about + ** what %C and %y do if the year is negative or exceeds 9999. + ** Use the convention that %C concatenated with %y yields the + ** same output as %Y, and that %Y contains at least 4 bytes, + ** with more only if necessary. + */ + + int top, yy; + + yy = a % 100 + b % 100; + top = a / 100 + b / 100 + yy / 100; + yy %= 100; + + if (yy < 0 && 0 < top) { + yy += 100; + top--; + } else if (top < 0 && 0 < yy) { + yy -= 100; + top++; + } + + if (convert_top) { + if (top == 0 && yy < 0) { + pt = _add("-0", pt, ptlim); + } else { + pt = _conv(top, "%02d", pt, ptlim); + } + } + + if (convert_yy) { + pt = _conv(yy < 0 ? -yy : yy, "%02d", pt, ptlim); + } + return pt; } =================================================================== RCS file: RCS/zdump.c,v retrieving revision 2004.4 retrieving revision 2004.4.0.3 diff -pu -r2004.4 -r2004.4.0.3 --- zdump.c 2004/09/06 20:00:46 2004.4 +++ zdump.c 2004/10/06 23:03:13 2004.4.0.3 @@ -1,4 +1,4 @@ -static char elsieid[] = "@(#)zdump.c 7.40"; +static char elsieid[] = "@(#)zdump.c 7.41"; /* ** This code has been made independent of the rest of the time @@ -61,9 +61,25 @@ static char elsieid[] = "@(#)zdump.c 7.4 #endif /* !defined DAYSPERNYEAR */ #ifndef isleap -#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0) +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) #endif /* !defined isleap */ +#ifndef isleap_sum +/* +** Since everything in isleap is modulo 400 (or a factor of 400), we know that +** isleap(y) == isleap(y % 400) +** and so +** isleap(a + b) == isleap((a + b) % 400) +** or +** isleap(a + b) == isleap(a % 400 + b % 400) +** This is true even if % means modulo rather than Fortran remainder +** (which is allowed by C89 but not C99). +** We use this to avoid addition overflow problems. +*/ + +#define isleap_sum(a, b) isleap((a) % 400 + (b) % 400) +#endif /* !defined isleap_sum */ + #if HAVE_GETTEXT #include "locale.h" /* for setlocale */ #include "libintl.h" @@ -321,7 +337,7 @@ struct tm * oldp; return -delta(oldp, newp); result = 0; for (tmy = oldp->tm_year; tmy < newp->tm_year; ++tmy) - result += DAYSPERNYEAR + isleap(tmy + (long) TM_YEAR_BASE); + result += DAYSPERNYEAR + isleap_sum(tmy, TM_YEAR_BASE); result += newp->tm_yday - oldp->tm_yday; result *= HOURSPERDAY; result += newp->tm_hour - oldp->tm_hour; @@ -384,6 +400,8 @@ register const struct tm * timeptr; }; register const char * wn; register const char * mn; + int decade; + int y; /* ** The packaged versions of localtime and gmtime never put out-of-range @@ -398,9 +416,18 @@ register const struct tm * timeptr; (int) (sizeof mon_name / sizeof mon_name[0])) mn = "???"; else mn = mon_name[timeptr->tm_mon]; - (void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d %ld", + decade = timeptr->tm_year / 10 + TM_YEAR_BASE / 10; + y = timeptr->tm_year % 10; + if (y < 0 && 0 < decade) { + y += 10; + decade--; + } else if (decade < 0 && 0 < y) { + y -= 10; + decade++; + } + (void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d %s%03d%d", wn, mn, timeptr->tm_mday, timeptr->tm_hour, timeptr->tm_min, timeptr->tm_sec, - timeptr->tm_year + (long) TM_YEAR_BASE); + decade == 0 && y < 0 ? "-" : "", decade, y < 0 ? -y : y); }
On Wed, Oct 06, 2004 at 04:12:46PM -0700, Paul Eggert wrote:
OK, here's a proposed patch to the tz code. It is relative to the latest published code on elsie. It incorporates most of Arthur's patch circulated yesterday, except it addresses the issues I mentioned with C99IPMOD, with floating point, and with integer overflow checking; the resulting code uses int arithmetic instead of assuming that long and double are wider than int.
For the most part, I like it. Two things bother me a little though: 1) I think it's a shame that isleap_sum() needs the compiler to do a % more than isleap() does, but I certainly don't see any clean way of getting around this. (There are ways, but the cures I'm coming up with are all worse than the "disease".) Not a big deal, but I felt a need to mention it in case someone else does have a bright idea. 2) More importantly, I think that there are too many duplicate definitions in zdump.c. Given the comment about independence at the top of zdump.c this means that or tzfile.h should be broken up into two files: one which includes basic #define's such as isleap(), TM_YEAR_BASE, and SECSPERMIN (all conditionalized as zdump.c currently does), and the other which contains the more internals-specific definitions of tzcode.h. Attached is a patch, relative to Paul's most recent patch, which does this. --Ken Pizzini
Ken Pizzini <"tz."@explicate.org> writes:
1) I think it's a shame that isleap_sum() needs the compiler to do a % more than isleap() does,
Actually, it's two %s more -- and this is assuming the compiler does common subexpression elimination.
I certainly don't see any clean way of getting around this. (There are ways, but the cures I'm coming up with are all worse than the "disease".)
One can optimize the case of isleap_sum(y, TM_YEAR_BASE) a bit (because TM_YEAR_BASE is known), so that it doesn't do any more %s than isleap does. But it's a bit messy, and since that case occurs only in zdump I don't think it's worth the hassle.
I felt a need to mention it in case someone else does have a bright idea.
I think both Arthur and I have noticed the problem. He took a crack at a simpler approach with the C99IPMOD macro, but that turned out to be problematic. It'd be nice to come up with a better solution. Also, the code that looks like this: decade = timeptr->tm_year / 10 + TM_YEAR_BASE / 10; y = timeptr->tm_year % 10; if (y < 0 && 0 < decade) { y += 10; decade--; } else if (decade < 0 && 0 < y) { y -= 10; decade++; } is repeated in asctime,c and zdump.c, and (in slightly different form, with a divisor of 100 and where the 2nd value is arbitrary so the computation is more general) in strftime.c. It might be nice if that could be packaged up somehow.
2) More importantly, I think that there are too many duplicate definitions in zdump.c. Given the comment about independence at the top of zdump.c this means that or tzfile.h should be broken up into two files
Makes sense to me but it's Arthur's call. I suppose the above code could be factored into the isleap() .h file as well. PS. I did think of one further minor optimization to the patch I just submitted. This doesn't affect correctness, just performance on C99 and most other hosts. Here it is: =================================================================== RCS file: RCS/asctime.c,v retrieving revision 2004.3.0.2 retrieving revision 2004.3.0.3 diff -pu -r2004.3.0.2 -r2004.3.0.3 --- asctime.c 2004/10/06 23:03:13 2004.3.0.2 +++ asctime.c 2004/10/07 05:17:19 2004.3.0.3 @@ -101,7 +101,7 @@ char * buf; if (y < 0 && 0 < decade) { y += 10; decade--; - } else if (decade < 0 && 0 < y) { + } else if (-1 % 2 == 1 && decade < 0 && 0 < y) { y -= 10; decade++; } =================================================================== RCS file: RCS/zdump.c,v retrieving revision 2004.4.0.3 retrieving revision 2004.4.0.4 diff -pu -r2004.4.0.3 -r2004.4.0.4 --- zdump.c 2004/10/06 23:03:13 2004.4.0.3 +++ zdump.c 2004/10/07 05:17:19 2004.4.0.4 @@ -421,7 +421,7 @@ register const struct tm * timeptr; if (y < 0 && 0 < decade) { y += 10; decade--; - } else if (decade < 0 && 0 < y) { + } else if (-1 % 2 == 1 && decade < 0 && 0 < y) { y -= 10; decade++; }
On Wed, Oct 06, 2004 at 05:36:07PM -0700, Ken Pizzini wrote:
For the most part, I like it. Two things bother me a little though:
1) I think it's a shame that isleap_sum() needs the compiler to do a % more than isleap() does, but I certainly don't see any clean way of getting around this. (There are ways, but the cures I'm coming up with are all worse than the "disease".) Not a big deal, but I felt a need to mention it in case someone else does have a bright idea.
Well, one can easily quibble over whether the patch that I'm attaching to this message uses a "clean" solution to this minor problem, but I find that the more I look at it, the fonder I am, so I'll offer it for general consideration. Note that I also threw in some officialese for good measure, which can be considered independently of the the proposed isleap_sum() redefinintion. Thanks to Paul Eggert, who pointed out a technical flaw with an earlier incarnation of this expression. --Ken Pizzini
participants (2)
-
Ken Pizzini -
Paul Eggert