RE: Strftime's %C and %y formats versus wide-ranging tm_year values
At the end of this message you'll find proposed changes to strftime.c to do the right thing for wide-ranging years. I compiled it with this quick-and-dirty source code... #include "stdio.h" #include "sys/types.h" #include "time.h" static int years[] = { 10001, 10000, 9999, 1001, 1000, 999, 101, 100, 99, 11, 10, 9, 1, 0, -1, -9, -10, -11, -99, -100, -101, -999, -1000, -1001, -9999, -10000, -10001 }; #define NVALUES ((sizeof years) / (sizeof years[0])) static char * formats[] = { "%C", "%y", "%G", "%g" }; #define NFORMATS ((sizeof formats) / (sizeof formats[0])) #define TM_BASE_YEAR 1900 main() { static struct tm tm; int i; int j; char buf[1024]; tm.tm_mon = 0; /* months since January => January */ tm.tm_mday = 8; /* day of the month */ tm.tm_yday = 7; /* days since January 1 */ (void) printf("YEAR"); for (j = 0; j < NFORMATS; ++j) { (void) printf("\t%s", formats[j]); } (void) printf("\n"); for (i = 0; i < NVALUES; ++i) { (void) printf("%d", years[i]); tm.tm_year = years[i] - TM_BASE_YEAR; for (j = 0; j < NFORMATS; ++j) { (void) strftime(buf, sizeof buf, formats[j], &tm); (void) printf("\t%s", buf); } (void) printf("\n"); } return 0; } ...and got these results... YEAR %C %y %G %g 10001 100 01 10001 01 10000 100 00 10000 00 9999 99 99 9999 99 1001 10 01 1001 01 1000 10 00 1000 00 999 09 99 0999 99 101 01 01 0101 01 100 01 00 0100 00 99 00 99 0099 99 11 00 11 0011 11 10 00 10 0010 10 9 00 09 0009 09 1 00 01 0001 01 0 00 00 0000 00 -1 -0 01 -001 01 -9 -0 09 -009 09 -10 -0 10 -010 10 -11 -0 11 -011 11 -99 -0 99 -099 99 -100 -1 00 -100 00 -101 -1 01 -101 01 -999 -9 99 -999 99 -1000 -10 00 -1000 00 -1001 -10 01 -1001 01 -9999 -99 99 -9999 99 -10000 -100 00 -10000 00 -10001 -100 01 -10001 01 Absent any objections, I'll promulgate these changes next week (along with Paul Eggert's revised update to tz-link.htm). 
--ado ------- strftime.c ------- *** /tmp/geta12949 Thu Sep 23 15:27:03 2004 --- /tmp/getb12949 Thu Sep 23 15:27:03 2004 *************** *** 1,12 **** - /* - ** XXX To do: figure out correct (as distinct from standard-mandated) - ** output for "two digits of year" and "century" formats when - ** the year is negative or less than 100. --ado, 2004-09-09 - */ - #ifndef lint #ifndef NOID ! static char elsieid[] = "@(#)strftime.c 7.67"; /* ** Based on the UCB version with the ID appearing below. ** This is ANSIish only when "multibyte character == plain character". --- 1,6 ---- #ifndef lint #ifndef NOID ! static char elsieid[] = "@(#)strftime.c 7.69"; /* ** Based on the UCB version with the ID appearing below. ** This is ANSIish only when "multibyte character == plain character". *************** *** 217,225 **** ** something completely different. ** (ado, 1993-05-24) */ ! pt = _conv((int) ((t->tm_year + ! (long) TM_YEAR_BASE) / 100), ! "%02d", pt, ptlim); continue; case 'c': { --- 211,230 ---- ** something completely different. ** (ado, 1993-05-24) */ ! { ! long year; ! int top; ! ! year = t->tm_year; ! year += TM_YEAR_BASE; ! top = year / 100; ! if (top == 0 && year < 0) { ! pt = _add("-0", pt, ptlim); ! } else { ! pt = _conv(top, "%02d", ! pt, ptlim); ! } ! } continue; case 'c': { *************** *** 445,453 **** pt = _conv(w, "%02d", pt, ptlim); else if (*format == 'g') { *warnp = IN_ALL; ! pt = _conv(int(year % 100), ! "%02d", pt, ptlim); } else pt = _lconv(year, "%04ld", pt, ptlim); } --- 450,464 ---- pt = _conv(w, "%02d", pt, ptlim); else if (*format == 'g') { + int i; + *warnp = IN_ALL; ! i = year % 100; ! if (i < 0) { ! i = -i; ! } ! pt = _conv(i, "%02d", ! pt, ptlim); } else pt = _lconv(year, "%04ld", pt, ptlim); } *************** *** 486,494 **** continue; case 'y': *warnp = IN_ALL; ! pt = _conv((int) ((t->tm_year + ! (long) TM_YEAR_BASE) % 100), ! 
"%02d", pt, ptlim); continue; case 'Y': pt = _lconv(t->tm_year + (long) TM_YEAR_BASE, --- 497,512 ---- continue; case 'y': *warnp = IN_ALL; ! { ! int i; ! ! i = (t->tm_year + ! (long) TM_YEAR_BASE) % 100; ! if (i < 0) { ! i = -i; ! } ! pt = _conv(i, "%02d", pt, ptlim); ! } continue; case 'Y': pt = _lconv(t->tm_year + (long) TM_YEAR_BASE, *************** *** 556,564 **** diff = -diff; } else sign = "+"; pt = _add(sign, pt, ptlim); ! diff /= 60; ! pt = _conv((diff/60)*100 + diff%60, ! "%04d", pt, ptlim); } continue; case '+': --- 574,583 ---- diff = -diff; } else sign = "+"; pt = _add(sign, pt, ptlim); ! diff /= SECSPERMIN; ! diff = (diff / MINSPERHOUR) * 100 + ! (diff % MINSPERHOUR); ! pt = _conv(diff, "%04d", pt, ptlim); } continue; case '+':
On Thu, Sep 23, 2004 at 03:39:37PM -0400, Olson, Arthur David (NIH/NCI) wrote:
! i = year % 100; ! if (i < 0) { ! i = -i; ! }
The C language definition has typically left as "implementation defined" whether the result of (-42)%100 is -42 or +58 (with corresponding variation in what the results of integer division will be); this code is making the assumption that the -42 answer will always be given. While I grant that this is true on any system I can think of offhand, I still think it is worth recoding this (in both the %y and %g fragments) as something more like: i = (year < 0 ? -year : year) % 100; And the %C code:
! top = year / 100; would need a corresponding pedantic fix, such as: top = (year + (year < 0 ? -year % 100 : 0)) / 100;
--Ken Pizzini
Ken Pizzini <"tz."@explicate.org> writes:
The C language definition has typically left as "implementation defined" whether the result of (-42)%100 is -42 or +58
That was true for C89, but it's not true for C99, which standardized on "Fortran" integer division, so that (-42)%100 is -42 and (-42)/100 is 0. It may be worth recoding for older C89 systems that don't implement Fortran division, not that I know of any; but for newer systems we can rely on the Fortran model.
On Fri, Sep 24, 2004 at 12:20:35PM -0700, Paul Eggert wrote:
Ken Pizzini <"tz."@explicate.org> writes:
The C language definition has typically left as "implementation defined" whether the result of (-42)%100 is -42 or +58
That was true for C89, but it's not true for C99, which standardized on "Fortran" integer division, so that (-42)%100 is -42 and (-42)/100 is 0.
It may be worth recoding for older C89 systems that don't implement Fortran division, not that I know of any; but for newer systems we can rely on the Fortran model.
Yes, but it was true from the earliest K&R days until C99, and the rest of the code certainly caters to all manner of non-C99 implementations. If we are willing to accept C99 guarantees, there is a lot of code simplification that could be made. (Besides, IMHO C99 is philosophically wrong on this. ;-) --Ken Pizzini
"Olson, Arthur David (NIH/NCI)" <olsona@dc37a.nci.nih.gov> writes:
Absent any objections, ...
I found a bug on 32-bit hosts: it misformats %Y, %y, %C, etc., when t->tm_year is close to INT_MAX. This is because t->tm_year + 1900 overflows. In the common 32-bit case where 'long' is the same width as 'int', converting to 'long' first doesn't help. This bug was present in the old version too, but we might as well fix it while we're in the neighborhood. I'll send a proposed fix in my next email, but first I'll give an illustration of the bug. The following test program (derived from the program you emailed) will output lines that look like this (modulo white space): Y-1900 %C %Y %y %G %g ... 2147483647 -21474817 -2147481749 49 -2147481749 49 2147483646 -21474817 -2147481750 50 -2147481750 50 2147483645 -21474817 -2147481751 51 -2147481751 51 2147481748 -21474836 -2147483648 48 -2147483648 48 instead of the correct answers, which look like this: Y-1900 %C %Y %y %G %g ... 2147483647 21474855 2147485547 47 2147485547 47 2147483646 21474855 2147485546 46 2147485546 46 2147483645 21474855 2147485545 45 2147485545 45 2147481748 21474836 2147483648 48 2147483648 48 #include <limits.h> #include <stdio.h> #include <time.h> static int years[] = { 10001 - 1900, 10000 - 1900, 9999 - 1900, 1001 - 1900, 1000 - 1900, 999 - 1900, 101 - 1900, 100 - 1900, 99 - 1900, 11 - 1900, 10 - 1900, 9 - 1900, 1 - 1900, 0 - 1900, -1 - 1900, -9 - 1900, -10 - 1900, -11 - 1900, -99 - 1900, -100 - 1900, -101 - 1900, -999 - 1900, -1000 - 1900, -1001 - 1900, -9999 - 1900, -10000 - 1900, -10001 - 1900, INT_MAX, INT_MAX - 1, INT_MAX - 2, INT_MAX - 1900 + 1, INT_MAX - 1900, INT_MAX - 1 - 1900, INT_MIN + 2, INT_MIN + 1, INT_MIN }; #define NVALUES ((sizeof years) / (sizeof years[0])) static char *formats[] = { "%C", "%Y", "%y", "%G", "%g" }; #define NFORMATS ((sizeof formats) / (sizeof formats[0])) #define TM_BASE_YEAR 1900 main () { static struct tm tm; int i; int j; char buf[1024]; tm.tm_mon = 0; /* months since January => January */ tm.tm_mday = 8; /* day of the month */ tm.tm_yday = 7; /* days since 
January 1 */ (void) printf ("Y-1900"); for (j = 0; j < NFORMATS; ++j) { (void) printf ("\t%s", formats[j]); } (void) printf ("\n"); for (i = 0; i < NVALUES; ++i) { (void) printf ("%d", years[i]); tm.tm_year = years[i]; for (j = 0; j < NFORMATS; ++j) { (void) strftime (buf, sizeof buf, formats[j], &tm); (void) printf ("\t%s", buf); } (void) printf ("\n"); } return 0; }
Here's a patch to fix the mishandling of large tm_year values that I noted in my previous email. It assumes that you've already applied the strftime.c patch (in email from ado dated 2004-09-23 19:39:37 UTC) that increments strftime.c's elsieid from 7.67 to 7.69. *** tzfile.h 1997/12/29 14:31:51 1997.9 --- tzfile.h 2004/09/27 04:35:02 1997.9.0.2 *************** *** 157,167 **** #define EPOCH_WDAY TM_THURSDAY /* ! ** Accurate only for the past couple of centuries; ** that will probably do. */ #define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) #ifndef USG --- 157,176 ---- #define EPOCH_WDAY TM_THURSDAY /* ! ** Accurate only for the proleptic Gregorian calendar; ** that will probably do. + ** isleap(y) is 1 if y is a leap year. + ** ismult_sum(y1, y2, d) is 1 if y1 + y2 is a multiple of d (d > 1), and + ** returns the correct answer even if the addition would overflow. + ** isleap_sum(y1, y2) equals isleap(y1 + y2) except that it also + ** returns the correct answer even if the addition would overflow. */ #define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) + #define ismult_sum(y1, y2, d) ((((y1) % (d) + (y2) % (d))) % (d) == 0) + #define isleap_sum(y1, y2) \ + (ismult_sum(y1, y2, 4) \ + && (!ismult_sum(y1, y2, 100) || ismult_sum(y1, y2, 400))) #ifndef USG *** strftime.c 2004/09/23 19:39:37 2004.4.0.1 --- strftime.c 2004/09/27 04:35:02 2004.4.0.2 *************** *** 108,114 **** static char * _add P((const char *, char *, const char *)); static char * _conv P((int, const char *, char *, const char *)); ! static char * _lconv P((long, const char *, char *, const char *)); static char * _fmt P((const char *, const struct tm *, char *, const char *, int *)); size_t strftime P((char *, size_t, const char *, const struct tm *)); --- 108,114 ---- static char * _add P((const char *, char *, const char *)); static char * _conv P((int, const char *, char *, const char *)); ! 
static char * _yconv P((int, int, int, int, char *, const char *)); static char * _fmt P((const char *, const struct tm *, char *, const char *, int *)); size_t strftime P((char *, size_t, const char *, const struct tm *)); *************** *** 211,230 **** ** something completely different. ** (ado, 1993-05-24) */ ! { ! long year; ! int top; ! ! year = t->tm_year; ! year += TM_YEAR_BASE; ! top = year / 100; ! if (top == 0 && year < 0) { ! pt = _add("-0", pt, ptlim); ! } else { ! pt = _conv(top, "%02d", ! pt, ptlim); ! } ! } continue; case 'c': { --- 211,218 ---- ** something completely different. ** (ado, 1993-05-24) */ ! pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 0, ! pt, ptlim); continue; case 'c': { *************** *** 392,404 **** ** (ado, 1996-01-02) */ { ! long year; int yday; int wday; int w; year = t->tm_year; ! year += TM_YEAR_BASE; yday = t->tm_yday; wday = t->tm_wday; for ( ; ; ) { --- 380,393 ---- ** (ado, 1996-01-02) */ { ! int year; ! int base; int yday; int wday; int w; year = t->tm_year; ! base = TM_YEAR_BASE; yday = t->tm_yday; wday = t->tm_wday; for ( ; ; ) { *************** *** 406,412 **** int bot; int top; ! len = isleap(year) ? DAYSPERLYEAR : DAYSPERNYEAR; /* --- 395,401 ---- int bot; int top; ! len = isleap_sum(year, base) ? DAYSPERLYEAR : DAYSPERNYEAR; /* *************** *** 425,431 **** top += DAYSPERWEEK; top += len; if (yday >= top) { ! ++year; w = 1; break; } --- 414,420 ---- top += DAYSPERWEEK; top += len; if (yday >= top) { ! ++base; w = 1; break; } *************** *** 434,441 **** DAYSPERWEEK); break; } ! --year; ! yday += isleap(year) ? DAYSPERLYEAR : DAYSPERNYEAR; } --- 423,430 ---- DAYSPERWEEK); break; } ! --base; ! yday += isleap_sum(year,base) ? DAYSPERLYEAR : DAYSPERNYEAR; } *************** *** 450,465 **** pt = _conv(w, "%02d", pt, ptlim); else if (*format == 'g') { - int i; - *warnp = IN_ALL; ! i = year % 100; ! if (i < 0) { ! i = -i; ! } ! pt = _conv(i, "%02d", pt, ptlim); ! 
} else pt = _lconv(year, "%04ld", pt, ptlim); } continue; --- 439,448 ---- pt = _conv(w, "%02d", pt, ptlim); else if (*format == 'g') { *warnp = IN_ALL; ! pt = _yconv(year, base, 0, 1, pt, ptlim); ! } else pt = _yconv(year, base, 1, 1, pt, ptlim); } continue; *************** *** 497,516 **** continue; case 'y': *warnp = IN_ALL; ! { ! int i; ! ! i = (t->tm_year + ! (long) TM_YEAR_BASE) % 100; ! if (i < 0) { ! i = -i; ! } ! pt = _conv(i, "%02d", pt, ptlim); ! } continue; case 'Y': ! pt = _lconv(t->tm_year + (long) TM_YEAR_BASE, ! "%04ld", pt, ptlim); continue; case 'Z': #ifdef TM_ZONE --- 480,491 ---- continue; case 'y': *warnp = IN_ALL; ! pt = _yconv(t->tm_year, TM_YEAR_BASE, 0, 1, ! pt, ptlim); continue; case 'Y': ! pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 1, ! pt, ptlim); continue; case 'Z': #ifdef TM_ZONE *************** *** 615,630 **** } static char * ! _lconv(n, format, pt, ptlim) ! const long n; ! const char * const format; ! char * const pt; const char * const ptlim; { ! char buf[INT_STRLEN_MAXIMUM(long) + 1]; ! (void) sprintf(buf, format, n); ! return _add(buf, pt, ptlim); } static char * --- 590,638 ---- } static char * ! _yconv(y1, y2, convert_top, convert_yy, pt, ptlim) ! const int y1; ! const int y2; ! const int convert_top; ! const int convert_yy; ! char * pt; const char * const ptlim; { ! /* ! ** POSIX and the C Standard are unclear or inconsistent about ! ** what %C and %y do if the year is negative or exceeds 9999. ! ** Use the convention that %C concatenated with %y yields the ! ** same output as %Y, and that %Y contains at least 4 bytes, ! ** with more only if necessary. ! */ ! int top, yy; ! ! yy = y1 % 100 + y2 % 100; ! top = y1 / 100 + y2 / 100 + yy / 100; ! yy %= 100; ! ! if (0 < top && yy < 0) { ! yy += 100; ! top--; ! } else if (top < 0 && 0 < yy) { ! yy -= 100; ! top++; ! } ! ! if (convert_top) { ! if (top == 0 && yy < 0) { ! pt = _add("-0", pt, ptlim); ! } else { ! pt = _conv(top, "%02d", pt, ptlim); ! } ! } ! ! if (convert_yy) { ! 
pt = _conv(yy < 0 ? -yy : yy, "%02d", pt, ptlim); ! } ! ! return pt; } static char *
participants (3)
-
Ken Pizzini -
Olson, Arthur David (NIH/NCI) -
Paul Eggert