New subject: Strftime's %C and %y formats versus wide-ranging tm_year values

Oct. 6, 2004

OK, here's a proposed patch to the tz code.  It is relative to the
latest published code on elsie.  It incorporates most of Arthur's
patch circulated yesterday, except it addresses the issues I mentioned
with C99IPMOD, with floating point, and with integer overflow
checking; the resulting code uses int arithmetic instead of assuming
that long and double are wider than int.

===================================================================
RCS file: RCS/tzfile.h,v
retrieving revision 1997.9
retrieving revision 1997.9.0.3
diff -pu -r1997.9 -r1997.9.0.3

--- tzfile.h	1997/12/29 14:31:51	1997.9
+++ tzfile.h	2004/10/06 22:25:15	1997.9.0.3
@@ -21,7 +21,7 @@
 
 #ifndef lint
 #ifndef NOID
-static char	tzfilehid[] = "@(#)tzfile.h	7.14";
+static char	tzfilehid[] = "@(#)tzfile.h	7.15";
 #endif /* !defined NOID */
 #endif /* !defined lint */
 
@@ -156,12 +156,21 @@ struct tzhead {
 #define EPOCH_YEAR	1970
 #define EPOCH_WDAY	TM_THURSDAY
 
+#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
+
 /*
-** Accurate only for the past couple of centuries;
-** that will probably do.
+** Since everything in isleap is modulo 400 (or a factor of 400), we know that
+**	isleap(y) == isleap(y % 400)
+** and so
+**	isleap(a + b) == isleap((a + b) % 400)
+** or
+**	isleap(a + b) == isleap(a % 400 + b % 400)
+** This is true even if % means modulo rather than Fortran remainder
+** (which is allowed by C89 but not C99).
+** We use this to avoid addition overflow problems.
 */
 
-#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
+#define isleap_sum(a, b)	isleap((a) % 400 + (b) % 400)
 
 #ifndef USG
 
===================================================================
RCS file: RCS/asctime.c,v
retrieving revision 2004.3
retrieving revision 2004.3.0.2
diff -pu -r2004.3 -r2004.3.0.2
--- asctime.c	2004/08/11 15:59:06	2004.3
+++ asctime.c	2004/10/06 23:03:13	2004.3.0.2
@@ -5,7 +5,7 @@
 
 #ifndef lint
 #ifndef NOID
-static char	elsieid[] = "@(#)asctime.c	7.22";
+static char	elsieid[] = "@(#)asctime.c	7.23";
 #endif /* !defined NOID */
 #endif /* !defined lint */
 
@@ -15,8 +15,8 @@ static char	elsieid[] = "@(#)asctime.c	7
 #include "tzfile.h"
 
 #if STRICTLY_STANDARD_ASCTIME
-#define ASCTIME_FMT	"%.3s %.3s%3d %.2d:%.2d:%.2d %ld\n"
-#define ASCTIME_FMT_B	ASCTIME_FMT
+#define ASCTIME_FMT	"%.3s %.3s%3d %.2d:%.2d:%.2d %d\n"
+#define ASCTIME_FMT_B	"%.3s %.3s%3d %.2d:%.2d:%.2d %d%d\n"
 #else /* !STRICTLY_STANDARD_ASCTIME */
 /*
 ** Some systems only handle "%.2d"; others only handle "%02d";
@@ -31,14 +31,14 @@ static char	elsieid[] = "@(#)asctime.c	7
 ** For years that are less than four digits, we pad the output with
 ** spaces before the newline to get the newline in the traditional place.
 */
-#define ASCTIME_FMT	"%.3s %.3s%3d %02.2d:%02.2d:%02.2d %-4ld\n"
+#define ASCTIME_FMT	"%.3s %.3s%3d %02.2d:%02.2d:%02.2d %-4d\n"
 /*
 ** For years that are more than four digits we put extra spaces before the year
 ** so that code trying to overwrite the newline won't end up overwriting
 ** a digit within a year and truncating the year (operating on the assumption
 ** that no output is better than wrong output).
 */
-#define ASCTIME_FMT_B	"%.3s %.3s%3d %02.2d:%02.2d:%02.2d     %ld\n"
+#define ASCTIME_FMT_B	"%.3s %.3s%3d %02.2d:%02.2d:%02.2d     %d%d\n"
 #endif /* !STRICTLY_STANDARD_ASCTIME */
 
 #define STD_ASCTIME_BUF_SIZE	26
@@ -74,7 +74,8 @@ char *				buf;
 	};
 	register const char *	wn;
 	register const char *	mn;
-	long			year;
+	int			decade;
+	int			y;
 	char			result[MAX_ASCTIME_BUF_SIZE];
 
 	if (timeptr->tm_wday < 0 || timeptr->tm_wday >= DAYSPERWEEK)
@@ -83,16 +84,34 @@ char *				buf;
 	if (timeptr->tm_mon < 0 || timeptr->tm_mon >= MONSPERYEAR)
 		mn = "???";
 	else	mn = mon_name[timeptr->tm_mon];
-	year = timeptr->tm_year + (long) TM_YEAR_BASE;
 	/*
 	** We avoid using snprintf since it's not available on all systems.
 	*/
-	(void) sprintf(result,
-		((year >= -999 && year <= 9999) ? ASCTIME_FMT : ASCTIME_FMT_B),
-		wn, mn,
-		timeptr->tm_mday, timeptr->tm_hour,
-		timeptr->tm_min, timeptr->tm_sec,
-		year);
+	if (-999 - TM_YEAR_BASE <= timeptr->tm_year
+	    && timeptr->tm_year <= 9999 - TM_YEAR_BASE) {
+		(void) sprintf(result,
+			ASCTIME_FMT,
+			wn, mn,
+			timeptr->tm_mday, timeptr->tm_hour,
+			timeptr->tm_min, timeptr->tm_sec,
+			timeptr->tm_year + TM_YEAR_BASE);
+	} else {
+		decade = timeptr->tm_year / 10 + TM_YEAR_BASE / 10;
+		y = timeptr->tm_year % 10;
+		if (y < 0 && 0 < decade) {
+			y += 10;
+			decade--;
+		} else if (decade < 0 && 0 < y) {
+			y -= 10;
+			decade++;
+		}
+		(void) sprintf(result,
+			ASCTIME_FMT_B,
+			wn, mn,
+			timeptr->tm_mday, timeptr->tm_hour,
+			timeptr->tm_min, timeptr->tm_sec,
+			decade, y < 0 ? -y : y);
+	}
 	if (strlen(result) < STD_ASCTIME_BUF_SIZE || buf == buf_asctime) {
 		(void) strcpy(buf, result);
 		return buf;
===================================================================
RCS file: RCS/strftime.c,v
retrieving revision 2004.4
retrieving revision 2004.4.0.4
diff -pu -r2004.4 -r2004.4.0.4
--- strftime.c	2004/09/09 15:48:53	2004.4
+++ strftime.c	2004/10/06 23:03:13	2004.4.0.4
@@ -1,12 +1,6 @@
-/*
-** XXX To do: figure out correct (as distinct from standard-mandated)
-** output for "two digits of year" and "century" formats when
-** the year is negative or less than 100. --ado, 2004-09-09
-*/
-
 #ifndef lint
 #ifndef NOID
-static char	elsieid[] = "@(#)strftime.c	7.67";
+static char	elsieid[] = "@(#)strftime.c	7.70";
 /*
 ** Based on the UCB version with the ID appearing below.
 ** This is ANSIish only when "multibyte character == plain character".
@@ -114,10 +108,8 @@ static const struct lc_time_T	C_time_loc
 
 static char *	_add P((const char *, char *, const char *));
 static char *	_conv P((int, const char *, char *, const char *));
-static char *	_lconv P((long, const char *, char *, const char *));
 static char *	_fmt P((const char *, const struct tm *, char *, const char *, int *));
-
-size_t strftime P((char *, size_t, const char *, const struct tm *));
+static char *	_yconv P((int, int, int, int, char *, const char *));
 
 extern char *	tzname[];
 
@@ -125,7 +117,6 @@ extern char *	tzname[];
 #define YEAR_2000_NAME	"CHECK_STRFTIME_FORMATS_FOR_TWO_DIGIT_YEARS"
 #endif /* !defined YEAR_2000_NAME */
 
-
 #define IN_NONE	0
 #define IN_SOME	1
 #define IN_THIS	2
@@ -217,9 +208,8 @@ label:
 				** something completely different.
 				** (ado, 1993-05-24)
 				*/
-				pt = _conv((int) ((t->tm_year +
-					(long) TM_YEAR_BASE) / 100),
-					"%02d", pt, ptlim);
+				pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 0,
+					    pt, ptlim);
 				continue;
 			case 'c':
 				{
@@ -387,13 +377,14 @@ label:
 ** (ado, 1996-01-02)
 */
 				{
-					long	year;
+					int	year;
+					int	base;
 					int	yday;
 					int	wday;
 					int	w;
 
 					year = t->tm_year;
-					year += TM_YEAR_BASE;
+					base = TM_YEAR_BASE;
 					yday = t->tm_yday;
 					wday = t->tm_wday;
 					for ( ; ; ) {
@@ -401,7 +392,7 @@ label:
 						int	bot;
 						int	top;
 
-						len = isleap(year) ?
+						len = isleap_sum(year, base) ?
 							DAYSPERLYEAR :
 							DAYSPERNYEAR;
 						/*
@@ -420,7 +411,7 @@ label:
 							top += DAYSPERWEEK;
 						top += len;
 						if (yday >= top) {
-							++year;
+							++base;
 							w = 1;
 							break;
 						}
@@ -429,8 +420,8 @@ label:
 								DAYSPERWEEK);
 							break;
 						}
-						--year;
-						yday += isleap(year) ?
+						--base;
+						yday += isleap_sum(year, base) ?
 							DAYSPERLYEAR :
 							DAYSPERNYEAR;
 					}
@@ -444,12 +435,12 @@ label:
 					if (*format == 'V')
 						pt = _conv(w, "%02d",
 							pt, ptlim);
-					else if (*format == 'g') {
-						*warnp = IN_ALL;
-						pt = _conv(int(year % 100),
-							"%02d", pt, ptlim);
-					} else	pt = _lconv(year, "%04ld",
-							pt, ptlim);
+  					else if (*format == 'g') {
+  						*warnp = IN_ALL;
+						pt = _yconv(year, base, 0, 1,
+  							pt, ptlim);
+					} else	pt = _yconv(year, base, 1, 1,
+  							pt, ptlim);
 				}
 				continue;
 			case 'v':
@@ -484,15 +475,14 @@ label:
 					*warnp = warn2;
 				}
 				continue;
-			case 'y':
-				*warnp = IN_ALL;
-				pt = _conv((int) ((t->tm_year +
-					(long) TM_YEAR_BASE) % 100),
-					"%02d", pt, ptlim);
-				continue;
-			case 'Y':
-				pt = _lconv(t->tm_year + (long) TM_YEAR_BASE,
-					"%04ld", pt, ptlim);
+  			case 'y':
+  				*warnp = IN_ALL;
+				pt = _yconv(t->tm_year, TM_YEAR_BASE, 0, 1,
+					    pt, ptlim);
+  				continue;
+  			case 'Y':
+				pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 1,
+					    pt, ptlim);
 				continue;
 			case 'Z':
 #ifdef TM_ZONE
@@ -556,9 +546,10 @@ label:
 					diff = -diff;
 				} else	sign = "+";
 				pt = _add(sign, pt, ptlim);
-				diff /= 60;
-				pt = _conv((diff/60)*100 + diff%60,
-					"%04d", pt, ptlim);
+				diff /= SECSPERMIN;
+				diff = (diff / MINSPERHOUR) * 100 +
+					(diff % MINSPERHOUR);
+				pt = _conv(diff, "%04d", pt, ptlim);
 				}
 				continue;
 			case '+':
@@ -596,26 +587,59 @@ const char * const	ptlim;
 }
 
 static char *
-_lconv(n, format, pt, ptlim)
-const long		n;
-const char * const	format;
-char * const		pt;
+_add(str, pt, ptlim)
+const char *		str;
+char *			pt;
 const char * const	ptlim;
 {
-	char	buf[INT_STRLEN_MAXIMUM(long) + 1];
-
-	(void) sprintf(buf, format, n);
-	return _add(buf, pt, ptlim);
+	while (pt < ptlim && (*pt = *str++) != '\0')
+		++pt;
+	return pt;
 }
 
 static char *
-_add(str, pt, ptlim)
-const char *		str;
+_yconv(a, b, convert_top, convert_yy, pt, ptlim)
+const int		a;
+const int		b;
+const int		convert_top;
+const int		convert_yy;
 char *			pt;
 const char * const	ptlim;
 {
-	while (pt < ptlim && (*pt = *str++) != '\0')
-		++pt;
+	/*  
+	** POSIX and the C Standard are unclear or inconsistent about
+	** what %C and %y do if the year is negative or exceeds 9999.
+	** Use the convention that %C concatenated with %y yields the
+	** same output as %Y, and that %Y contains at least 4 bytes,
+	** with more only if necessary.
+	*/
+
+	int	top, yy;
+
+	yy = a % 100 + b % 100;
+	top = a / 100 + b / 100 + yy / 100;
+	yy %= 100;
+
+	if (yy < 0 && 0 < top) {
+		yy += 100;
+		top--;
+	} else if (top < 0 && 0 < yy) {
+		yy -= 100;
+		top++;
+	}
+
+	if (convert_top) {
+		if (top == 0 && yy < 0) {
+			pt = _add("-0", pt, ptlim);
+		} else {
+			pt = _conv(top, "%02d", pt, ptlim);
+		}
+	}
+
+	if (convert_yy) {
+		pt = _conv(yy < 0 ? -yy : yy, "%02d", pt, ptlim);
+	}
+
 	return pt;
 }
 
===================================================================
RCS file: RCS/zdump.c,v
retrieving revision 2004.4
retrieving revision 2004.4.0.3
diff -pu -r2004.4 -r2004.4.0.3
--- zdump.c	2004/09/06 20:00:46	2004.4
+++ zdump.c	2004/10/06 23:03:13	2004.4.0.3
@@ -1,4 +1,4 @@
-static char	elsieid[] = "@(#)zdump.c	7.40";
+static char	elsieid[] = "@(#)zdump.c	7.41";
 
 /*
 ** This code has been made independent of the rest of the time
@@ -61,9 +61,25 @@ static char	elsieid[] = "@(#)zdump.c	7.4
 #endif /* !defined DAYSPERNYEAR */
 
 #ifndef isleap
-#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0)
+#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
 #endif /* !defined isleap */
 
+#ifndef isleap_sum
+/*
+** Since everything in isleap is modulo 400 (or a factor of 400), we know that
+**	isleap(y) == isleap(y % 400)
+** and so
+**	isleap(a + b) == isleap((a + b) % 400)
+** or
+**	isleap(a + b) == isleap(a % 400 + b % 400)
+** This is true even if % means modulo rather than Fortran remainder
+** (which is allowed by C89 but not C99).
+** We use this to avoid addition overflow problems.
+*/
+
+#define isleap_sum(a, b)	isleap((a) % 400 + (b) % 400)
+#endif /* !defined isleap_sum */
+
 #if HAVE_GETTEXT
 #include "locale.h"	/* for setlocale */
 #include "libintl.h"
@@ -321,7 +337,7 @@ struct tm *	oldp;
 		return -delta(oldp, newp);
 	result = 0;
 	for (tmy = oldp->tm_year; tmy < newp->tm_year; ++tmy)
-		result += DAYSPERNYEAR + isleap(tmy + (long) TM_YEAR_BASE);
+		result += DAYSPERNYEAR + isleap_sum(tmy, TM_YEAR_BASE);
 	result += newp->tm_yday - oldp->tm_yday;
 	result *= HOURSPERDAY;
 	result += newp->tm_hour - oldp->tm_hour;
@@ -384,6 +400,8 @@ register const struct tm *	timeptr;
 	};
 	register const char *	wn;
 	register const char *	mn;
+	int			decade;
+	int			y;
 
 	/*
 	** The packaged versions of localtime and gmtime never put out-of-range
@@ -398,9 +416,18 @@ register const struct tm *	timeptr;
 		(int) (sizeof mon_name / sizeof mon_name[0]))
 			mn = "???";
 	else		mn = mon_name[timeptr->tm_mon];
-	(void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d %ld",
+	decade = timeptr->tm_year / 10 + TM_YEAR_BASE / 10;
+	y = timeptr->tm_year % 10;
+	if (y < 0 && 0 < decade) {
+		y += 10;
+		decade--;
+	} else if (decade < 0 && 0 < y) {
+		y -= 10;
+		decade++;
+	}
+	(void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d %s%03d%d",
 		wn, mn,
 		timeptr->tm_mday, timeptr->tm_hour,
 		timeptr->tm_min, timeptr->tm_sec,
-		timeptr->tm_year + (long) TM_YEAR_BASE);
+		decade == 0 && y < 0 ? "-" : "", decade, y < 0 ? -y : y);
 }

    

Re: Strftime's %C and %y formats versus wide-ranging tm_year values

Paul Eggert

Ken Pizzini

Paul Eggert

Ken Pizzini

tags

participants (2)