[tz] Converting TZ DB files to pretty markup

Jan. 27, 2013

And then there is the script, rewritten to comply with the new
syntax.  And it produces much nicer HTML output, it is oh so
pretty now.

Of course checking still works, and is also more sophisticated
than before.

--steffen
...
From a2131929d0d2a09591fcedf3afef0b77d6f704ec Mon Sep 17 00:00:00 2001
Message-Id: <a2131929d0d2a09591fcedf3afef0b77d6f704ec.1359248090.git.sdaoden@gmail.com>
From: "Steffen \"Daode\" Nurpmeso" <sdaoden@gmail.com>
Date: Sat, 26 Jan 2013 19:48:34 +0100
Subject: [PATCH 1/2] TZ db files: remove <pre>
---
 africa       |    1 -
 antarctica   |    1 -
 asia         |    1 -
 australasia  |    1 -
 backward     |    1 -
 etcetera     |    1 -
 europe       |    1 -
 factory      |    1 -
 leapseconds  |    1 -
 northamerica |    1 -
 pacificnew   |    1 -
 solar87      |    1 -
 solar88      |    1 -
 solar89      |    1 -
 southamerica |    1 -
 systemv      |    1 -
 16 files changed, 0 insertions(+), 16 deletions(-)

diff --git a/africa b/africa
index dc90f12..5c95bb3 100644
--- a/africa
+++ b/africa
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/antarctica b/antarctica
index 60e615b..f630a73 100644
--- a/antarctica
+++ b/antarctica
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/asia b/asia
index 8711a50..0a6763a 100644
--- a/asia
+++ b/asia
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/australasia b/australasia
index b0a9638..8e8ac0d 100644
--- a/australasia
+++ b/australasia
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/backward b/backward
index dc7769f..6767777 100644
--- a/backward
+++ b/backward
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/etcetera b/etcetera
index a9ff729..af5d19b 100644
--- a/etcetera
+++ b/etcetera
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/europe b/europe
index 7fa1f13..8aad740 100644
--- a/europe
+++ b/europe
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/factory b/factory
index d29a585..4304f7c 100644
--- a/factory
+++ b/factory
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/leapseconds b/leapseconds
index eba7132..60c2323 100644
--- a/leapseconds
+++ b/leapseconds
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/northamerica b/northamerica
index efa1c06..6aaf665 100644
--- a/northamerica
+++ b/northamerica
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/pacificnew b/pacificnew
index bccd852..7349434 100644
--- a/pacificnew
+++ b/pacificnew
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/solar87 b/solar87
index 2299558..8d0344a 100644
--- a/solar87
+++ b/solar87
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/solar88 b/solar88
index bb1d6ca..29def03 100644
--- a/solar88
+++ b/solar88
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/solar89 b/solar89
index af93235..2153522 100644
--- a/solar89
+++ b/solar89
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/southamerica b/southamerica
index afa505f..55072c8 100644
--- a/southamerica
+++ b/southamerica
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
diff --git a/systemv b/systemv
index e651e85..d9e2995 100644
--- a/systemv
+++ b/systemv
@@ -1,4 +1,3 @@
-# <pre>
 # This file is in the public domain, so clarified as of
 # 2009-05-17 by Arthur David Olson.
 
-- 
1.7.9.rc2.1.g69204
...
From c4ce7ed5d829e2977fac9cb6837827ecc5d71233 Mon Sep 17 00:00:00 2001
Message-Id: <c4ce7ed5d829e2977fac9cb6837827ecc5d71233.1359248090.git.sdaoden@gmail.com>
In-Reply-To: <a2131929d0d2a09591fcedf3afef0b77d6f704ec.1359248090.git.sdaoden@gmail.com>
References: <a2131929d0d2a09591fcedf3afef0b77d6f704ec.1359248090.git.sdaoden@gmail.com>
From: "Steffen \"Daode\" Nurpmeso" <sdaoden@gmail.com>
Date: Sat, 26 Jan 2013 22:48:19 +0100
Subject: [PATCH 2/2] workht.pl: added: URL checker and output dumper
---
 workht.pl |  246 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 246 insertions(+), 0 deletions(-)
 create mode 100644 workht.pl

diff --git a/workht.pl b/workht.pl
new file mode 100644
index 0000000..56ee83c
--- /dev/null
+++ b/workht.pl
@@ -0,0 +1,246 @@
+#!/usr/bin/env perl
+require 5.008_001;
+#@ workht.pl - URL checker / output dumper for tz data files.
+#@ Public domain, 2013, Steffen Nurpmeso.
+#@ Synopsis:
+#@    workht.pl html   < DATA_FILE | elinks -force-html -dump 1
+#@    workht.pl check  < DATA_FILE > NEW_DATA_FILE
+#@ The *check* mode requires an installed curl(1) (<http://curl.haxx.se>);
+#@ Input data notes:
+#@ - Only comment lines (\s*#) are recognized.
+#@ - Non-empty (except whitespace only) non-comment lines finalize the
+#@   preceding comment block.
+#@ - Empty (incl. WS only) non-comment lines in a comment block force paragraph
+#@   separation in between comments.
+#@ - A link is the character sequence '<SCHEME://[^>]+>'.
+#@   (The *check* mode strips the surrounding angle brackets if a link doesn't
+#@   work.)
+#@ - A link may be followed by WS and a link text in parentheses ('\([^)]*?\)');
+#@   If no link text exists, the URL is used as the link content, too.
+#@   Note this only works in *html* mode, otherwise it'll always be the URL,
+#@   and the text in parentheses will be left as is.
+#@ - A link may also be followed by WS, a backslash and a LF ('\s*\\$'),
+#@   in which case the link text in parentheses may be placed on the very next
+#@   line.
+#@ Note: slurps the entire data into memory.
+# Probe command of *check* mode: a non-zero exit status marks the URL as dead.
+my $SCHEME_CHECKER = 'curl -q --silent --fail --head --location';
+# Which <scheme://> should be checked by $SCHEME_CHECKER
+my %SCHEMES_TO_CHECK = (http => 1, https => 1, ftp => 1);
+
+##  --  >8  --  8<  --  ##
+
+use diagnostics -verbose;
+use strict;
+use warnings;
+# Finds the first <scheme://...>: $1 text before it, $2 the URL, $3 the rest.
+my $SCHEME_URL = qr{
+   (.*?)
+      <(\w+://[^>]+)>
+   (.*)
+}xi;
+my $SCHEME_URL_EXTRACT = qr{^(\w+)://};   # $1: the scheme of a URL
+# Matches "(text)" anywhere in the string: $1 the text, $2 whatever follows.
+my $SCHEME_TEXT = qr{
+   \s*
+      (?:\(([^)]*?)\))
+   (.*)
+}xi;
+# Exit statuses handed to usage(); the values are those of sysexits(3).
+my $EX_USAGE = 64;
+my $EX_NOINPUT = 66;
+my $ESTAT = 0;    # Exit status of the modes; parse_input() may set it to 1
+my $INPUT;        # Input handle; parse_input() replaces it by a Line list ref
+
+sub main_fun {
+   # Validate @ARGV, bind $INPUT to STDIN or to the named file, then hand over
+   # to the requested mode.  Neither the modes nor usage() return.
+   usage($EX_USAGE) if @ARGV < 1;
+   my ($mode, $path) = @ARGV;
+   usage() if $mode eq '-h' || $mode eq '--help';
+   my $why;
+   if (@ARGV == 1) {
+      $INPUT = *STDIN;
+      $why = "No file argument, and STDIN is not a file.\n\n" unless -f $INPUT;
+   } elsif (! -f $path) {
+      $why = "File \"${ARGV[1]}\" does not exist.\n\n";
+   } elsif (! open $INPUT, '<', $path) {
+      $why = "File \"${ARGV[1]}\" cannot be opened for reading.\n\n";
+   }
+   if (defined $why) { print STDERR $why; usage($EX_NOINPUT) }
+   mode_html() if $mode eq 'html';
+   mode_check() if $mode eq 'check';
+   usage($EX_USAGE);
+}
+
+sub usage { # Print the synopsis to STDERR, then exit (status defaults to 0).
+   my $status = @_ ? $_[0] : 0;
+   print STDERR <<__EOT__;
+Synopsis:
+   workht.pl html   < DATA_FILE | elinks -force-html -dump 1
+   workht.pl check  < DATA_FILE > NEW_DATA_FILE
+
+The *html* mode generates a very simple HTML page with hyperlinks.
+The *check* mode requires an installed curl(1) (<http://curl.haxx.se>).
+__EOT__
+   exit($status)
+}
+
+sub mode_html {
+   # Dump the parsed input as an HTML page on STDOUT: comment blocks become
+   # serif <pre> text with hyperlinks, anything else a boxed <pre class=indat>.
+   # Exits with $ESTAT.
+   Line::parse_input();
+   # Input text is not markup: escape it before it goes into the page.
+   my %ent = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;');
+   my $esc = sub { (my $s = $_[0]) =~ s/([&<>"])/$ent{$1}/g; $s };
+   print <<__EOT__;
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+<style>
+body {margin:0; margin-left:5%; padding:0; width:88%}
+pre {line-height:1.4em; font-family:serif; font-size:100%}
+.indat {padding:1em; border:1px solid black; background-color:#F0F0F0;
+   font-family:monospace; font-size:90%}
+</style>
+</head>
+<body>
+__EOT__
+
+   my ($indat, $intxt) = (0, 0);
+   while (defined(my $lo = shift @$INPUT)) {
+      if (! $lo->{ISCOMM}) {
+         if ($intxt) {
+            $intxt = 0;
+            die unless print "</pre>\n";
+         }
+         if ($lo->{DATA} !~ /^\s*$/ && ! $indat) {
+            $indat = 1;
+            die unless print "<pre class=indat>\n";
+         }
+         die unless print $esc->($lo->{DATA}), "\n";
+         next;
+      }
+      if ($indat) {
+         $indat = 0;
+         die unless print "</pre>\n";
+      }
+      if (! $intxt) {
+         $intxt = 1;
+         die unless print "<pre>\n";
+      }
+
+      my ($l, $rest) = ('', substr $lo->{DATA}, $lo->{ISCOMM});
+      Line::join_follow(\$lo, \$rest, $INPUT) if $lo->{FOLLOW};
+      while ($rest =~ $SCHEME_URL) {
+         my ($lead, $url, $text) = ($1, $2, $2);
+         $rest = $3;
+         # Anchored: link text only counts when it directly follows the link.
+         ($text, $rest) = ($1, $2) if $rest =~ /\A$SCHEME_TEXT/;
+         $l .= $esc->($lead) . '<a href="' . $esc->($url) . '">'
+            . $esc->($text) . '</a>';
+      }
+      $l .= $esc->($rest);
+      die unless print $l, "\n";
+   }
+   # The input may end inside a block; do not leave its <pre> open.
+   if ($indat || $intxt) { die unless print "</pre>\n" }
+   print <<__EOT__;
+</body>
+</html>
+__EOT__
+   exit($ESTAT)
+}
+
+sub mode_check {
+   # Copy the input to STDOUT, probing each <scheme://...> link of a comment line
+   # with $SCHEME_CHECKER; a failing link loses its angle brackets.
+   Line::parse_input();
+   while (defined(my $lo = shift @$INPUT)) {
+      if (! $lo->{ISCOMM}) {
+         die unless print $lo->{DATA}, "\n";
+         next;
+      }
+      my ($l, $rest) = ('', $lo->{DATA});
+      while ($rest =~ $SCHEME_URL) {
+         my $url = $2;
+         ($l, $rest) = ($l . $1, $3);   # captures are defined; keep a bare "0"
+         if ($url !~ $SCHEME_URL_EXTRACT || ! $SCHEMES_TO_CHECK{$1}) {
+            print STDERR ".Not checking URL scheme: <$url>\n";
+            $l .= '<' . $url . '>';
+            next;
+         }
+         print STDERR " Checking URL <$url> ";
+         my $sh = $SCHEME_CHECKER . ' "$1" >/dev/null 2>/dev/null';
+         system('sh', '-c', $sh, 'sh', $url);   # URL travels as $1, unparsed
+         die "Cannot exec: $SCHEME_CHECKER" if $? < 0;
+         die "Died with signal: $SCHEME_CHECKER" if $? & 127;
+         my $rc = $? >> 8;   # 126/127 come from the shell: checker not runnable
+         die "Cannot run: $SCHEME_CHECKER" if $rc == 126 || $rc == 127;
+         if ($rc) {
+            print STDERR "ERROR!\r!\n";
+            $l .= $url;
+         } else {
+            $l .= '<' . $url . '>';
+            print STDERR "ok\r.\n";
+         }
+      }
+      $l .= $rest;
+      die unless print $l, "\n";
+   }
+   exit($ESTAT)
+}
+
+{package Line;
+   # One input line.  All fields are plain hash entries; see new() below.
+   sub new {
+      my ($class) = @_;
+      my $self = {
+         DATA     => undef,   # Line data
+         ISCOMM   => 0,       # If not 0, len of substr to strip
+         FOLLOW   => 0,       # If not 0, len of substr to strip for follow ln
+      };
+      return bless $self, $class;
+   }
+
+   # Slurp and close the $INPUT handle, then point $INPUT at the resulting list
+   # of Line objects.  A trailing backslash on a comment line is honoured only
+   # if the next line is a comment starting with "(text)"; otherwise FOLLOW is
+   # reset, a complaint goes to STDERR and $ESTAT becomes 1.
+   sub parse_input {
+      chomp(my @ld = <$INPUT>);
+      die unless close $INPUT;
+      my @xd;
+      while (@ld) {
+         my $il = shift @ld;
+         my $ol = Line->new;
+         push @xd, $ol;
+         $ol->{DATA} = $il;
+         next unless (($ol->{ISCOMM} = ($il =~ /^(\s*#\s*)/) ? length $1 : 0));
+         next unless (($ol->{FOLLOW} = ($il =~ /(\s*\\\s*)$/) ? length $1 : 0));
+         # Peek only: the follow line gets its own Line on the next iteration.
+         next if @ld && $ld[0] =~ /^\s*[#]$SCHEME_TEXT/;
+         $ol->{FOLLOW} = 0;
+         print STDERR "! False line continuation after: $ol->{DATA}\n";
+         $ESTAT = 1;
+      }
+      $INPUT = \@xd;
+   }
+
+   # Join a continued line with its follow line.  $sr: ref to the current Line
+   # (replaced by the follow Line), $lr: ref to the text being built, $lar: ref
+   # to the list of remaining Lines (the follow Line is shifted off it).
+   sub join_follow {
+      my ($sr, $lr, $lar) = @_;
+      return unless ${$sr}->{FOLLOW};
+      # Cut the "\" tail, then append a blank and the follow text sans prefix.
+      $$lr = substr($$lr, 0, -${$sr}->{FOLLOW}) . ' ';
+      $$sr = shift @$lar;
+      $$lr .= substr ${$sr}->{DATA}, ${$sr}->{ISCOMM};
+   }
+}
+
+{package main; main_fun()}   # Called last, once the lexicals above are set up
+
+# vim:set fenc=utf-8 syntax=perl ts=8 sts=3 sw=3 et tw=79:
-- 
1.7.9.rc2.1.g69204

    

[tz] Converting TZ DB files to pretty markup

Steffen Daode Nurpmeso