And then there is the script, rewritten to comply with the new syntax. It produces much nicer HTML output; it is oh so pretty now. Of course checking still works, and it is also more sophisticated than before. --steffen
From a2131929d0d2a09591fcedf3afef0b77d6f704ec Mon Sep 17 00:00:00 2001 Message-Id: <a2131929d0d2a09591fcedf3afef0b77d6f704ec.1359248090.git.sdaoden@gmail.com> From: "Steffen \"Daode\" Nurpmeso" <sdaoden@gmail.com> Date: Sat, 26 Jan 2013 19:48:34 +0100 Subject: [PATCH 1/2] TZ db files: remove <pre>
--- africa | 1 - antarctica | 1 - asia | 1 - australasia | 1 - backward | 1 - etcetera | 1 - europe | 1 - factory | 1 - leapseconds | 1 - northamerica | 1 - pacificnew | 1 - solar87 | 1 - solar88 | 1 - solar89 | 1 - southamerica | 1 - systemv | 1 - 16 files changed, 0 insertions(+), 16 deletions(-) diff --git a/africa b/africa index dc90f12..5c95bb3 100644 --- a/africa +++ b/africa @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/antarctica b/antarctica index 60e615b..f630a73 100644 --- a/antarctica +++ b/antarctica @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/asia b/asia index 8711a50..0a6763a 100644 --- a/asia +++ b/asia @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/australasia b/australasia index b0a9638..8e8ac0d 100644 --- a/australasia +++ b/australasia @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/backward b/backward index dc7769f..6767777 100644 --- a/backward +++ b/backward @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/etcetera b/etcetera index a9ff729..af5d19b 100644 --- a/etcetera +++ b/etcetera @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/europe b/europe index 7fa1f13..8aad740 100644 --- a/europe +++ b/europe @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/factory b/factory index d29a585..4304f7c 100644 --- a/factory +++ b/factory @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. 
diff --git a/leapseconds b/leapseconds index eba7132..60c2323 100644 --- a/leapseconds +++ b/leapseconds @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/northamerica b/northamerica index efa1c06..6aaf665 100644 --- a/northamerica +++ b/northamerica @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/pacificnew b/pacificnew index bccd852..7349434 100644 --- a/pacificnew +++ b/pacificnew @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/solar87 b/solar87 index 2299558..8d0344a 100644 --- a/solar87 +++ b/solar87 @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/solar88 b/solar88 index bb1d6ca..29def03 100644 --- a/solar88 +++ b/solar88 @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/solar89 b/solar89 index af93235..2153522 100644 --- a/solar89 +++ b/solar89 @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/southamerica b/southamerica index afa505f..55072c8 100644 --- a/southamerica +++ b/southamerica @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. diff --git a/systemv b/systemv index e651e85..d9e2995 100644 --- a/systemv +++ b/systemv @@ -1,4 +1,3 @@ -# <pre> # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. -- 1.7.9.rc2.1.g69204
From c4ce7ed5d829e2977fac9cb6837827ecc5d71233 Mon Sep 17 00:00:00 2001 Message-Id: <c4ce7ed5d829e2977fac9cb6837827ecc5d71233.1359248090.git.sdaoden@gmail.com> In-Reply-To: <a2131929d0d2a09591fcedf3afef0b77d6f704ec.1359248090.git.sdaoden@gmail.com> References: <a2131929d0d2a09591fcedf3afef0b77d6f704ec.1359248090.git.sdaoden@gmail.com> From: "Steffen \"Daode\" Nurpmeso" <sdaoden@gmail.com> Date: Sat, 26 Jan 2013 22:48:19 +0100 Subject: [PATCH 2/2] workht.pl: added: URL checker and output dumper
--- workht.pl | 246 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 246 insertions(+), 0 deletions(-) create mode 100644 workht.pl diff --git a/workht.pl b/workht.pl new file mode 100644 index 0000000..56ee83c --- /dev/null +++ b/workht.pl @@ -0,0 +1,246 @@ +#!/usr/bin/env perl +require 5.008_001; +#@ workht.pl - URL checker / output dumper for tz data files. +#@ Public domain, 2013, Steffen Nurpmeso. +#@ Synopsis: +#@ workht.pl html < DATA_FILE | elinks -force-html -dump 1 +#@ workht.pl check < DATA_FILE > NEW_DATA_FILE +#@ The *check* mode requires an installed curl(1) (<http://curl.haxx.se>); +#@ Input data notes: +#@ - Only comment lines (\s*#) are recognized. +#@ - Non-empty (except whitespace only) non-comment lines finalize the +#@ preceeding comment block. +#@ - Empty (incl. WS only) non-comment lines in a comment block force paragraph +#@ separation in between comments. +#@ - A link is the character sequence '<SCHEME://[^>]+?'. +#@ (The *check* mode strips the surrounding angle brackets if a link doesn't +#@ work.) +#@ - A link may be followed by WS and a link text in parenthesis ('\([^)]*?\)'); +#@ If no link text exists, the URL is used as the link content, too. +#@ Note this only works in *html* mode, otherwise it'll always be the URL, +#@ and the text in parenthesis will be left as is. +#@ - A link may also be followed by WS, a backslash and a LF ('\s*\\$'), +#@ in which case the link text in parenthesis may be placed on the very next +#@ line. +#@ Note: slurps the entire data into memory. + +my $SCHEME_CHECKER = 'curl -q --silent --fail --head --location'; +# Which <scheme://> should be checked by $SCHEME_CHECKER +my %SCHEMES_TO_CHECK = (http => 1, https => 1, ftp => 1); + +## -- >8 -- 8< -- ## + +use diagnostics -verbose; +use strict; +use warnings; + +my $SCHEME_URL = qr{ + (.*?) 
+ <(\w+://[^>]+)> + (.*) +}xi; +my $SCHEME_URL_EXTRACT = qr{^(\w+)://}; + +my $SCHEME_TEXT = qr{ + \s* + (?:\(([^)]*?)\)) + (.*) +}xi; + +my $EX_USAGE = 64; +my $EX_NOINPUT = 66; +my $ESTAT = 0; +my $INPUT; + +sub main_fun { + usage($EX_USAGE) unless @ARGV >= 1; + usage() if $ARGV[0] eq '-h' || $ARGV[0] eq '--help'; + if (@ARGV == 1) { + $INPUT = *STDIN; + if (! -f $INPUT) { + print STDERR "No file argument, and STDIN is not a file.\n\n"; + usage($EX_NOINPUT); + } + } elsif (! -f $ARGV[1]) { + print STDERR "File \"${ARGV[1]}\" does not exist.\n\n"; + usage($EX_NOINPUT); + } elsif (! open $INPUT, '<', $ARGV[1]) { + print STDERR "File \"${ARGV[1]}\" cannot be opened for reading.\n\n"; + usage($EX_NOINPUT); + } + mode_html() if $ARGV[0] eq 'html'; + mode_check() if $ARGV[0] eq 'check'; + usage($EX_USAGE); +} + +sub usage { + print STDERR <<__EOT__; +Synopsis: + workht.pl html < DATA_FILE | elinks -force-html -dump 1 + workht.pl check < DATA_FILE > NEW_DATA_FILE + +The *html* mode generates a very simple HTML page with hyperlinks. +The *check* mode requires an installed curl(1) (<http://curl.haxx.se>). +__EOT__ + + exit(@_ ? $_[0] : 0) +} + +sub mode_html { + Line::parse_input(); + + print <<__EOT__; +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> +<style> +body {margin:0; margin-left:5%; padding:0; width:88%} +pre {line-height:1.4em; font-family:serif; font-size:100%} +.indat {padding:1em; border:1px solid black; background-color:#F0F0F0; + font-family:monospace; font-size:90%} +</style> +<body> +__EOT__ + + my ($indat, $intxt) = (0, 0); + while (defined(my $lo = shift @$INPUT)) { + if (! $lo->{ISCOMM}) { + if ($intxt) { + $intxt = 0; + die unless print "</pre>\n"; + } + if ($lo->{DATA} !~ /^\s*$/ && ! $indat) { + $indat = 1; + die unless print "<pre class=indat>\n"; + } + die unless print $lo->{DATA}, "\n"; + next; + } + if ($indat) { + $indat = 0; + die unless print "</pre>\n"; + } + if (! 
$intxt) { + $intxt = 1; + die unless print "<pre>\n"; + } + + my ($l, $rest) = ('', substr $lo->{DATA}, $lo->{ISCOMM}); + Line::join_follow(\$lo, \$rest, $INPUT) if $lo->{FOLLOW}; + + while ($rest =~ $SCHEME_URL) { + $l .= $1 ? $1 : ''; + $rest = $3; + my $url = $2; + my $text; + if ($rest =~ $SCHEME_TEXT) { + $text = $1; + $rest = $2; + } else { + $text = $url; + } + $l .= '<a href="' . $url . '">' . $text . '</a>'; + } + $l .= $rest if $rest; + die unless print $l, "\n"; + } + + print <<__EOT__; +</body> +</html> +__EOT__ + + exit($ESTAT) +} + +sub mode_check { + Line::parse_input(); + + while (defined(my $lo = shift @$INPUT)) { + if (! $lo->{ISCOMM}) { + die unless print $lo->{DATA}, "\n"; + next; + } + + my ($l, $rest) = ('', $lo->{DATA}); + + while ($rest =~ $SCHEME_URL) { + $l .= $1 ? $1 : ''; + $rest = $3; + my $url = $2; + if ($url !~ $SCHEME_URL_EXTRACT || ! $SCHEMES_TO_CHECK{$1}) { + print STDERR ".Not checking URL scheme: <$url>\n"; + $l .= '<' . $url . '>'; + next; + } + + print STDERR " Checking URL <$url> "; + system($SCHEME_CHECKER . ' "' . $url . '" >/dev/null 2>/dev/null'); + die "Cannot exec: $SCHEME_CHECKER" if $? < 0; + die "Died with signal: $SCHEME_CHECKER" if $? & 127; + if ($? >> 8) { + print STDERR "ERROR!\r!\n"; + $l .= $url; + } else { + $l .= '<' . $url . '>'; + print STDERR "ok\r.\n"; + } + } + $l .= $rest if $rest; + die unless print $l, "\n"; + } + exit($ESTAT) +} + +{package Line; + sub new { + my $self = { + DATA => undef, # Line data + ISCOMM => 0, # If not 0, len of substr to strip + FOLLOW => 0, # If not 0, len of substr to strip for follow ln + }; + bless $self, $_[0] + } + + sub parse_input { + my @ld = <$INPUT>; + die unless close $INPUT; + my (@xd, $il, $ol); + while (@ld) { + $il = shift @ld; + chomp $il; + jloop: + $ol = new Line; + push @xd, $ol; + $ol->{DATA} = $il; + next unless (($ol->{ISCOMM} = ($il =~ /^(\s*#\s*)/) ? length $1 : 0)); + next unless (($ol->{FOLLOW} = ($il =~ /(\s*\\\s*)$/) ? 
length $1 : 0)); + if (@ld) { + $il = shift @ld; + chomp $il; + goto jloop if $il =~ $SCHEME_TEXT; + } else { + $il = undef; + } + $ol->{FOLLOW} = 0; + print STDERR "! False line continuation after: $ol->{DATA}\n"; + $ESTAT = 1; + goto jloop if defined $il; + } + $INPUT = \@xd; + } + + sub join_follow { + my ($sr, $lr, $lar) = @_; + if (${$sr}->{FOLLOW}) { + $$lr = substr $$lr, 0, -${$sr}->{FOLLOW}; + $$lr .= ' '; + $$sr = shift @$lar; + $$lr .= substr ${$sr}->{DATA}, ${$sr}->{ISCOMM}; + } + } +} + +{package main; main_fun()} + +# vim:set fenc=utf-8 syntax=perl ts=8 sts=3 sw=3 et tw=79: -- 1.7.9.rc2.1.g69204