--- zishrink.awk.orig	2018-01-23 17:54:15.000000000 -0500
+++ zishrink.awk	2018-05-11 13:21:18.633927586 -0400
@@ -7,21 +7,37 @@
 # this script's input.
 
 
-# Return a new rule name.
-# N_RULE_NAMES keeps track of how many rule names have been generated.
+# Return a compressed rule name representing orig_name.
+# This should be called only once per distinct orig_name.
+# _used_hashes maps each already-used numeric hash to the orig_name that claimed it.
 
-function gen_rule_name(alphabet, base, rule_name, n, digit)
+function compress_rule_name(orig_name,  alphabet, base, n, i, rule_name, digit)
 {
-  alphabet = ""
-  alphabet = alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-  alphabet = alphabet "abcdefghijklmnopqrstuvwxyz"
-  alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
+  # We use only upper and lower case letters in generated rule names.
+  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
   base = length(alphabet)
-  rule_name = ""
-  n = n_rule_names++
 
+  # Hash orig_name in a rather trivial way.
+  # Non-letter characters contribute zero to the hash, though we could do something else.
+  n = 0
+  for (i = 1; i <= length(orig_name); i++)
+    n = n * 5 + index(alphabet, substr(orig_name, i, 1))
+
+  # We don't wish to use more than 2 letters in compressed names,
+  # so restrict the hash values to be less than base^2.
+  n %= base * base
+
+  # Cope with collisions by incrementing the hash till we find a free hash.
+  while (n in _used_hashes) {
+#   print "Collision between", orig_name, "and", _used_hashes[n], "at hash", n > "collisions.out"
+    n = (n + 1) % (base * base)
+  }
+
+  _used_hashes[n] = orig_name
+
+  # Generate compressed name by stringifying the hash.
+  rule_name = ""
   do {
-    n -= rule_name && n <= base
     digit = n % base
     rule_name = substr(alphabet, digit + 1, 1) rule_name
     n = (n - digit) / base
@@ -106,7 +122,7 @@
   i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
   if (i && field[i] ~ /^[^-+0-9]/) {
     if (!rule[field[i]])
-      rule[field[i]] = gen_rule_name()
+      rule[field[i]] = compress_rule_name(field[i])
     field[i] = rule[field[i]]
   }
 
