From b848dce41c97fa405196e004a0ff1e7a79e6a822 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Thu, 11 Jan 2024 11:02:04 -0800
Subject: [PROPOSED 3/5] Shrink lines by class
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* zishrink.awk (process_input_line): Output rule lines first, then
zone lines, then link lines.  Although this doesn’t change the set
of output lines, it should make the output a bit more compressible.
---
 zishrink.awk | 70 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 51 insertions(+), 19 deletions(-)

diff --git a/zishrink.awk b/zishrink.awk
index 500e6cf8..a59eead9 100644
--- a/zishrink.awk
+++ b/zishrink.awk
@@ -162,7 +162,7 @@ function make_line(n, field, \
 # Process the input line LINE and save it for later output.
 
 function process_input_line(line, \
-			    f, field, end, n, r, \
+			    f, field, end, n, outline, r, \
 			    linkline, ruleline, zoneline)
 {
   # Remove comments, normalize spaces, and append a space to each line.
@@ -236,47 +236,79 @@ function process_input_line(line, \
     rule_used[r] = 1
   }
 
-  # Save the line for later output.
-  output_line[nout++] = make_line(n, field)
+  if (zoneline)
+    zonename = startdef = field[2]
+  else if (linkline)
+    zonename = startdef = field[3]
+  else if (ruleline)
+    zonename = ""
+
+  # Save the information for later output.
+  outline = make_line(n, field)
+  if (ruleline)
+    rule_output_line[nrule_out++] = outline
+  else if (linkline)
+    link_output_line[nlink_out++] = outline
+  else
+    zonedef[zonename] = (zoneline ? "" : zonedef[zonename] "\n") outline
 }
 
 function omit_unused_rules( \
 			   i, field)
 {
-  for (i = 0; i < nout; i++) {
-    split(output_line[i], field)
-    if (field[1] == "R" && !rule_used[field[2]]) {
-      output_line[i] = ""
-    }
+  for (i = 0; i < nrule_out; i++) {
+    split(rule_output_line[i], field)
+    if (!rule_used[field[2]])
+      rule_output_line[i] = ""
   }
 }
 
 function abbreviate_rule_names( \
-			       abbr, f, field, i, n, r)
+			       abbr, f, field, i, n, newdef, newline, r, \
+			       zoneline, zonelines, zonename)
 {
-  for (i = 0; i < nout; i++) {
-    n = split(output_line[i], field)
+  for (i = 0; i < nrule_out; i++) {
+    n = split(rule_output_line[i], field)
     if (n) {
-      f = field[1] == "Z" ? 4 : field[1] == "L" ? 0 : 2
-      r = field[f]
+      r = field[2]
       if (r ~ /^[^-+0-9]/) {
 	abbr = rule[r]
 	if (!abbr) {
 	  rule[r] = abbr = gen_rule_name(r)
 	}
-	field[f] = abbr
-	output_line[i] = make_line(n, field)
+	field[2] = abbr
+	rule_output_line[i] = make_line(n, field)
+      }
+    }
+  }
+  for (zonename in zonedef) {
+    zonelines = split(zonedef[zonename], zoneline, /\n/)
+    newdef = ""
+    for (i = 1; i <= zonelines; i++) {
+      newline = zoneline[i]
+      n = split(newline, field)
+      f = i == 1 ? 4 : 2
+      r = rule[field[f]]
+      if (r) {
+	field[f] = r
+	newline = make_line(n, field)
       }
+      newdef = (newdef ? newdef "\n" : "") newline
     }
+    zonedef[zonename] = newdef
   }
 }
 
 function output_saved_lines( \
-			    i)
+			    i, zonename)
 {
-  for (i = 0; i < nout; i++)
-    if (output_line[i])
-      print output_line[i]
+  for (i = 0; i < nrule_out; i++)
+    if (rule_output_line[i])
+      print rule_output_line[i]
+  for (zonename in zonedef)
+    print zonedef[zonename]
+  for (i = 0; i < nlink_out; i++)
+    print link_output_line[i]
 }
 
 BEGIN {
-- 
2.40.1

