PERL   43
make rules pl
Guest on 12th February 2023 03:24:08 AM


  1. #!/usr/bin/perl
  2.  
  3. open XDY,">$ARGV[0].xdy";
  4. open DOC,">$ARGV[0]-doc.tex";
  5. open TEST,">$ARGV[0].raw";
  6. open TESTXDY,">$ARGV[0]-test.xdy";
  7.  
  8. print XDY ";; Rules for xindy; generated by make-rules.pl\n";
  9. print XDY ";; language: $language\n\n";
  10.  
  11. if ($prefix) {
  12.     $prefix = $prefix . '-';
  13. }
  14.  
  15. print TESTXDY "(require \"lang/$ARGV[0].xdy\")\n";
  16. print TESTXDY "(define-sort-rule-orientations (forward backward forward forward))\n";
  17. print TESTXDY "(use-rule-set :run 0\n";
  18. print TESTXDY "       :rule-set (\"".$prefix."alphabetize\" \"".$prefix."ignore-special\"))\n";
  19. print TESTXDY "(use-rule-set :run 1\n";
  20. print TESTXDY "       :rule-set (\"".$prefix."resolve-diacritics\" \"".$prefix."ignore-special\"))\n";
  21. print TESTXDY "(use-rule-set :run 2\n";
  22. print TESTXDY "       :rule-set (\"".$prefix."resolve-case\" \"".$prefix."ignore-special\"))\n";
  23. print TESTXDY "(use-rule-set :run 3\n";
  24. print TESTXDY "       :rule-set (\"".$prefix."resolve-special\"))\n";
  25. close TESTXDY;
  26.  
  27. $fn = "test.xdy";
  28. ! -f $fn && -f "$INC[0]/$fn"  and  $fn = "$INC[0]/$fn";
  29. -f $fn  or  die "$fn: $!";
  30. system("cat $fn >> $ARGV[0]-test.xdy");
  31.  
  32. $begin = 128;
  33. # $begin =  97 if ($script eq "cyrillic");
  34. # $begin = 150 if ($script eq "latin");
  35. # $begin =  65 if ($script eq "greek");
  36.  
  37. if (!$begin) {
  38.     $begin = 65;
  39. }
  40.  
  41. print "Alphabet has " . @{$alphabet} . " elements.\n";
  42.  
  43. for ($i = 0; $i < @{$alphabet}; $i++) {
  44.   if (defined($alphabet->[$i][0])) {
  45.     $letter = $alphabet->[$i][0];
  46.     $lout = $letter;
  47.     $lout =~ s/\~/~~/g;
  48.     $lout =~ s/\"/~\"/g;
  49.     $token = chr($i+$begin);
  50.     $token =~ s/\~/~~/g;
  51.     $token =~ s/\"/~\"/g;
  52.     if ($after ne $lout) {
  53.       print XDY "\"))\n" if ($i);
  54.       print XDY "(define-letter-group \"$lout\"";
  55.       print XDY " :after \"$after\"" if ($i);
  56.       print XDY " :prefixes (\"";
  57.     } else {
  58.       print XDY "\" \"";
  59.     }
  60.     print XDY "$token";
  61.     $after = $lout;
  62.   }
  63. }
  64.  
  65. print XDY "\"))\n\n";
  66.  
  67. $ref = \$i;
  68. $offset = $begin;
  69. $f = 1;
  70. make_tokens();
  71.  
  72. # print test raw file
  73. # alphabet
  74.   $p = 1;
  75.   foreach $letter (keys %tokens) {
  76.     print TEST "(indexentry :key (\"$letter\") :locref \"$p\")\n";
  77.     $p++;
  78.   }
  79. # end
  80.  
  81. process_ligatures();
  82.  
  83. print_tokens($prefix . "alphabetize");
  84.  
  85. $ref = \$j;
  86. $offset = 161;
  87. $f = 1;
  88. make_tokens();
  89.  
  90. for ($i = 0; $i < @{$ligatures}; $i++) {
  91.   if ($ligatures->[$i][1] eq "before") {
  92.     $token = chr(161);
  93.   } else {
  94.     $token = chr(255);
  95.   }
  96.   foreach $ligature_variant (@{$ligatures->[$i][0]}) {
  97.     $tokens{$ligature_variant} = $token;
  98.   }
  99. }
  100.  
  101. print_tokens($prefix . "resolve-diacritics");
  102.  
  103. $ref = \$k;
  104. $offset = 48;
  105. $f = 1;
  106. if ($sortcase eq "Aa") {
  107.   $offset = 57;
  108.   $f = -1;
  109. }
  110.  
  111. make_tokens();
  112.  
  113. process_ligatures();
  114.  
  115. print_tokens($prefix . "resolve-case");
  116.  
  117. foreach $character (@special) {
  118.   $tokens{$character} = "" unless ($character eq "letters");
  119. }
  120.  
  121. print_tokens($prefix . "ignore-special");
  122.  
  123. $offset = 161;
  124. $f = 1;
  125.  
  126. for ($l = 0; $l < @special; $l++) {
  127.   if (@special[$l] eq "letters") {
  128.     $ref = \$l;
  129.     make_tokens();
  130.     process_ligatures();
  131.   } else {
  132.     $token = chr($l+$offset);
  133.     $tokens{@special[$l]} = $token;
  134.   }
  135. }
  136.  
  137. print_tokens($prefix . "resolve-special");
  138.  
  139. # print doc:
  140. # alphabet
  141.   for ($i = 0; $i < @{$alphabet}; $i++) {
  142.     for ($j = 1; $j < @{$alphabet->[$i]}; $j++) {
  143.         push @l,join("\\,", (@{$alphabet->[$i][$j]}));
  144.     }
  145.     if ((@l)) {
  146.         push @m, join(" & ", (@l));
  147.     }
  148. #    push @m, "`".$alphabet->[$i][0]."':\\>".join(" -- ", (@l));
  149.     @l = ();
  150.   }
  151.   print DOC "\\subsection{$language";
  152.   print DOC " ($variant)" if ($variant);
  153.   print DOC "}\n\n";
  154.   print DOC "\\subsubsection{Alphabet}\n";
  155.   print DOC "\\icod\\fcod\n";
  156.   print DOC "\\begin{alphabet}\n";
  157.   print DOC join("\\\\\n", (@m));
  158.   print DOC "\n\\end{alphabet}\n";
  159.   print DOC "\\idef\\fdef\n";
  160.  
  161. # ligatures
  162.   print DOC "\n\\subsubsection{Ligatures}\n";
  163.   print DOC "\\begin{flushleft}\n";
  164.   print DOC "None.\n" unless @{$ligatures};
  165.   @m = ();
  166.   for ($i = 0; $i < @{$ligatures}; $i++) {
  167.     for ($j = 0; $j < @{$ligatures->[$i][0]}; $j++) {
  168.       push @m, "`$ligatures->[$i][0][$j]' is sorted like `" .
  169.         join("\\,", (@{$ligatures->[$i][2][$j]})) .
  170.         "', but \\emph{$ligatures->[$i][1]} it in otherwise equal words.";
  171.     }
  172.   }
  173.   print DOC join("\\\\\n", (@m));
  174.   print DOC "\n\\end{flushleft}\n";
  175.  
  176. # case
  177.   print DOC "\n\\subsubsection{Upper-/lowercase words}\n";
  178.   if ($sortcase eq "Aa") {
  179.     print DOC "Capitalized or uppercase words are sorted \\emph{before} ";
  180.     print DOC "otherwise equal lowercase words.\n";
  181.   } else {
  182.     print DOC "Capitalized or uppercase words are sorted \\emph{after} ";
  183.     print DOC "otherwise equal lowercase words.\n";
  184.   }
  185.  
  186. # special
  187. print DOC "\n\\subsubsection{Special characters}\n";
  188. print DOC "The order of special characters and letters is:\n";
  189. print DOC "\\begin{flushleft}\n";
  190. print DOC join("\\hspace{4mm}", (@special));
  191. print DOC "\n\\end{flushleft}\n";
  192. print DOC "\\newpage\n";
  193.  
  194. # end
  195.  
  196. # print test raw file
  197.  
  198. #  for ($i = 0; $i < @{$alphabet}; $i++) {
  199. #    for ($j = 1; $j < @{$alphabet->[$i]}; $j++) {
  200. #      foreach $letter (@{$alphabet->[$i][$j]}) {
  201. #        print TEST "(indexentry :key (\"$letter\") :locref \"$p\")\n";
  202. #       $p++;
  203. #      }
  204. #    }
  205. #  }
  206.  
  207. # ligatures
  208.   for ($i = 0; $i < @{$ligatures}; $i++) {
  209.     for ($j = 0; $j < @{$ligatures->[$i][0]}; $j++) {
  210.       print TEST
  211.         "(indexentry :key (\"$ligatures->[$i][0][$j]\") :locref \"$p\")\n";
  212.       $p++;
  213.       print TEST "(indexentry :key (\"" .
  214.         join("", (@{$ligatures->[$i][2][$j]})) . "\") :locref \"$p\")\n";
  215.       $p++;
  216.     }
  217.   }
  218.  
  219. # special
  220. foreach $letter (@special) {
  221.   if ($letter ne "letters") {
  222.     print TEST "(indexentry :key (\"$letter\") :locref \"$p\")\n";
  223.     $p++;
  224.   }
  225. }
  226.  
  227. # end
  228.  
  229. sub make_tokens {
  230.   for ($i = 0; $i < @{$alphabet}; $i++) {
  231.     for ($j = 1; $j < @{$alphabet->[$i]}; $j++) {
  232.       for ($k = 0; $k < @{$alphabet->[$i][$j]}; $k++) {
  233.         $token = chr($f*$$ref+$offset);
  234.         if ($alphabet->[$i][$j][$k]) {
  235.           $tokens{$alphabet->[$i][$j][$k]} = $token;
  236.         }
  237.       }
  238.     }
  239.   }
  240. }
  241.  
  242. sub process_ligatures {
  243.   for ($i = 0; $i < @{$ligatures}; $i++) {
  244.     for ($j = 0; $j < @{$ligatures->[$i][0]}; $j++) {
  245.       $token = "";
  246.       foreach $ligature_component (@{$ligatures->[$i][2][$j]}) {
  247.         $token = $token.$tokens{$ligature_component};
  248.       }
  249.       $tokens{$ligatures->[$i][0][$j]} = $token;
  250.     }
  251.   }
  252. }
  253.  
  254. sub print_tokens {
  255.   print XDY "(define-rule-set \"$_[0]\"\n\n  :rules  (";
  256.   foreach $letter (sort {
  257.     (length($b) <=> length($a)) || ($tokens{$a} cmp $tokens{$b})
  258.   } (keys %tokens)) {
  259.     $lout = $letter;
  260.     $lout =~ s/\~/~~/g;
  261.     $lout =~ s/\"/~\"/g;
  262.     $tout = $tokens{$letter};
  263.     $tout =~ s/\~/~~/g;
  264.     $tout =~ s/\"/~\"/g;
  265.     print XDY "(\"$lout\" \"$tout\" :string)\n           ";
  266.   }
  267.   print XDY "))\n\n";
  268.   %tokens = ();
  269. }

Raw Paste

Login or Register to edit or fork this paste. It's free.