PERL 12
DiscoMT_scorer.pl Guest on 21st November 2020 05:34:39 PM
  1. #!/usr/bin/perl
  2. #
  3. #  Author: Preslav Nakov
  4. #  
  5. #  Description: Scores the output for the shared task of the DiscoMT workshop.
  6. #
  7. #
  8. #  Last modified: Ferbuary 26, 2015
  9. #
  10. #
  11. #  Use:
  12. #     perl discoMT_scorer.pl [-y scores.yml] <GOLD_FILE> <PREDICTIONS_FILE>
  13. #
  14. #  Example use:
  15. #     perl discoMT_scorer.pl gold.txt predicted.txt
  16. #     perl discoMT_scorer.pl gold.txt predicted_simple.txt
  17. #
  18. #  Description:
  19. #
  20. #     The scorer calculates and outputs the following statistics:
  21. #        (1) confusion matrix, which shows
  22. #                       - the count for each gold/predicted pair
  23. #           - the sums for each row/column: -SUM-
  24. #        (2) accuracy
  25. #        (3) precision (P), recall (R), and F1-score for each label
  26. #        (4) micro-averaged P, R, F1 (note that in our single-class classification problem, micro-P=R=F1=Acc)
  27. #        (5) macro-averaged P, R, F1
  28. #
  29. #     The scoring is done two times:
  30. #       (i)  using coarse-grained labels (ce, {cela+ça}, elle, elles, il, ils, {OTHER+on}).
  31. #       (ii) using fine-grained labels   (ce, cela, elle, elles, il, ils, on, ça, OTHER).
  32. #    
  33. #     The official score is the macro-averaged F1-score for (ii).
  34. #
  35. #
  36.  
  37. use warnings;
  38. use strict;
  39. use utf8;
  40. use Getopt::Std;
  41.  
  42. ###################
  43. ###   GLOBALS   ###
  44. ###################
  45.  
  46. my %confMatrixCoarse   = ();
  47. my @allLabelsCoarse    = ('  ce ', 'cela ', 'elle ', 'elles', '  il ', ' ils ', 'OTHER');
  48. my %labelMappingCoarse = ('ce'=>'  ce ', 'cela'=>'cela ', 'elle'=>'elle ', 'elles'=>'elles', 'il'=>'  il ', 'ils'=>' ils ', 'on'=>'OTHER', 'ça'=>'cela ', 'OTHER'=>'OTHER');
  49.  
  50. my %confMatrixFine     = ();
  51. my @allLabelsFine      = ('  ce ', 'cela ', 'elle ', 'elles', '  il ', ' ils ', '  on ', '  Ã§a ', 'OTHER');
  52. my %labelMappingFine   = ('ce'=>'  ce ', 'cela'=>'cela ', 'elle'=>'elle ', 'elles'=>'elles', 'il'=>'  il ', 'ils'=>' ils ', 'on'=>'  on ', 'ça'=>'  Ã§a ', 'OTHER'=>'OTHER');
  53.  
  54.  
  55. ################
  56. ###   MAIN   ###
  57. ################
  58.  
  59. ### 1. Check oparameters
  60. our $opt_y;
  61. getopts('y:');
  62. die "Usage: $0 <GOLD_FILE> <PREDICTIONS_FILE>\n" if ($#ARGV != 1);
  63. my $GOLD_FILE        = $ARGV[0];
  64. my $PREDICTIONS_FILE = $ARGV[1];
  65.  
  66. ### 2. Open the files
  67. open GOLD, '<:encoding(UTF-8)', $GOLD_FILE or die "Error opening $GOLD_FILE!";
  68. open PREDICTED, '<:encoding(UTF-8)', $PREDICTIONS_FILE or die "Error opening $PREDICTIONS_FILE!";
  69.  
  70. if ($opt_y) {
  71.     open YAML, '>:encoding(UTF-8)', $opt_y;
  72.     print YAML "filename: '$PREDICTIONS_FILE'\n";
  73.     print YAML "scores:\n";
  74. }
  75.  
  76. ### 3. Collect the statistics
  77. for (my $lineNo = 1; <GOLD>; $lineNo++) {
  78.        
  79.         # 3.1. Get the GOLD label
  80.         # OTHER le      There 's just no way of getting it right .      Il est impossible de de REPLACE_7 percevoir correctement .      0-0 1-1 1-3 2-2 3-2 4-2 5-3 5-4 6-6 7-5 8-7 9-8
  81.         die "Line $lineNo: Wrong file format for $GOLD_FILE!" if (!/^([^\t]*)\t[^\t]*\t[^\t]+\t[^\t]+\t[^\t]+$/);
  82.         my $goldLabel = $1;
  83.  
  84.         # 3.2. Get the PREDICTED label
  85.         # ce    c'      There 's just no way of getting it right .      Il est impossible de de REPLACE_7 percevoir correctement .      0-0 1-1 1-3 2-2 3-2 4-2 5-3 5-4 6-6 7-5 8-7 9-8
  86.         die "Line $lineNo: The file $PREDICTIONS_FILE is shorter!" if (!($_ = <PREDICTED>));
  87.         die "Line $lineNo: Wrong file format for $PREDICTIONS_FILE!" if (!/^([^\t\n\r]*)/);
  88.         my $predictedLabel = $1;
  89.  
  90.         # 3.3. Check the file formats
  91.         if ($goldLabel eq '') {
  92.                 if ($predictedLabel eq '') {
  93.                         next;
  94.                 }
  95.                 else {
  96.                         die "Line $lineNo: The gold label is empty, but the predicted label is not: $predictedLabel";
  97.                 }
  98.         }
  99.         elsif ($predictedLabel eq '') {
  100.                 die "Line $lineNo: The predicted label is empty, but the gold label is not: $goldLabel";
  101.         }
  102.  
  103.         die "Line $lineNo: Wrong file format for $GOLD_FILE: the gold label is '$goldLabel'" if ($goldLabel !~ /^(ce|cela|elle|elles|il|ils|on|ça|OTHER)( (ce|cela|elle|elles|il|ils|on|ça|OTHER))*$/);
  104.         die "Line $lineNo: Wrong file format for $PREDICTIONS_FILE: the predicted label is '$predictedLabel'" if ($predictedLabel !~ /^(ce|cela|elle|elles|il|ils|on|ça|OTHER)( (ce|cela|elle|elles|il|ils|on|ça|OTHER))*$/);
  105.  
  106.         my @goldLabels      = split / /, $goldLabel;
  107.         my @predictedLabels = split / /, $predictedLabel;
  108.         die "Line $lineNo: Different number of labels in the gold and in the predictions file." if ($#goldLabels != $#predictedLabels);
  109.  
  110.         # 3.4. Update the statistics
  111.         for (my $ind = 0; $ind <= $#goldLabels; $ind++) {
  112.                 my $gldLabel = $goldLabels[$ind];
  113.                 my $prdctdLabel = $predictedLabels[$ind];
  114.                 $confMatrixFine{$labelMappingFine{$prdctdLabel}}{$labelMappingFine{$gldLabel}}++;
  115.                 $confMatrixCoarse{$labelMappingCoarse{$prdctdLabel}}{$labelMappingCoarse{$gldLabel}}++;
  116.         }
  117.  
  118. }
  119.  
  120. ### 4. Coarse-grained evaluation
  121. print "\n<<< I. COARSE EVALUATION >>>\n\n";
  122. if ($opt_y) {
  123.     print YAML "  coarse:\n";
  124. }
  125. &evaluate(\@allLabelsCoarse, \%confMatrixCoarse);
  126.  
  127. ### 5. Fine-grained evaluation
  128. print "\n<<< II. FINE-GRAINED EVALUATION >>>\n\n";
  129. if ($opt_y) {
  130.     print YAML "  fine:\n";
  131. }
  132. my ($officialScore, $accuracy) = &evaluate(\@allLabelsFine, \%confMatrixFine);
  133.  
  134. ### 6. Output the official score
  135. print "\n<<< III. OFFICIAL SCORE >>>\n";
  136. printf "\nMACRO-averaged fine-grained F1: %6.2f%s", $officialScore, "%\n";
  137.  
  138. ### 7. Close the files
  139. close GOLD or die;
  140. close PREDICTED or die;
  141.  
  142. ### 8. Print a summary to the screen
  143. print "$PREDICTIONS_FILE\t$officialScore\t$accuracy\n";
  144.  
  145.  
  146. ################
  147. ###   SUBS   ###
  148. ################
  149.  
  150. sub evaluate() {
  151.         my ($allLabels, $confMatrix) = @_;
  152.  
  153.         ### 0. Calculate the horizontal and vertical sums
  154.         my %allLabelsProposed = ();
  155.         my %allLabelsAnswer   = ();
  156.         my ($cntCorrect, $cntTotal) = (0, 0);
  157.         foreach my $labelGold (@{$allLabels}) {
  158.                 foreach my $labelProposed (@{$allLabels}) {
  159.                         $$confMatrix{$labelProposed}{$labelGold} = 0
  160.                                 if (!defined($$confMatrix{$labelProposed}{$labelGold}));
  161.                         $allLabelsProposed{$labelProposed} += $$confMatrix{$labelProposed}{$labelGold};
  162.                         $allLabelsAnswer{$labelGold} += $$confMatrix{$labelProposed}{$labelGold};
  163.                         $cntTotal += $$confMatrix{$labelProposed}{$labelGold};
  164.                 }
  165.                 $cntCorrect += $$confMatrix{$labelGold}{$labelGold};
  166.         }
  167.  
  168.         ### 1. Print the confusion matrix heading
  169.         print "Confusion matrix:\n";
  170.         print "       ";
  171.         foreach my $label (@{$allLabels}) {
  172.                 printf " %5s", $label;
  173.         }
  174.         print " <-- classified as\n";
  175.         print "       +";
  176.         foreach (@{$allLabels}) {
  177.                 print "------";
  178.         }
  179.         print "+ -SUM-\n";
  180.  
  181.         ### 2. Print the rest of the confusion matrix
  182.         my $freqCorrect = 0;
  183.         foreach my $labelGold (@{$allLabels}) {
  184.  
  185.                 ### 2.1. Output the short relation label
  186.                 printf " %5s |", $labelGold;
  187.  
  188.                 ### 2.2. Output a row of the confusion matrix
  189.                 foreach my $labelProposed (@{$allLabels}) {
  190.                         printf "%5d ", $$confMatrix{$labelProposed}{$labelGold};
  191.                 }
  192.  
  193.                 ### 2.3. Output the horizontal sums
  194.                 printf "| %5d\n", $allLabelsAnswer{$labelGold};
  195.         }
  196.         print "       +";
  197.         foreach (@{$allLabels}) {
  198.                 print "------";
  199.         }
  200.         print "+\n";
  201.        
  202.         ### 3. Print the vertical sums
  203.         print " -SUM- ";
  204.         foreach my $labelProposed (@{$allLabels}) {
  205.                 printf "%5d ", $allLabelsProposed{$labelProposed};
  206.         }
  207.         print "\n\n";
  208.  
  209.         ### 5. Output the accuracy
  210.         my $accuracy = 100.0 * $cntCorrect / $cntTotal;
  211.         printf "%s%d%s%d%s%5.2f%s", 'Accuracy (calculated for the above confusion matrix) = ', $cntCorrect, '/', $cntTotal, ' = ', $accuracy, "\%\n";
  212.         if ($opt_y) {
  213.             print YAML "    total_acc: [$cntCorrect, $cntTotal]\n"
  214.         }
  215.  
  216.         ### 8. Output P, R, F1 for each relation
  217.         my ($macroP, $macroR, $macroF1) = (0, 0, 0);
  218.         my ($microCorrect, $microProposed, $microAnswer) = (0, 0, 0);
  219.         print "\nResults for the individual labels:\n";
  220.         foreach my $labelGold (@{$allLabels}) {
  221.  
  222.                 ### 8.3. Calculate P/R/F1
  223.                 my $P  = (0 == $allLabelsProposed{$labelGold}) ? 0
  224.                                 : 100.0 * $$confMatrix{$labelGold}{$labelGold} / $allLabelsProposed{$labelGold};
  225.                 my $R  = (0 == $allLabelsAnswer{$labelGold}) ? 0
  226.                                 : 100.0 * $$confMatrix{$labelGold}{$labelGold} / $allLabelsAnswer{$labelGold};
  227.                 my $F1 = (0 == $P + $R) ? 0 : 2 * $P * $R / ($P + $R);
  228.  
  229.                 printf "%10s%s%5d%s%5d%s%6.2f", $labelGold,
  230.                         " :    P = ", $$confMatrix{$labelGold}{$labelGold}, '/', $allLabelsProposed{$labelGold}, ' = ', $P;
  231.  
  232.                 printf "%s%5d%s%5d%s%6.2f%s%6.2f%s\n",
  233.                          "%     R = ", $$confMatrix{$labelGold}{$labelGold}, '/', $allLabelsAnswer{$labelGold},   ' = ', $R,
  234.                          "%     F1 = ", $F1, '%';
  235.  
  236.                 if ($opt_y) {
  237.                     my $lbl = $labelGold;
  238.                     $lbl =~ s/ //g;
  239.                     printf YAML "    rel_%s: [%s, %s, %s]\n", $lbl, $$confMatrix{$labelGold}{$labelGold}, $allLabelsProposed{$labelGold}, $allLabelsAnswer{$labelGold};
  240.                 }
  241.  
  242.                 ### 8.5. Accumulate statistics for micro/macro-averaging
  243.                 $macroP  += $P;
  244.                 $macroR  += $R;
  245.                 $macroF1 += $F1;
  246.                 $microCorrect += $$confMatrix{$labelGold}{$labelGold};
  247.                 $microProposed += $allLabelsProposed{$labelGold};
  248.                 $microAnswer += $allLabelsAnswer{$labelGold};
  249.         }
  250.  
  251.         ### 9. Output the micro-averaged P, R, F1
  252.         my $microP  = (0 == $microProposed)    ? 0 : 100.0 * $microCorrect / $microProposed;
  253.         my $microR  = (0 == $microAnswer)      ? 0 : 100.0 * $microCorrect / $microAnswer;
  254.         my $microF1 = (0 == $microP + $microR) ? 0 :   2.0 * $microP * $microR / ($microP + $microR);
  255.         print "\nMicro-averaged result:\n";
  256.         printf "%s%5d%s%5d%s%6.2f%s%5d%s%5d%s%6.2f%s%6.2f%s\n",
  257.                       "P = ", $microCorrect, '/', $microProposed, ' = ', $microP,
  258.                 "%     R = ", $microCorrect, '/', $microAnswer, ' = ', $microR,
  259.                 "%     F1 = ", $microF1, '%';
  260.  
  261.         ### 10. Output the macro-averaged P, R, F1
  262.         my $distinctLabelsCnt = $#{$allLabels}+1;
  263.  
  264.         $macroP  /= $distinctLabelsCnt; # first divide by the number of non-Other categories
  265.         $macroR  /= $distinctLabelsCnt;
  266.         $macroF1 /= $distinctLabelsCnt;
  267.         print "\nMACRO-averaged result:\n";
  268.         printf "%s%6.2f%s%6.2f%s%6.2f%s\n\n\n\n", "P = ", $macroP, "%\tR = ", $macroR, "%\tF1 = ", $macroF1, '%';
  269.  
  270.         if ($opt_y) {
  271.             printf YAML "    macro_avg: %.3f\n", $macroF1;
  272.         }
  273.  
  274.         ### 11. Return the official score
  275.         return ($macroF1, $accuracy);
  276. }

Paste is for source code and general debugging text.

Login or Register to edit, delete and keep track of your pastes and more.

Raw Paste

Login or Register to edit or fork this paste. It's free.