PERL   74

norm-bnc

Guest on 25th August 2022 01:51:58 PM

  1. #!/usr/local/bin/perl
  2.  
  3. #  norm-bnc
  4. #     Converts BNC tag names to smalltag tag names
  5. #  
  6. #     Alex Krotov (alexk@dcs.shef.ac.uk)
  7. #
  8.  
  9.  
  # Lookup table: lowercase BNC tag => smalltag tag name.
  # Keys are lowercase because the main loop lowercases each tag before
  # looking it up.  Any tag that is not listed here is rewritten to "X"
  # by the main loop.
  %conv_table = (
      # adjectives
      'aj0' => 'ADJ',
      'ajc' => 'ADJ',
      'ajs' => 'ADJ',

      # articles and determiners
      'at0' => 'DET_ART',
      'dps' => 'DET',
      'dt0' => 'DET',

      # adverbs
      'av0' => 'ADV',
      'avp' => 'ADV',
      'avq' => 'ADV_WH',

      # conjunctions
      'cjc' => 'CONJ',
      'cjs' => 'CONJ',
      'cjt' => 'CONJ',

      # interjections
      'itj' => 'INTERJ',

      # nouns (crd, ord and zz0 are folded into NOUN as well)
      'crd' => 'NOUN',
      'nn0' => 'NOUN',
      'nn1' => 'NOUN',
      'nn2' => 'NOUN',
      'np0' => 'NOUN_PROP',
      'ord' => 'NOUN',
      'zz0' => 'NOUN',

      # pronouns
      'pni' => 'PRON',
      'pnp' => 'PRON_PERS',
      'pnq' => 'PRON_WH',
      'pnx' => 'PRON',

      # genitive marker, prepositions, "to", negation, unknown
      'pos' => 'GENIT',
      'prf' => 'PREP',
      'prp' => 'PREP',
      'to0' => 'TO',
      'xx0' => 'NOT',
      'unc' => 'UNKNOWN',

      # verbs: vb* family
      'vbb' => 'VERB_PRES',
      'vbd' => 'VERB_PAST',
      'vbg' => 'VERB_ING',
      'vbi' => 'VERB_INF',
      'vbn' => 'VERB_PAST_PART',
      'vbz' => 'VERB_PRES_SING',

      # verbs: vd* family
      'vdb' => 'VERB_PRES',
      'vdd' => 'VERB_PAST',
      'vdg' => 'VERB_ING',
      'vdi' => 'VERB_INF',
      'vdn' => 'VERB_PAST_PART',
      'vdz' => 'VERB_PRES_SING',

      # verbs: vh* family
      'vhb' => 'VERB_PRES',
      'vhd' => 'VERB_PAST',
      'vhg' => 'VERB_ING',
      'vhi' => 'VERB_INF',
      'vhn' => 'VERB_PAST_PART',
      'vhz' => 'VERB_PRES_SING',

      # verbs: modal and vv* family
      'vm0' => 'VERB_MODAL',
      'vvb' => 'VERB_PRES',
      'vvd' => 'VERB_PAST',
      'vvg' => 'VERB_ING',
      'vvi' => 'VERB_INF',
      'vvn' => 'VERB_PAST_PART',
      'vvz' => 'VERB_PRES_SING',

      # punctuation
      'pul' => 'PUNC_LEFT',
      'pun' => 'PUNC',
      'puq' => 'PUNC_QUOTE',
      'pur' => 'PUNC_RIGHT',
  );
  71.  
  72.  
  # Main filter loop.
  #
  # Reads lines from the files named on the command line (or STDIN) and
  # rewrites every "<w TAG>" marker on the line, where TAG is a run of word
  # characters:
  #   * TAG is lowercased and looked up in %conv_table;
  #   * if it is present, the marker becomes "<w MAPPED_NAME>";
  #   * otherwise the marker becomes "<w X>".
  #
  # All markers on a line are converted, not just the first one, so a line
  # holding several tagged words never ends up with a mix of tagsets.
  #
  # Only lines that contained at least one such marker are printed; every
  # other line is dropped from the output.
  while (my $line = <>) {

      # s///g returns the number of substitutions made (false when none).
      my $converted = ($line =~ s{<w (\w+)>}{
          my $bnc_tag = lc $1;
          '<w ' . (exists $conv_table{$bnc_tag} ? $conv_table{$bnc_tag} : 'X') . '>';
      }ge);

      print $line if $converted;
  }

Raw Paste


Login or Register to edit or fork this paste. It's free.