PYTHON   10

parse.py

Guest on 25th July 2021 04:52:45 PM

  1.  
  2. """A module of functions that reformat an input program into an
  3.   operator tree.   Call  parse(LIST_OF_WORDS_TO_PARSE_GO_HERE)  to run the parser.
  4.  
  5.    The BNF is
  6.  
  7.    PROGRAM ::=  COMMANDLIST
  8.    COMMANDLIST ::=  COMMAND  |  COMMAND ; COMMANDLIST
  9.    COMMAND ::=  VAREXP = EXPRESSSION
  10.                 |  print EXPRESSION
  11.                 |  while EXPRESSION : COMMANDLIST end
  12.                 |  if EXPRESSION : COMMANDLIST else COMMANDLIST end
  13.    EXPRESSION ::= NUMERAL  |  VAREXP
  14.                 |  ( EXPRESSION OPERATOR EXPRESSION )
  15.                 |  newarray EXPRESSION
  16.    VAREXP ::=   VAR  |  VAR [ EXPRESSION ]
  17.  
  18.    OPERATOR is  +  or  -  or  *
  19.    NUMERAL  is a sequence of digits
  20.    VAR  is a string beginning with a letter; it cannot be  'print', 'while',
  21.          'if',  'else',  'end',  or  'newarray'
  22.  
  23.  
  24.    The operator trees are nested lists:
  25.  
  26.    CLIST ::=  [ CTREE+ ]
  27.               where  CTREE+  means  one or more CTREEs
  28.    CTREE ::=  ["=", VTREE, ETREE]  |  ["print", ETREE]  |  ["while", ETREE, CLIST]
  29.               |  ["if", ETREE, CLIST, CLIST]  
  30.    ETREE ::=  ["num", NUMERAL]  |  VTREE   |  ["newarray", ETREE]
  31.               |  [OP, ETREE1, ETREE2]
  32.               where OP is either "+" or "-"
  33.    VTREE ::=  ["var", VAR]  |  ["index", VAR, ETREE]
  34. """
  35.  
  36. ### data structures for parser:
  37. wordlist = []  # holds the remaining unread words
  38. nextword = ""  # holds the first unread word
  39. # global invariant:  nextword + wordlist == all remaining unread words
  40.  
  41. EOF = "!"      # a word that marks the end of the input words
  42.  
  43. def getNextword() :
  44.     """moves the front word in  wordlist  to  nextword.
  45.       If wordlist is empty, sets  nextword = EOF
  46.    """
  47.     global nextword, wordlist
  48.     if wordlist == [] :
  49.         nextword = EOF
  50.     else :
  51.         nextword = wordlist[0]
  52.         wordlist = wordlist[1:]
  53.  
  54. #####
  55.  
  56. def error(message) :
  57.     """prints an error message and halts the parse"""
  58.     print "parse error: " + message
  59.     print "The next input word is:", nextword
  60.     print "The remaining word list is:", wordlist
  61.     print
  62.     raise Exception
  63.  
  64.  
  65. def parseEXPR() :
  66.     """returns an ETREE built from the words in  nextword + wordlist,
  67.          based on the rule,
  68.          EXPRESSION ::= NUMERAL  |  VAREXP
  69.                 |  ( EXPRESSION OPERATOR EXPRESSION )
  70.                 |  newarray EXPRESSION
  71.      Also, maintains the global invariant (on wordlist and nextword)
  72.    """
  73.     if  nextword.isdigit() :   # a NUMERAL ?
  74.         ans = ["num", nextword]
  75.         getNextword()
  76.     elif  isVar(nextword) :    # a VARIABLE ?
  77.         ans = parseVAREXP()
  78.     elif nextword == "newarray" :  # newarray EXPR ?
  79.         getNextword()
  80.         etree = parseEXPR()
  81.         ans = ["newarray", etree]
  82.     elif nextword == "(" :     # ( EXPR op EXPR ) ?
  83.         getNextword()
  84.         tree1 = parseEXPR()
  85.         op = nextword
  86.         if op in ["+", "-", "*"] :
  87.             getNextword()
  88.             tree2 = parseEXPR()
  89.             if nextword == ")" :
  90.                 ans = [op, tree1, tree2]
  91.                 getNextword()
  92.             else :
  93.                 error("missing )")
  94.         else :
  95.             error("missing operator")
  96.     else :
  97.         error("illegal symbol to start an expression")
  98.  
  99.     return ans
  100.  
  101.  
  102. def parseVAREXP() :
  103.     """return a VTREE built from the words in  nextword + wordlist,
  104.          based on the rule,
  105.          VAREXP ::=   VAR  |  VAR [ EXPRESSION ]
  106.    """
  107.     varname = nextword
  108.     getNextword()
  109.     if nextword == "[" :   # an indexed varname ?
  110.         getNextword()
  111.         etree = parseEXPR()
  112.         if nextword == "]" :
  113.             getNextword()
  114.             ans = ["index", varname, etree]
  115.         else :
  116.             error("missing ]")
  117.     else :  # a simple varname
  118.         ans = ["var", varname]
  119.     return ans
  120.  
  121.    
  122.  
  123. def isVar(word) :
  124.     """checks whether  word  is a legal variable name
  125.       pre: word is a string of letters
  126.       post: returns True when  word  is a legal variable name
  127.    """
  128.     KEYWORDS = ("print", "while", "end", "if", "else", "newarray")
  129.     ans = ( word.isalpha()  and  not(word in KEYWORDS) )
  130.     return ans
  131.  
  132.  
  133. def parseCOMMAND() :
  134.     """builds a CTREE from the words in  nextword + wordlist, based on
  135.          COMMAND ::=  VAREXP = EXPRESSSION
  136.                 |  print VARIABLE
  137.                 |  while EXPRESSION : COMMANDLIST end
  138.                 |  if EXPRESSION : COMMANDLIST else COMMANDLIST end
  139.    """
  140.     if nextword == "print" :      # print VARIABLE ?
  141.         getNextword()
  142.         etree = parseEXPR()
  143.         ans = ["print", etree]
  144.     elif nextword == "while" :    # while EXPRESSION : COMMANDLIST end ?
  145.         getNextword()
  146.         exprtree = parseEXPR()
  147.         if nextword == ":" :
  148.             getNextword()
  149.         else :
  150.             error("missing :")
  151.         cmdlisttree = parseCMDLIST()
  152.         if nextword == "end" :
  153.             ans = ["while", exprtree, cmdlisttree]
  154.             getNextword()
  155.         else :
  156.             error("missing end")
  157.     elif nextword == "if" :    # if EXPR : COMMANDLIST else COMMANDLSIT end ?
  158.         getNextword()
  159.         exprtree = parseEXPR()
  160.         if nextword == ":" :
  161.             getNextword()
  162.         else :
  163.             error("missing :")
  164.         clist1 = parseCMDLIST()
  165.         if nextword == "else" :
  166.             getNextword()
  167.             clist2 = parseCMDLIST()
  168.             if nextword == "end" :
  169.                 ans = ["if", exprtree, clist1, clist2]
  170.                 getNextword()
  171.             else :
  172.                 error("missing end")
  173.         else :
  174.             error("missing else")
  175.  
  176.     elif isVar(nextword) :       # VAREXP = EXPRESSION ?
  177.         v = parseVAREXP()
  178.         if nextword == "=" :
  179.             getNextword()
  180.             exprtree = parseEXPR()
  181.             ans = ["=", v, exprtree]
  182.         else :
  183.             error("missing =")
  184.     else :                       # error -- bad command
  185.         error("bad word to start a command")
  186.     return ans
  187.  
  188.  
  189. def parseCMDLIST() :
  190.     """builds a COMMANDLIST tree from the words in  nextword + wordlist,
  191.       where  COMMANDLIST ::=  COMMAND  |  COMMAND ; COMMANDLIST
  192.                          that is,  one or more COMMANDS, separated by ;s
  193.       also, maintains the global invariant (on wordlist and nextword)
  194.    """
  195.     anslist = [ parseCOMMAND() ]   # parse first command
  196.     while nextword == ";" :        # collect any other COMMANDS
  197.         getNextword()
  198.         anslist.append( parseCOMMAND() )
  199.     return anslist
  200.  
  201.  
  202. def parse(words) :
  203.     """reads the input mini-program and builds an operator tree for it,
  204.       where  PROGRAM ::= COMMANDLIST
  205.       input: words, the program to be analyzed
  206.       output: the operator tree for the program
  207.    """
  208.     global wordlist
  209.     wordlist = words # initialize parser with program's words
  210.  
  211.     getNextword()  # initialize  nextword  to first unread word
  212.     # assert: invariant for nextword and wordlist holds true here
  213.     tree = parseCMDLIST()  
  214.     # assert: tree holds the entire operator tree for  words
  215.     #print tree
  216.     if nextword != EOF :
  217.        error("there are extra words")
  218.     return tree

Raw Paste


Login or Register to edit or fork this paste. It's free.