PYTHON   13

scanner.py

Guest on 25th July 2021 04:54:29 PM

  1.  
  2. """A module of functions for scanning a string, that is,
  3.   splitting the string into its constituent words (tokens)
  4. """
  5.  
  6. ###### data structures:
  7.  
  8. nextword = ""  # the word or operator in process of assembly
  9. answer = []    # the answer list of operators and words assembled so far
  10. # global invariant:
  11. #   answer + nextword == all the words found in the input examined so far
  12.  
  13. # functions that maintain the two data structures:
  14. def init() :
  15.     """init  resets the data structures"""
  16.     global nextword, answer
  17.     nextword = ""
  18.     answer = []
  19.  
  20. def emitword() :
  21.     """emitword  moves the current assembled word to  answer"""
  22.     global nextword
  23.     if nextword != "" :
  24.         answer.append(nextword)
  25.         nextword = ""
  26.  
  27. def addletter(letter):
  28.     """addletter  appends  letter  to the end of  nextword"""
  29.     global nextword
  30.     nextword = nextword + letter
  31.  
  32.  
  33. ####### the scan function:
  34.  
  35. def scan(text) :
  36.     """scan splits apart the symbols in  text  into a list.
  37.  
  38.       pre:  text  is a string of characters holding words and operators
  39.       post: answer  is a list of all the words and operators
  40.             within  text; spaces and newlines are removed
  41.       returns  answer
  42.    """
  43.     #print text
  44.     OPERATOR_SYMBOLS = ("(", ")", "[", "]", "+", "-", ";", ":", "=", "!", "*")
  45.     TWO_SYM_OPS = ("==", "!=")
  46.     SPACES = (" ", "\n", "\r", "\t")   # space, newline, return, tab
  47.     init()
  48.     for letter in text:
  49.         # invariant:  answer + nextword + letter + the_rest_of_text
  50.         #   holds all the words in the user's input
  51.         if letter in SPACES :   # end of the nextword?
  52.             emitword()
  53.  
  54.         elif letter in OPERATOR_SYMBOLS :  # an operator?
  55.             if nextword + letter in TWO_SYM_OPS :
  56.                 addletter(letter)
  57.                 emitword()
  58.             else :
  59.                 emitword()
  60.                 addletter(letter)
  61.  
  62.         else :  # a letter that is part of a word/number :
  63.             if nextword in OPERATOR_SYMBOLS :
  64.                 emitword()
  65.             addletter(letter)
  66.  
  67.     # loop finished; all words assembled.
  68.     emitword()   # in case there is a last word
  69.  
  70.     return answer

Raw Paste


Login or Register to edit or fork this paste. It's free.