PYTHON 17
SearchEngineQueries.py Guest on 22nd June 2020 03:31:25 AM
  1. #!/usr/bin/env python
  2.  
  3. import urllib
  4. import re
  5.  
# Output switches read by the __main__ section when printing each query.
display_engine = True  # when true, the engine text is shown in parentheses
display_result = True  # when true, " -> <result>" is printed after the query
  8.  
  9. def prettify(str):
  10.     result = re.sub(" +", " ", str)
  11.     if result[0] == '"':
  12.         result = result[1:]
  13.     if result[len(result)-1] == '"':
  14.         result = result[:-1]
  15.     return result
  16.    
  17. def decode(str):
  18.     try:
  19.         return prettify(urllib.unquote_plus(str).decode('utf-8'))
  20.     except UnicodeDecodeError:
  21.         return str
  22.    
  23. def extract(line):
  24.     match = re.search("GET +(.*) +HTTP.*[0-9]+ \"(.*)\&q=([^&]+)\&", line)
  25.     if match:
  26.         return (match.group(3), match.group(2), match.group(1))
  27.     else:
  28.         return (None, None, None)
  29.    
  30. if __name__ == '__main__':
  31.     import sys
  32.     results = []
  33.     n_result = 0
  34.     for line in sys.stdin.xreadlines():
  35.         (query, engine, result) = extract(line[:-1])
  36.         if query is not None:
  37.             query = decode(query).encode('latin-1','replace')
  38.             results.append ({'query': query, 'engine': engine, 'result': result})
  39.         n_result = n_result + 1
  40.     for result in results:
  41.         if display_engine:
  42.             print "%s (%s)" % (result['query'], result['engine']),
  43.         else:
  44.             print result['query'],
  45.         if display_result:
  46.             print " -> %s" % result['result']
  47.         else:
  48.             print ""

Paste is for source code and general debugging text.

Login or Register to edit, delete and keep track of your pastes and more.

Raw Paste

Login or Register to edit or fork this paste. It's free.