PYTHON   15

mailstats py

Guest on 18th August 2022 01:12:55 AM

  1. #!/usr/bin/env python
  2. #
  3. # mailstats.py - collect & report postscreen(8) statistics
  4. # Sahil Tandon <sahil@FreeBSD.org>
  5.  
  6. import argparse, bz2, re, sys
  7. from collections import defaultdict
  8.  
  9. # Initialize the argument parser & specify arguments
  10. parser = argparse.ArgumentParser()
  11. parser.add_argument('filename', metavar='FILE', help='path to maillog')
  12. parser.add_argument('-a', dest='action', help='filter results based on this action', choices=['DNSBL', 'PREGREET'])
  13. parser.add_argument('-w', dest='userwl', metavar='WL', nargs='*', default=[], help='DNS whitelists')
  14. args = parser.parse_args()
  15.  
  16. # Initialize dns black and whitelists; the latter include
  17. # two defaults as well as user-defined zones.  Blacklists
  18. # are populated by the parse() function as it steps through
  19. # the log lines.
  20. #
  21. # IMPORTANT: dnsblog(8) entries with a zone not included in
  22. # a whitelist are assumed to be blacklist hits.
  23. dnsbls    = []
  24. dnswls    = set(['list.dnswl.org', 'swl.spamhaus.org'] + args.userwl)
  25.  
  26. # DRY: define common parts of compiled regular expressions
  27. conn_re = ']: CONNECT'
  28. addr_re = '.*\[([^]]+)\]:\d+'
  29. rcpt_re = ': reject: RCPT from'
  30.  
  31. # pattern_list is used to grow pattern_dict, a dictionary populated
  32. # with keys that correspond to postscreen(8) events, and their regex
  33. # values.
  34. pattern_list = [  'BARE NEWLINE', 'BLACKLISTED', 'COMMAND COUNT LIMIT','COMMAND TIME LIMIT',
  35.                   'COMMAND PIPELINING', 'DISCONNECT', 'DNSBL', 'HANGUP', 'NON-SMTP',
  36.                   'PASS NEW', 'PASS OLD', 'PREGREET', 'WHITELISTED' ]
  37.                  
  38. pattern_dict = {  'CONNECT':re.compile(r'%s%s ' % (conn_re, addr_re)),
  39.                   'reject (too many connections)':re.compile(r'%s%s: too many' % (conn_re, addr_re)),
  40.                   'reject (all server ports busy)':re.compile(r'%s%s: all server' % (conn_re, addr_re)),
  41.                   'reject (450)':re.compile(r'%s%s: 450' % (rcpt_re, addr_re)),
  42.                   'reject (550)':re.compile(r'%s%s: 550' % (rcpt_re, addr_re))  }
  43.  
  44. for pattern in pattern_list:
  45.   pattern_dict[pattern] = re.compile(r': %s%s' % (pattern, addr_re))
  46.  
  47. # Initialize two multi-dimensional dictionaries.
  48. addr_dict = defaultdict(lambda: defaultdict(int))
  49. rank_dict = defaultdict(int)
  50.  
  51. # Iterate through each line of the log file, collecting postscreen(8)
  52. # and dnsblog(8) statistics per IP address.  Also note postscreen(8)
  53. # connections per secon
  54. def parse(filename):
  55.   with open(filename) as file:
  56.     for line in file:
  57.       if 'postfix/postscreen' in line:
  58.         for event, regex in pattern_dict.items():
  59.           match = regex.search(line)
  60.           if match:
  61.             addr_dict[match.group(1)][event] += 1
  62.             break
  63.         match = re.search(r'rank (\d+) for \[[^]]+\]:\d+', line)
  64.         if match:
  65.           rank_dict[match.group(1)] += 1
  66.       elif 'postfix/dnsblog' in line:
  67.         match = re.search(r': addr ([\d.]+) listed by domain ([\w.]+) as', line)
  68.         if match:
  69.           addr, zone = match.groups()
  70.           addr_dict[addr][zone] = 1
  71.           if zone in dnswls:
  72.             pass
  73.           elif zone not in dnsbls:
  74.             dnsbls.append(zone)
  75.       else:
  76.         continue
  77.  
  78. # Jan 31 22:37:10 cricket postfix/postscreen[35464]: DNSBL rank 7 for [180.253.3.203]:18541
  79.  
  80. # A filter used to prune the IP list based on the presence of a particular
  81. # event, e.g. DNSBL hit.  If no event is specified, return True.
  82. def prune(ip, event):
  83.   if event:
  84.     return addr_dict[ip][event] > 0
  85.   else:
  86.     return True
  87.  
  88. # TOTAL refers to the number of times each event was triggered, while
  89. # UNIQ corresponds to the number of unique IPs that triggered the event.
  90. def event_report(filt=None):
  91.   align = '{0:>8}{1}{2:<8}{3:<}'
  92.   print
  93.   print align.format("UNIQ", "/", "TOTAL", "EVENT")
  94.   for event in sorted(pattern_dict):
  95.     total = sum([addr_dict[ip][event] for ip in addr_dict if prune(ip, filt)])
  96.     if total == 0: continue
  97.     uniq  = len([ip for ip in addr_dict if addr_dict[ip][event] if prune(ip, filt)])
  98.     print align.format(uniq, "/", total, event)
  99.   print
  100.  
  101. # In the context of DNS{B,W}Ls, TOTAL corresponds to the number of IPs listed
  102. # in a zone, while UNIQ is the number of IPs that are listed *only* by
  103. # that zone.  Also report the number of overlap among DNS{B,W}L hits.  
  104. def dns_report(zones):
  105.   align = '{0:>8}{1}{2:<8}{3:<24}{4:>6}'
  106.   (str1, str2, dnsl1, dnsl2) = ('DNSBL','DNSWL', dnsbls, dnswls)
  107.   if 'list.dnswl.org' in zones:
  108.     (str1, str2, dnsl1, dnsl2) = (str2, str1, dnsl2, dnsl1)
  109.   print align.format("UNIQ", "/", "TOTAL", str1, str2)
  110.   for zone in sorted(zones):
  111.     hits = len([ip for ip in addr_dict if addr_dict[ip][zone]])
  112.     excl = len([ip for ip in addr_dict if addr_dict[ip][zone] if sum([addr_dict[ip][x] for x in dnsl1]) == 1])
  113.     olap = len([ip for ip in addr_dict if addr_dict[ip][zone] if sum([addr_dict[ip][x] for x in dnsl2]) != 0])
  114.     print align.format(excl, "/", hits, zone, olap)
  115.   print
  116.  
  117. parse(args.filename)
  118. event_report(args.action)
  119. dns_report(dnsbls)
  120. dns_report(dnswls)
  121. for rank in sorted(rank_dict, key=rank_dict.get, reverse=True): print '{0:>11} {1:>2} : {2}'.format("DNSBL Rank", rank, rank_dict[rank])
  122. print

Raw Paste


Login or Register to edit or fork this paste. It's free.