PYTHON   14

DeDuper py

Guest on 18th August 2022 01:10:50 AM

  1. # DeDuper.py - discard duplicates sent to a FreeBSD mailing list.
  2. # Sahil Tandon <sahil@FreeBSD.org>
  3.  
  4. """ This custom handler discards messages with a Message ID that has
  5.    already been seen for a particular Mailman list.  A cache of seen
  6.    IDs is saved in a pickled dictionary, which is pruned of entries
  7.    older than 300 seconds each time a new Message ID is recorded.
  8. """
  9.  
  10. import os
  11. import errno
  12. import time
  13. import cPickle
  14.  
  15. from Mailman.Errors import DiscardMessage
  16. from Mailman.Logging.Syslog import syslog
  17.  
  18. def process(mlist, msg, msgdata):
  19.     msgid = msg.get('message-id', None)
  20.     if not msgid:
  21.         return
  22.     filename = os.path.join(mlist.fullpath(), 'message-ids.pck')
  23.     try:
  24.         fp = open(filename, 'r')
  25.         pairs = cPickle.load(fp)
  26.         if pairs.has_key(msgid):
  27.             syslog('vette', '%s post from %s with message-id=%s is a duplicate',
  28.                     mlist.internal_name(), msg.get_sender(), msgid)
  29.             raise DiscardMessage
  30.         else:
  31.             pairs[msgid] = time.time()
  32.             remove = [k for k,v in pairs.iteritems() if pairs[msgid]-v > 300]
  33.             for k in remove: del pairs[k]
  34.             fp = open(filename, 'w')
  35.             cPickle.dump(pairs, fp)
  36.     except IOError, e:
  37.         if e.errno == errno.ENOENT:
  38.             fp = open(filename, 'w')
  39.             pairs = {}
  40.             pairs[msgid] = time.time()
  41.             cPickle.dump(pairs, fp)
  42.     finally:
  43.         fp.close()

Raw Paste


Login or Register to edit or fork this paste. It's free.