
find-fix.py


#!/usr/bin/env python
# -*- Python -*-
"""find-fix.py: produce a find/fix report for Subversion's IZ database

For simple text summary:
      find-fix.py query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
Statistics will be printed for bugs found or fixed within the
time frame.

For gnuplot presentation:
      find-fix.py query-set-1.tsv outfile
Gnuplot provides its own way to select date ranges.

Either way, get a query-set-1.tsv from:
  http://subversion.tigris.org/iz-data/query-set-1.tsv  (updated nightly)
See http://subversion.tigris.org/iz-data/README for more info on that file.

For more usage info on this script:
      find-fix.py --help
"""

_version = "$Revision$"

#
# This can be run over the data file found at:
#   http://subversion.tigris.org/iz-data/query-set-1.tsv
#

import getopt
import operator
import os
import os.path
import pydoc
import re
import string
import sys
import time

me = os.path.basename(sys.argv[0])

# Long options and their usage strings; "=" means it takes an argument.
# To get a list suitable for getopt.getopt(), just do
#
#   [x[0] for x in long_opts]
#
# Make sure to sacrifice a lamb to Guido for each element of the list.
long_opts = [
  ["milestones=",      """Optional, milestones NOT to report on
       (one or more of Beta, 1.0, Post-1.0, cvs2svn-1.0, cvs2svn-opt,
       inapplicable)"""],
  ["update",          """Optional, update the statistics first."""],
  ["doc",             """Optional, print pydocs."""],
  ["help",            """Optional, print usage (this text)."""],
  ["verbose",         """Optional, print more progress messages."""],
  ]

help    = 0
verbose = 0
update  = 0

DATA_FILE = "http://subversion.tigris.org/iz-data/query-set-1.tsv"
ONE_WEEK = 7 * 24 * 60 * 60

_types = []
_milestone_filter = []

noncore_milestone_filter = [
  'Post-1.0',
  '1.1',
  'cvs2svn-1.0',
  'cvs2svn-opt',
  'inapplicable',
  'no milestone',
  ]

one_point_oh_milestone_filter = noncore_milestone_filter + []

beta_milestone_filter = one_point_oh_milestone_filter + ['1.0']


_types = [
  'DEFECT',
  'TASK',
  'FEATURE',
  'ENHANCEMENT',
  'PATCH',
  ]


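# Hypothetical example invocations (file names and dates below are only
# illustrations; "beta" selects beta_milestone_filter above, and a
# leading "-" drops a milestone from the current filter):
#
#   find-fix.py --update query-set-1.tsv 2004-01-01 2004-02-01
#   find-fix.py --milestones=beta,-1.1 query-set-1.tsv iz-data
#
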
def main():
  """Report bug find/fix rate statistics for Subversion."""

  global verbose
  global update
  global _types
  global _milestone_filter
  global noncore_milestone_filter

  try:
    opts, args = getopt.getopt(sys.argv[1:], "", [x[0] for x in long_opts])
  except getopt.GetoptError, e:
    sys.stderr.write("Error: %s\n" % e.msg)
    shortusage()
    sys.stderr.write("%s --help for options.\n" % me)
    sys.exit(1)

  for opt, arg in opts:
    if opt == "--help":
      usage()
      sys.exit(0)
    elif opt == "--verbose":
      verbose = 1
    elif opt == "--milestones":
      for mstone in string.split(arg, ","):
        if mstone == "noncore":
          _milestone_filter = noncore_milestone_filter
        elif mstone == "beta":
          _milestone_filter = beta_milestone_filter
        elif mstone == "one":
          _milestone_filter = one_point_oh_milestone_filter
        elif mstone[0] == '-':
          if mstone[1:] in _milestone_filter:
            spot = _milestone_filter.index(mstone[1:])
            _milestone_filter = _milestone_filter[:spot] \
                                + _milestone_filter[(spot+1):]
        else:
          _milestone_filter += [mstone]

    elif opt == "--update":
      update = 1
    elif opt == "--doc":
      pydoc.doc(pydoc.importfile(sys.argv[0]))
      sys.exit(0)

  if len(_milestone_filter) == 0:
    _milestone_filter = noncore_milestone_filter

  if verbose:
    sys.stderr.write("%s: Filtering out milestones %s.\n"
                     % (me, string.join(_milestone_filter, ", ")))

  if len(args) == 2:
    if verbose:
      sys.stderr.write("%s: Generating gnuplot data.\n" % me)
    if update:
      if verbose:
        sys.stderr.write("%s: Updating %s from %s.\n" % (me, args[0], DATA_FILE))
      # if curl fails, fall back to wget, writing to the same file
      if os.system("curl " + DATA_FILE + " > " + args[0]):
        os.system("wget -O " + args[0] + " " + DATA_FILE)
    plot(args[0], args[1])

  elif len(args) == 3:
    if verbose:
      sys.stderr.write("%s: Generating summary from %s to %s.\n"
                       % (me, args[1], args[2]))
    if update:
      if verbose:
        sys.stderr.write("%s: Updating %s from %s.\n" % (me, args[0], DATA_FILE))
      # if curl fails, fall back to wget, writing to the same file
      if os.system("curl " + DATA_FILE + " > " + args[0]):
        os.system("wget -O " + args[0] + " " + DATA_FILE)

    try:
      t_start = parse_time(args[1] + " 00:00:00")
    except ValueError:
      sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[1]))
      sys.exit(1)

    try:
      t_end = parse_time(args[2] + " 00:00:00")
    except ValueError:
      sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[2]))
      sys.exit(1)

    summary(args[0], t_start, t_end)
  else:
    usage()

  sys.exit(0)


def summary(datafile, d_start, d_end):
  "Prints a summary of activity within a specified date range."

  data = load_data(datafile)

  # activity during the requested period
  found, fixed, inval, dup, other = extract(data, 1, d_start, d_end)

  # activity from the beginning of time to the end of the request,
  # used to compute remaining
  # XXX It would be faster to change extract to collect this in one
  # pass.  But we don't presently have enough data, nor use this
  # enough, to justify that rework.
  fromzerofound, fromzerofixed, fromzeroinval, fromzerodup, fromzeroother \
              = extract(data, 1, 0, d_end)

  alltypes_found = alltypes_fixed = alltypes_inval = alltypes_dup \
                   = alltypes_other = alltypes_rem = 0
  for t in _types:
    fromzerorem_t = fromzerofound[t] \
                    - (fromzerofixed[t] + fromzeroinval[t] + fromzerodup[t]
                       + fromzeroother[t])
    print '%12s: found=%3d  fixed=%3d  inval=%3d  dup=%3d  ' \
          'other=%3d  remain=%3d' \
          % (t, found[t], fixed[t], inval[t], dup[t], other[t], fromzerorem_t)
    alltypes_found = alltypes_found + found[t]
    alltypes_fixed = alltypes_fixed + fixed[t]
    alltypes_inval = alltypes_inval + inval[t]
    alltypes_dup   = alltypes_dup   + dup[t]
    alltypes_other = alltypes_other + other[t]
    alltypes_rem   = alltypes_rem + fromzerorem_t

  print '-' * 77
  print '%12s: found=%3d  fixed=%3d  inval=%3d  dup=%3d  ' \
        'other=%3d  remain=%3d' \
        % ('totals', alltypes_found, alltypes_fixed, alltypes_inval,
           alltypes_dup, alltypes_other, alltypes_rem)
  # print '%12s  find/fix ratio: %g%%' \
  #      % (" "*12, (alltypes_found*100.0/(alltypes_fixed
  #         + alltypes_inval + alltypes_dup + alltypes_other)))


def plot(datafile, outbase):
  "Generates data files intended for use by gnuplot."

  global _types

  data = load_data(datafile)

  t_min = 1L<<32
  for issue in data:
    if issue.created < t_min:
      t_min = issue.created

  # break the time up into a tuple, then back up to Sunday
  t_start = time.localtime(t_min)
  t_start = time.mktime((t_start[0], t_start[1], t_start[2] - t_start[6] - 1,
                         0, 0, 0, 0, 0, 0))

  plots = { }
  for t in _types:
    # for each issue type, we will record per-week stats, compute a moving
    # average of the find/fix delta, and track the number of open issues
    plots[t] = [ [ ], MovingAverage(), 0 ]

  week = 0
  for date in range(int(t_start), int(time.time()), ONE_WEEK):
    ### this is quite inefficient, as we could just sort by date, but
    ### I'm being lazy
    found, fixed = extract(data, None, date, date + ONE_WEEK - 1)

    for t in _types:
      per_week, avg, open_issues = plots[t]
      delta = found[t] - fixed[t]
      per_week.append((week, date,
                       found[t], -fixed[t], avg.add(delta), open_issues))
      plots[t][2] = open_issues + delta

    week = week + 1

  for t in _types:
    week_data = plots[t][0]
    write_file(week_data, outbase, t, 'found', 2)
    write_file(week_data, outbase, t, 'fixed', 3)
    write_file(week_data, outbase, t, 'avg', 4)
    write_file(week_data, outbase, t, 'open', 5)

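# write_file (below) emits one gnuplot data file per (tag, type) pair,
# named "<base>.<tag>.<type>"; e.g. an outbase of "iz" (hypothetical)
# yields iz.found.DEFECT, iz.open.PATCH, and so on.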
def write_file(week_data, base, type, tag, idx):
  f = open('%s.%s.%s' % (base, tag, type), 'w')
  for info in week_data:
    f.write('%s %s # %s\n' % (info[0], info[idx], time.ctime(info[1])))
  f.close()


class MovingAverage:
  "Helper class to compute moving averages."
  def __init__(self, n=4):
    self.n = n
    self.data = [ 0 ] * n
  def add(self, value):
    self.data.pop(0)
    self.data.append(float(value) / self.n)
    return self.avg()
  def avg(self):
    return reduce(operator.add, self.data)

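# Sanity check of the default four-slot window (hypothetical values):
#   avg = MovingAverage()
#   avg.add(4)  # -> 1.0; the three empty slots still count as zero
#   avg.add(4)  # -> 2.0
#   avg.add(4)  # -> 3.0
#   avg.add(4)  # -> 4.0; the window is now full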

def extract(data, details, d_start, d_end):
  """Extract found/fixed counts for each issue type within the date range.

  If DETAILS is false, then return two dictionaries:

    found, fixed

  ...each mapping issue types to the number of issues of that type
  found or fixed respectively.

  If DETAILS is true, return five dictionaries:

    found, fixed, invalid, duplicate, other

  The first is still the found issues, but the other four break down
  the resolution into 'FIXED', 'INVALID', 'DUPLICATE', and a grab-bag
  category for 'WORKSFORME', 'LATER', 'REMIND', and 'WONTFIX'."""
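  # Example call styles (both appear elsewhere in this script):
  #   found, fixed, inval, dup, other = extract(data, 1, d_start, d_end)
  #   found, fixed = extract(data, None, d_start, d_end)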

  global _types
  global _milestone_filter

  found = { }
  fixed = { }
  invalid = { }
  duplicate = { }
  other = { }  # "WORKSFORME", "LATER", "REMIND", and "WONTFIX"

  for t in _types:
    found[t] = fixed[t] = invalid[t] = duplicate[t] = other[t] = 0

  for issue in data:
    # filter out disrespected milestones
    if issue.milestone in _milestone_filter:
      continue

    # record the found/fixed counts
    if d_start <= issue.created <= d_end:
      found[issue.type] = found[issue.type] + 1
    if d_start <= issue.resolved <= d_end:
      if details:
        if issue.resolution == "FIXED":
          fixed[issue.type] = fixed[issue.type] + 1
        elif issue.resolution == "INVALID":
          invalid[issue.type] = invalid[issue.type] + 1
        elif issue.resolution == "DUPLICATE":
          duplicate[issue.type] = duplicate[issue.type] + 1
        else:
          other[issue.type] = other[issue.type] + 1
      else:
        fixed[issue.type] = fixed[issue.type] + 1

  if details:
    return found, fixed, invalid, duplicate, other
  else:
    return found, fixed


def load_data(datafile):
  "Return a list of Issue objects for the specified data file."
  return map(Issue, open(datafile).readlines())


class Issue:
  "Represents a single issue from the exported IssueZilla data."
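  # Each input line should carry nine tab-separated fields, consumed
  # below: id, type, reporter, assigned, milestone, created, resolved,
  # resolution, summary.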

  def __init__(self, line):
    row = string.split(string.strip(line), '\t')

    self.id = int(row[0])
    self.type = row[1]
    self.reporter = row[2]
    if row[3] == 'NULL':
      self.assigned = None
    else:
      self.assigned = row[3]
    self.milestone = row[4]
    self.created = parse_time(row[5])
    self.resolution = row[7]
    if not self.resolution:
      # If the resolution is empty, then force the resolved date to None.
      # When an issue is reopened, there will still be activity showing
      # a "RESOLVED", thus we get a resolved date.  But we simply want to
      # ignore that date.
      self.resolved = None
    else:
      self.resolved = parse_time(row[6])
    self.summary = row[8]


parse_time_re = re.compile('([0-9]{4})-([0-9]{2})-([0-9]{2}) '
                           '([0-9]{2}):([0-9]{2}):([0-9]{2})')

def parse_time(t):
  "Convert an exported MySQL timestamp into seconds since the epoch."

  global parse_time_re

  if t == 'NULL':
    return None
  matches = parse_time_re.match(t)
  if not matches:
    # a non-matching string would otherwise raise AttributeError below
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)
  try:
    return time.mktime((int(matches.group(1)),
                        int(matches.group(2)),
                        int(matches.group(3)),
                        int(matches.group(4)),
                        int(matches.group(5)),
                        int(matches.group(6)),
                        0, 0, -1))
  except ValueError:
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)

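# Note: parse_time expects timestamps like '2003-12-31 23:59:59'
# (hypothetical value) and converts them via time.mktime, i.e. in the
# local timezone; the literal string 'NULL' maps to None.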
def shortusage():
  print pydoc.synopsis(sys.argv[0])
  print """
For simple text summary:
      find-fix.py [options] query-set-1.tsv YYYY-MM-DD YYYY-MM-DD

For gnuplot presentation:
      find-fix.py [options] query-set-1.tsv outfile
"""

def usage():
  shortusage()
  for x in long_opts:
    padding_limit = 18
    if x[0][-1:] == '=':
      print "   --" + x[0][:-1],
      padding_limit = 19
    else:
      print "   --" + x[0],
    print (' ' * (padding_limit - len(x[0]))), x[1]
  print '''
Option keywords may be abbreviated to any unique prefix.
Most options require "=xxx" arguments.
Option order is not important.'''

if __name__ == '__main__':
  main()
