# Takes runs, each of which is characterized by a timestamped sequence
# of calls to files (represented by numbers).
# There are good runs and bad runs in two separate lists.
# Tries to identify which calls to files might indicate a problem.
# March 8, 2015 version just looks at minimal file differences: files
# that are in bad runs but not in good ones and finds a minimal set
# that might explain the difference.
# This version does not consider the sequence at all.

import sys
import math
import csv
import os
import copy
import operator
import doctest
import itertools
import collections
import datetime
import random
from operator import itemgetter, attrgetter

sys.setrecursionlimit(20000)
now = datetime.datetime.now()
currentyear = now.year


# APPLICATION-SPECIFIC

def unionlist(mylist):
    """Return the set of all elements found in any of the lists in mylist.

    mylist is a collection of iterables of hashable elements.  An empty
    collection yields an empty set instead of raising IndexError.
    """
    return set().union(*mylist)


def findcounts(badones, lists):
    """Count how many of the lists contain each element of badones.

    Returns a list of [element, count] pairs, in the iteration order of
    badones.  A list that contains an element several times is counted
    once for that element.
    """
    return [[b, sum(1 for L in lists if b in L)] for b in badones]


def findneeded(sortedcounts, badruns):
    """Pick elements that together account for as many bad runs as possible.

    sortedcounts is a sequence of [element, count] pairs, visited in the
    order given (the caller sorts it by descending count).  badruns is a
    list of runs, each a list of elements.

    An element is selected only if at least one still-unexplained run
    contains it; every run containing it is then considered explained.
    Candidates that would explain nothing new are skipped, so the result
    never contains an element that is redundant at the time it is visited.
    This is a greedy pass over precomputed counts, not an exhaustive
    search, so the result is not guaranteed to be the smallest possible.

    Returns a tuple (needed, remaining): the selected elements in
    selection order, and the runs that no candidate explains.
    """
    remaining = list(badruns)
    needed = []
    for entry in sortedcounts:
        if not remaining:
            # Every bad run is already explained; nothing more is needed.
            break
        candidate = entry[0]
        uncovered = [run for run in remaining if candidate not in run]
        if len(uncovered) < len(remaining):
            needed.append(candidate)
            remaining = uncovered
    return needed, remaining


# DATA

good = [
    [1, 2, 3, 4],
    [3, 2, 5, 7, 8, 9],
    [3, 2, 1, 4, 3],
]

bad = [
    [1, 2, 3, 4, 17],
    [13, 2, 13, 7, 8, 9],
    [13, 2, 1, 14, 3],
]


# EXECUTION

# Each print below passes a single pre-formatted string so that the
# output is the same whether print is a statement or a function.
allgoods = unionlist(good)
allbads = unionlist(bad)
print('allgoods is %s' % (allgoods,))
print('allbads is %s' % (allbads,))

# Elements that occur in some bad run but in no good run.
x = allbads - allgoods
print('difference is %s' % (x,))

uniqcounts = findcounts(x, bad)
uniqcountssorted = sorted(uniqcounts, key=itemgetter(1), reverse=True)
print("Unique to bads, number of times")
for u in uniqcountssorted:
    print('%s %s' % (u[0], u[1]))

# newbad holds the bad runs left unexplained by the selected elements.
needed, newbad = findneeded(uniqcountssorted, bad)
for element in needed:
    print('need:  %s' % (element,))