# Takes runs, each of which is characterized by a timestamped sequence
# of calls to files (represented by numbers).
# There are good runs and bad runs in two separate lists.
# Tries to identify which calls to files might indicate a problem.
# March 8, 2015 version just looks at minimal file differences: files
# that are in bad runs but not in good ones and finds a minimal set
# that might explain the difference.
# This version does not consider the sequence at all.


import sys
import math
import csv
import os
import copy
import operator
import doctest
import itertools
import collections
import datetime
import random
from operator import itemgetter, attrgetter
sys.setrecursionlimit(20000) 

now = datetime.datetime.now()
currentyear = now.year

# APPLICATION-SPECIFIC

# take the union of a collection of lists
def unionlist(mylist):
	"""Return the set of all elements that occur in any list of mylist.

	mylist is an iterable of iterables with hashable elements.
	An empty mylist yields an empty set instead of raising IndexError.
	"""
	# set().union accepts any number of iterables, including none at all,
	# so the empty collection needs no special case.
	return set().union(*mylist)

# For every element of badones, count the lists that contain it.
def findcounts(badones, lists):
	"""Return a list of [element, count] pairs, one per element of badones.

	count is the number of entries of lists that contain the element; an
	entry holding the element several times still contributes only one.
	Pairs appear in the iteration order of badones.
	"""
	return [
		[candidate, sum(1 for run in lists if candidate in run)]
		for candidate in badones
	]
	

# DATA

# Runs classified as good: each inner list is one run, given as the file
# numbers it called. A number may repeat within a run.
good = [ [1, 2, 3, 4], [3, 2, 5, 7, 8, 9], [3, 2, 1, 4, 3]]

# Runs classified as bad, in the same layout. Files 13, 14 and 17 occur
# only in these runs and in none of the good ones.
bad = [ [1, 2, 3, 4, 17], [13, 2, 13, 7, 8, 9], [13, 2, 1, 14, 3]]

# EXECUTION

# Files seen in at least one good run, and in at least one bad run.
allgoods = unionlist(good)
allbads = unionlist(bad)
# Single-argument print(...) with %-formatting emits the same text as the
# Python 2 print statement and is also valid syntax on Python 3.
print('allgoods is %s' % (allgoods,))
print('allbads is %s' % (allbads,))

# Candidate culprits: files that occur in bad runs but in no good run.
x = allbads - allgoods
print('difference is %s' % (x,))

# Rank the candidates by the number of bad runs that contain them.
uniqcounts = findcounts(x, bad)
uniqcountssorted = sorted(uniqcounts, key=itemgetter(1), reverse=True)
print("Unique to bads, number of times")
for u in uniqcountssorted:
	print('%s %s' % (u[0], u[1]))

# Walk the candidates from most to least frequent and keep those that
# explain at least one bad run not yet explained by an earlier candidate.
# The runs are never mutated, so a shallow copy of the list is enough.
newbad = list(bad)
for u in uniqcountssorted:
	if not newbad:
		# Every bad run that a candidate can explain is already covered.
		break
	newnewbad = [b for b in newbad if u[0] not in b]
	if len(newnewbad) == len(newbad):
		# This candidate appears only in runs that are already explained,
		# so reporting it would make the result larger than necessary.
		continue
	print('need:  %s' % (u[0],))
	newbad = newnewbad