
#!/usr/bin/env python
#
# This program computes the entropy of a vector and uses conditional
# entropy to print a decision outline for a table read from a file


import math


#-------------------Entropy Functions------------------------


# computes the entropy of a single vector
def entropy(vec):
    """Return the Shannon entropy, in bits, of the values in *vec*.

    The probability of each distinct value is its relative frequency in
    *vec*.  An empty vector yields 0 because there is nothing to sum.

    vec -- a sized sequence of hashable values
    """
    # Float total so the division below is true division on Python 2 too.
    total = float(len(vec))

    counts = {}
    for value in vec:
        counts[value] = counts.get(value, 0) + 1

    ent = 0
    for count in counts.values():
        prob = count / total
        ent -= prob * math.log(prob, 2)
    return ent



# computes the entropy of vec2 depending on vec1
def condentropy(vec1, vec2):
    """Return the conditional entropy of *vec2* given *vec1*.

    The entries of *vec2* are grouped by the value found at the same
    position in *vec1*; the result is the sum of each group's entropy
    weighted by the fraction of positions that fall in that group.

    vec1 -- sequence of hashable values to condition on
    vec2 -- sequence of values whose entropy is measured

    Returns -1 when the two vectors differ in length, and 0 when both
    are empty.
    """
    if len(vec1) != len(vec2):
        return -1  # error sentinel, kept so existing callers are unaffected
    totlen = len(vec1)

    # Group the vec2 values by the vec1 value at the same position.
    groups = {}
    for given, outcome in zip(vec1, vec2):
        groups.setdefault(given, []).append(outcome)

    condent = 0
    for outcomes in groups.values():
        # float() keeps this a true division on Python 2 as well.
        weight = float(len(outcomes)) / totlen
        condent += weight * entropy(outcomes)
    return condent



#---------------------Other Functions------------------------


def fileToDictionary( fileName ):
    """Read a text table from *fileName* and return it as a dict of columns.

    The first line holds the column names, split on a single space.  Every
    later line is one row, split on two spaces.  The result maps each
    column name to the list of that column's values (as strings), in row
    order.

    An empty file yields an empty dict, and blank data lines are skipped.
    A row with fewer fields than there are column names raises IndexError.
    """
    # "with" closes the file even if reading raises.
    with open(fileName, "r") as handle:
        lines = [line.rstrip("\n") for line in handle]

    if not lines:
        return {}

    # NOTE(review): the header is split on one space but the rows on two
    # spaces -- kept as-is; confirm against the data file format.
    keys = lines[0].split(" ")
    rows = [line.split("  ") for line in lines[1:] if line]

    columns = {}
    for position, key in enumerate(keys):
        columns[key] = [row[position] for row in rows]
    return columns



#returns a list holding, for every position listed in index,
#the value stored at that position of map[key]
def getVector( map, key, index ):
    """Pick the entries of column ``map[key]`` at the positions in *index*.

    The result follows the order of *index*; an empty *index* gives an
    empty list.
    """
    return [map[key][position] for position in index]



def getOptions( vector ):
    """Return the distinct values of *vector* in first-seen order."""
    distinct = []
    for value in vector:
        if value in distinct:
            continue  # already recorded
        distinct.append(value)
    return distinct



def getNewIndex( array, index, option ):
    """Return the positions from *index* whose entry in *array* equals *option*.

    The order of *index* is preserved.
    """
    return [position for position in index if array[position] == option]



#-------------------Recursive Entropy------------------------


#driver function
def findHighestEntropy( map ):
    """Start the recursive outline printing for the whole table *map*.

    map -- dict of column name -> list of values

    Hands highestE every key of *map*, the index of every row and an
    initial step of 0.  The row count is taken from the first key's
    column.  An empty *map* prints nothing.
    """
    # list(map) works on Python 2 and 3; map.keys()[0] does not, because
    # Python 3 dict views cannot be subscripted.
    keys = list(map)
    if not keys:
        return

    index = list(range(len(map[keys[0]])))  #indexes of all of the members
    highestE( map, keys, index, 0 )



#recursive helper function
#step = the step in the outline; responsible for the indents
def highestE( map, keys, index, step ):
    """Print one level of the outline and recurse into each branch.

    map   -- dict of column name -> list of values
    keys  -- column names still available at this level
    index -- row positions that belong to the current branch
    step  -- recursion depth; each level indents by four spaces

    Reads the module-level name ``target`` as the column to predict.
    Among the non-target keys it picks the one with the lowest conditional
    entropy of the target (the first such key on ties), prints one line per
    distinct value of that key, and recurses on the matching rows without
    that key.  Stops when only one key is left or the target values of the
    current rows have zero entropy.
    """
    if len(keys) == 1:                        #base case 1
        return
    # The target vector is the same for every candidate key: build it once.
    targetVector = getVector(map, target, index)
    if entropy(targetVector) == 0:            #base case 2
        return

    candidates = [key for key in keys if key != target]
    if not candidates:
        return  # nothing left to split on

    minKey = None
    minEnt = None
    for key in candidates:
        ent = condentropy(getVector(map, key, index), targetVector)
        # Strict "<" keeps the earliest key when several tie.
        if minEnt is None or ent < minEnt:
            minEnt = ent
            minKey = key

    minMember = map[minKey]
    options = getOptions(getVector(map, minKey, index))
    newKeys = [key for key in keys if key != minKey]

    count = str(len(index))
    indent = "    " * step

    for option in options:
        newInd = getNewIndex(minMember, index, option)
        content = indent + minKey + ": " + option
        outcomes = getOptions(getVector(map, target, newInd))
        if len(outcomes) == 1:
            # Branch is decided: show the single remaining target value.
            content = content + ":  " + str(outcomes[0])
        # Pad to column 30 so the "(n/total)" counts line up.
        content = content.ljust(30) + "(" + str(len(newInd)) + "/" + count + ")"
        # One-argument print(...) gives the same output on Python 2 and 3.
        print(content)
        highestE( map, newKeys, newInd, step+1 )



#------------------------Variables---------------------------

# Name of the data file that fileToDictionary reads.
fileName = "MakeATree"
target = "target"	#used as key in the map; highestE reads this global
# NOTE: this name shadows the builtin map(); kept because the rest of the
# file passes the table around under this name.
map = fileToDictionary( fileName )

#--------------------------Main------------------------------

# One-argument print(...) gives the same output on Python 2 and 3.
print("\n------------Entropy Table------------")
findHighestEntropy( map )












