From dlc333@nyu.edu  Tue Dec 15 13:43:20 2009
Received: from mx.cims.nyu.edu (mx.cims.nyu.edu [128.122.80.107])
	by mail.cims.nyu.edu (8.13.8+Sun/8.13.8) with ESMTP id nBFIhKUI022270
	for <shasha@mail.cims.nyu.edu>; Tue, 15 Dec 2009 13:43:20 -0500 (EST)
Received: from mx3.nyu.edu (MX3.NYU.EDU [128.122.118.243])
	by mx.cims.nyu.edu (8.13.8+Sun/8.13.8) with ESMTP id nBFIhGrx002480
	for <shasha@courant.nyu.edu>; Tue, 15 Dec 2009 13:43:16 -0500 (EST)
Received: from mx3.nyu.edu (localhost [127.0.0.1])
	by mx3.nyu.edu (8.13.8/8.13.8) with ESMTP id nBFIhBZ4008886
	for <shasha@courant.nyu.edu>; Tue, 15 Dec 2009 13:43:11 -0500 (EST)
Received: from 172-26-36-98.DYNAPOOL.NYU.EDU (172-26-36-98.DYNAPOOL.NYU.EDU [172.26.36.98])
	(authenticated bits=0)
	by mx3.nyu.edu (8.13.8/8.13.8) with ESMTP id nBFIhAl8008873
	(version=TLSv1/SSLv3 cipher=AES128-SHA bits=128 verify=NOT);
	Tue, 15 Dec 2009 13:43:11 -0500 (EST)
Message-Id: <276CD643-26F0-4D6C-8834-138C14386EA1@nyu.edu>
From: Dana Cohen <dlc333@nyu.edu>
To: Dennis Shasha <shasha@courant.nyu.edu>
Content-Type: multipart/mixed; boundary=Apple-Mail-15--872449738
Mime-Version: 1.0 (Apple Message framework v936)
Subject: dana & michael tree2
Date: Tue, 15 Dec 2009 13:43:11 -0500
Cc: mjk474@nyu.edu
X-Mailer: Apple Mail (2.936)
X-Scanned-By: MIMEDefang 2.58 on 128.122.80.107
X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-3.0 (mx.cims.nyu.edu [128.122.80.107]); Tue, 15 Dec 2009 13:43:20 -0500 (EST)
Status: R
Content-Length: 4925


--Apple-Mail-15--872449738
Content-Type: text/plain;
	charset=US-ASCII;
	format=flowed;
	delsp=yes
Content-Transfer-Encoding: 7bit

Dear Professor,
	We were able to present the solution in the way you wanted us to  
present it (it is attached). We hope that since we have not been as  
exposed to programming as some other students, we are not penalized  
for the lack of previous knowledge in the subject matter. We were both  
unaware of the intensive computer programming we were going to do, and  
believe that if we had done this before we would not have needed to get so  
much help from other students. We believe that since we understand the  
subject and the programming functions, though we do not know how to  
type it, we should not be penalized harshly - Python is like learning  
a completely new language and we do not believe that in the time we  
had this entire semester we would be able to make such an advanced  
program on our own.
Thanks for a great semester,
Dana Cohen and Michael Kasdan




--Apple-Mail-15--872449738
Content-Disposition: attachment;
	filename=danamichaeltree2.py
Content-Type: text/x-python-script;
	x-unix-mode=0644;
	name="danamichaeltree2.py"
Content-Transfer-Encoding: 7bit

#!/usr/bin/env python
#
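# This program builds a decision tree from a data file: it repeatedly splits
# the rows on the column with the lowest conditional entropy of the target.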


import math


# computes the entropy of a single vector
def entropy(vec):
    """Return the Shannon entropy, in bits, of the values in *vec*.

    Each distinct value's probability is its frequency divided by
    ``len(vec)``.  An empty vector, or one holding a single distinct
    value, has entropy 0.  Values must be hashable because they are
    used as dictionary keys.
    """
    totlen = len(vec)
    answercount = {}  # value -> number of occurrences
    for value in vec:
        answercount[value] = answercount.get(value, 0) + 1
    ent = 0
    for count in answercount.values():
        # float() forces true division on Python 2 as well as Python 3
        prob = count / float(totlen)
        ent += -prob * math.log(prob, 2)
    return ent

# computes the entropy of vec2 depending on vec1
def condentropy(vec1, vec2):
    """Return the conditional entropy of *vec2* given *vec1*.

    The two vectors are read in lockstep: the values of *vec2* are
    grouped by the value found at the same position in *vec1*, and the
    result is the sum of each group's entropy weighted by the group's
    share of the rows.

    Returns -1 when the vectors differ in length (the error sentinel
    this function has always used) and 0 when both are empty.
    """
    if len(vec1) != len(vec2):
        return -1  # error: the vectors must describe the same rows
    totlen = len(vec1)
    answervec = {}  # vec1 value -> list of the matching vec2 values
    for given, outcome in zip(vec1, vec2):
        answervec.setdefault(given, []).append(outcome)
    condent = 0
    for outcomes in answervec.values():
        # fraction of all rows that fall in this group
        weight = len(outcomes) / float(totlen)
        condent += weight * entropy(outcomes)
    return condent



def fileToDictionary(fileName):
    """Read a column-oriented text table into a dict of lists.

    The first line of the file is the header; it is split on single
    spaces to obtain the column names.  Every following line is one row,
    split on runs of exactly two spaces.  Blank lines after the header
    are skipped.

    NOTE(review): the header and the rows use different separators (one
    space vs. two) -- this mirrors the original parsing; confirm it
    matches the data file's real layout.

    Returns a dict mapping each column name to the list of that column's
    values (as strings), in file order.  Raises IndexError if the file is
    empty or if a row has fewer fields than the header.
    """
    # "with" closes the file even if reading fails part-way through
    with open(fileName, "r") as handle:
        lines = [line[:-1] if line.endswith("\n") else line
                 for line in handle]

    keys = lines[0].split(" ")
    # skip empty lines (e.g. a trailing blank line) instead of crashing
    rows = [line.split("  ") for line in lines[1:] if line]

    parsed = {}
    for column, key in enumerate(keys):
        parsed[key] = [row[column] for row in rows]
    return parsed


# Name of the data file that is read when this module is loaded.
fileName = "maketree"
# Name of the column the tree predicts; highestE reads this as a global.
target = "target"	
# Column name -> list of values, loaded once from fileName at import time.
parsed = fileToDictionary( fileName )

#returns an array of values in parsed[key] corresponding to
#the values in index
def getVector( parsed, key, index ):
    """Return the entries of column ``parsed[key]`` at the positions in *index*.

    The result is a new list in the same order as *index*; the column is
    only looked up when *index* is non-empty.
    """
    return [parsed[key][position] for position in index]


def getOptions( vector ):
    """Return the distinct values of *vector* in first-seen order."""
    distinct = []
    for value in vector:
        if value in distinct:
            continue  # already recorded
        distinct.append(value)
    return distinct


def getNewIndex( array, index, option ):
    """Return the positions from *index* whose entry in *array* equals *option*.

    The order of *index* is preserved in the result.
    """
    return [position for position in index if array[position] == option]

#driver function
def findSolution( parsed ):
    """Print the decision tree for the whole table (driver function).

    *parsed* maps column names to equally long lists of values.  All row
    positions and all column names are handed to ``highestE``, which
    does the recursive splitting and printing.  An empty table prints
    nothing.
    """
    if not parsed:
        return  # no columns, so there is nothing to split on
    # list(...) keeps this working where dict.keys() is not subscriptable
    keys = list(parsed)
    index = list(range(len(parsed[keys[0]])))  # positions of all the rows
    highestE( parsed, keys, index, 0 )



#recursive helper function

def highestE( parsed, keys, index, step ):
    """Recursively print the decision-tree branches for the rows in *index*.

    parsed -- dict mapping column names to lists of values
    keys   -- column names still available at this level
    index  -- positions of the rows that reached this node
    step   -- recursion depth, used to indent non-leaf lines

    Despite its name, the function picks the non-target column with the
    LOWEST conditional entropy of the target (ties go to the earliest
    column in *keys*).  For each distinct value of that column it prints
    a question line, then either the single remaining target value with
    its row count or the padded "column: value" label, and recurses on
    the matching rows with the chosen column removed.

    The target column name is read from the module-level ``target``.
    """
    if len(keys) == 1:  # base case 1: only one column name is left
        return
    targetVec = getVector(parsed, target, index)
    if entropy(targetVec) == 0:  # base case 2: target already uniform
        return

    # Find the non-target column that best predicts the target.
    candidates = [k for k in keys if k != target]
    minKey = candidates[0]
    minEnt = condentropy(getVector(parsed, minKey, index), targetVec)
    for key in candidates[1:]:
        ent = condentropy(getVector(parsed, key, index), targetVec)
        if ent < minEnt:  # strict: the earliest column wins ties
            minEnt = ent
            minKey = key

    minMember = parsed[minKey]
    options = getOptions(getVector(parsed, minKey, index))
    newKeys = [k for k in keys if k != minKey]
    indent = "    " * step

    for option in options:
        newInd = getNewIndex(minMember, index, option)
        outcomes = getOptions(getVector(parsed, target, newInd))

        print("Is " + minKey + "=" + option + "?")
        if len(outcomes) == 1:
            # leaf: every matching row has the same target value
            content = str(outcomes[0]) + " happens " + "(" + str(len(newInd)) + ")" + " times."
        else:
            # inner node: label padded with spaces to 30 characters
            content = (indent + minKey + ": " + option).ljust(30)
        print(content)

        # For a leaf this returns at once through base case 2.
        highestE(parsed, newKeys, newInd, step + 1)





# Script entry point: print the tree for the table loaded into `parsed`.
# A parenthesised single-argument print emits the same text on Python 2
# and is also valid Python 3 syntax.
print("Solution: ")
findSolution( parsed )

--Apple-Mail-15--872449738--

