#!/usr/bin/env python
#
# This program computes entropy in a vector
import math

# computes the entropy of a single vector
def entropy(vec):
    """Return the Shannon entropy, in bits, of the values in vec.

    The probability of each distinct value is its share of the len(vec)
    entries; the result adds up -p * log2(p) over the distinct values.
    An empty vec yields 0.
    """
    size = len(vec)

    # Tally occurrences as floats, which keeps the division below in
    # floating point.
    tally = {}
    for item in vec:
        tally[item] = tally.get(item, 0.0) + 1.0

    total = 0
    for count in tally.values():
        p = count / size
        total = total - p * math.log(p, 2)
    return total

# computes the conditional entropy of vec2 given vec1
def condentropy(vec1, vec2):
    """Return the entropy of vec2 conditioned on vec1.

    The entries of vec2 are grouped by the vec1 value found at the same
    position.  Each group's entropy is weighted by the fraction of positions
    that fall in that group, and the weighted entropies are added up.

    Returns -1 when the two vectors differ in length.
    """
    total = len(vec1)
    if total != len(vec2):
        return -1  # error

    # Bucket the vec2 entries under the vec1 value they are paired with.
    buckets = {}
    for key, outcome in zip(vec1, vec2):
        buckets.setdefault(key, []).append(outcome)

    result = 0
    for bucket in buckets.values():
        # cond entropy weight of this vec1 value
        share = float(len(bucket)) / total
        result += share * entropy(bucket)
    return result



# DATA

# Two sample vectors of equal length; condentropy pairs them index by index.
majors = [
    "math", "history", "cs", "math",
    "math", "cs", "history", "math",
]

likesgladiator = [
    "yes", "no", "yes", "no",
    "no", "yes", "no", "yes",
]


# EXECUTION

# Entropy of each vector on its own.
res = entropy(majors)
print("The entropy of the majors vector is: ", res)

res = entropy(likesgladiator)
print("The entropy of the likesgladiator vector is: ", res)

# Entropy of likesgladiator once the major is known.
res = condentropy(majors, likesgladiator)
print("conditional entropy is: ", res)

# res still holds the conditional entropy computed just above.
print("information gain = ", entropy(likesgladiator) - res)
