'''
Created on Mar 21, 2016

@author: Rishabh
'''

import math
import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame
import os
from os import path

# Function to calculate the distance between two latitude/longitude coordinates
def calculateDistance(lat1, lon1, lat2, lon2, radius=6371.0):
    """Return the great-circle distance between two points (haversine formula).

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 6371 (mean Earth radius in km).

    Returns:
        The distance along the sphere's surface as a float.

    Raises:
        ValueError: If a coordinate is infinite (``math.sin`` rejects it).
    """
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2

    a = (math.sin(half_dlat) ** 2
         + math.sin(half_dlon) ** 2 * math.cos(phi1) * math.cos(phi2))

    # Mathematically 0 <= a <= 1, but floating-point rounding can push it a
    # hair outside that range (e.g. for antipodal points), which would make
    # sqrt() raise a domain error. Explicit comparisons are used instead of
    # min()/max() so that a NaN input still propagates as NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c
    

# Function to generate the average, maximum and minimum from the income data of one file
def getMaxMinAvgIncomeData(filename, outfile):
    """Summarise the 'PINCP' column per 'STPUMA' group and write it as CSV.

    Reads the tab-delimited file ``filename``, groups its rows by the
    'STPUMA' column and, for the 'PINCP' values of every group, writes one
    row to ``outfile`` with these columns:

        name                      the group's 'STPUMA' value
        max, min, mean, count     statistics of 'PINCP' (missing values are
                                  ignored)
        normalizedMean            (mean - floor) / (max - floor), where
                                  floor is ``min`` if ``min`` > 0, else 0
        unadjustedNormalizedMean  (mean - min) / (max - min)

    A group whose range is zero yields NaN/inf in the normalised columns
    (NaN is written as an empty CSV field).

    Args:
        filename: Path of the tab-delimited input file; it must contain the
            columns 'STPUMA' and 'PINCP'.
        outfile: Path of the comma-separated file to write (overwritten).
    """
    print("%s %s" % (filename, outfile))
    dframe = pd.read_csv(filename, delimiter='\t')
    print("columns: %s" % (dframe.columns,))

    # One vectorised aggregation instead of growing a frame row by row.
    # Grouping by the plain column name (not a one-element list) keeps the
    # group keys scalar on every pandas version.
    stats = dframe.groupby('STPUMA')['PINCP'].agg(['max', 'min', 'mean', 'count'])

    # Non-positive (or missing) minima are floored at 0 for the adjusted
    # normalisation.
    floor = stats['min'].where(stats['min'] > 0, 0)
    stats['normalizedMean'] = (stats['mean'] - floor) / (stats['max'] - floor)
    stats['unadjustedNormalizedMean'] = (
        (stats['mean'] - stats['min']) / (stats['max'] - stats['min']))

    # Turn the group key into the leading 'name' column of the output.
    stats.index.name = 'name'
    stats.reset_index().to_csv(outfile, sep=',', index=False)
    

#Function to generate a mapping between pumas and malls
def mapPumaToMalls(mallDataFile, pumaGeographicalFile, stateMappingFile, outfile):
    """Attach to every mall the id of the nearest PUMA listed for its state.

    For each mall row, the value in its column 1 is translated through the
    state mapping file and used to select the PUMA rows whose column 0 has
    that translated value. Among those, the PUMA with the smallest
    ``calculateDistance`` to the mall is chosen and its column 1 value is
    appended to the mall's fields.

    Column usage (as indexed by the code; meanings presumed, confirm against
    the data files):
        mall file   -- column 1: state key, columns 2/3: latitude/longitude.
        PUMA file   -- only source columns 0, 1, 5, 6, 9, 10 are loaded;
                       loaded column 0: state key, 1: PUMA id,
                       4/5: latitude/longitude.
        state file  -- comma-separated, column 0 -> column 1.

    Args:
        mallDataFile: Tab-delimited mall file with a header row.
        pumaGeographicalFile: Tab-delimited PUMA file with a header row.
        stateMappingFile: Comma-separated state mapping file (no header).
        outfile: Path of the text file to write. Each line is the ``str()``
            of the mall's field list plus the PUMA id, with the surrounding
            square brackets stripped.

    Raises:
        KeyError: If a mall's state is absent from the mapping file, or the
            mapped state has no PUMA rows.
    """
    stateAbbMapping = np.genfromtxt(stateMappingFile, dtype=None, autostrip=True, delimiter=",")
    print(stateAbbMapping[0][1])

    # genfromtxt returns a 0-d array for a file with a single data row;
    # atleast_1d makes such files iterable like any other.
    mallData = np.atleast_1d(
        np.genfromtxt(mallDataFile, dtype=None, names=True, delimiter='\t'))
    pumaData = np.atleast_1d(
        np.genfromtxt(pumaGeographicalFile, dtype=None, names=True,
                      usecols=(0, 1, 5, 6, 9, 10), delimiter="\t"))
    print(pumaData[0])

    stateMap = {row[0]: row[1] for row in stateAbbMapping}
    print(stateMap)

    # State key -> indices of the PUMA rows belonging to that state.
    statePumaMap = {}
    for pumaIndex, puma in enumerate(pumaData):
        statePumaMap.setdefault(puma[0], []).append(pumaIndex)
    print(statePumaMap)
    print(len(pumaData[0]))

    result = []
    for mall in mallData:
        nearestIndex = None
        nearestDist = None
        for pumaIndex in statePumaMap[stateMap[mall[1]]]:
            puma = pumaData[pumaIndex]
            dist = calculateDistance(mall[2], mall[3], puma[4], puma[5])
            # An unusable distance must never win the comparison below.
            if dist is None or math.isnan(dist):
                continue
            # Strict '<' keeps the first PUMA on ties.
            if nearestDist is None or dist < nearestDist:
                nearestDist = dist
                nearestIndex = pumaIndex

        newElement = list(mall)
        # None marks a mall for which no distance could be computed.
        newElement.append(pumaData[nearestIndex][1] if nearestIndex is not None else None)
        result.append(newElement)

    print(result)
    with open(outfile, 'w') as f:
        for s in result:
            line = str(s)
            print(line)
            f.write(line.strip('[]'))
            f.write('\n')

def main(wantMapping=False, wantIncomeAggregation=True):
    """Run one of the two data-preparation steps on the hard-coded data paths.

    Args:
        wantMapping: If true, build the mall -> PUMA mapping file with
            ``mapPumaToMalls``. Takes precedence over the income step.
        wantIncomeAggregation: If true (and ``wantMapping`` is false), run
            ``getMaxMinAvgIncomeData`` on every regular file found directly
            in the input directory, writing a file of the same name into the
            output directory.
    """
    dir_path = 'C:/Users/Rishabh/Desktop/capstone project/dataStuff/latlongdata/pumaPopHousSampleData/pumaSelectedFiles/'
    out_dir_path = 'C:/Users/Rishabh/Desktop/capstone project/dataStuff/latlongdata/pumaPopHousSampleData/pumaIncomeMeanData/'
    mallDataFile = "C:/Users/Rishabh/Desktop/capstone project/dataStuff/latlongdata/mallsDataSelected.txt"
    pumaGeographicalFile = "C:/Users/Rishabh/Desktop/capstone project/dataStuff/latlongdata/2010_Gaz_PUMAs_national.txt"
    stateMappingFile = "C:/Users/Rishabh/Desktop/capstone project/dataStuff/latlongdata/stateAbbMapping.csv"
    outfile = "C:/Users/Rishabh/Desktop/capstone project/dataStuff/latlongdata/mallPumaMapping.csv"

    # path.join avoids the doubled separator that "dir/" + os.sep + name
    # produced, since both directory paths already end with a slash.
    files = [x for x in os.listdir(dir_path) if path.isfile(path.join(dir_path, x))]
    print(files)

    if wantMapping:
        mapPumaToMalls(mallDataFile, pumaGeographicalFile, stateMappingFile, outfile)
    elif wantIncomeAggregation:
        print("gettingMaxMin")
        for f in files:
            print(f)
            source = path.join(dir_path, f)
            target = path.join(out_dir_path, f)
            print((source, target))
            getMaxMinAvgIncomeData(source, target)
    print("done")
    

    
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
    
