/*
GraphBlast Algorithm
graphbuild.h
2006(c) Modified by Diego Reforgiato Recupero
2007(c) Modified by Dmitry Skripin
*/

#include <vector>

#include "algebra.h"
#include "graph.h"

// Accumulated seconds measured around the bookkeeping sections of
// vDFS_GraphBuild::outputPath(). The name suggests callers subtract it from a
// measured run time -- TODO confirm against the code that reads it.
float time_to_remove = 0;

// Running byte count of the storage vet_graph::Insert() allocates when it is
// called with flag==0.
long int mem=0;  // count the memory in main memory of paths information
// Database handle read and updated by vet_graph::Compose(). It is not assigned
// in the visible code, so it has to be set elsewhere before Compose() runs.
Db* _dbHASH;

// Scratch timestamps: outputPath() stores clock() results in them to compute
// the elapsed time it adds to time_to_remove.
time_t beg,end;

////////////////////////////////////////////
//
// Class: vet_graph
// Usage: Bucket of the hash table used for GraphGrep filtering. For each
//        pattern it stores the graphs containing the pattern and how many
//        times the pattern occurs in each of them.
//
/////////////////////////////////////////////

// hash table for graphgrep filtering
class vet_graph 
{
private:
	string pat;
	int *num_graph;
	int *num_pattern;
	int cont_graphs;
	int collision;
	vet_graph *next;

public:
	vet_graph() 
	{                  // Constructor
		num_graph = NULL;
		num_pattern = NULL;
		collision = 0;
		pat = "";
		cont_graphs = 0;
		next = NULL;
	}

	void Free1() 
	{         // Free Memory
		if(this->num_graph!=NULL)
			free(this->num_graph);
		if(this->num_pattern!=NULL)
			free(this->num_pattern);
		this->num_graph = NULL;
		this->num_pattern = NULL;
		this->collision=0;
		this->pat="";
		this->cont_graphs=0;
		if(this->next!=NULL) {
			Free_Ric(this->next);
			this->next=NULL;
		}
	}

	void Free_Ric(vet_graph *a) 
	{  // Free Memory
		if(a->num_graph!=NULL)
			free(a->num_graph);
		if(a->num_pattern!=NULL)
			free(a->num_pattern);
		a->num_graph = NULL;
		a->num_pattern = NULL;
		a->collision=0;
		a->pat="";
		a->cont_graphs=0;
		if(a->next!=NULL)
			Free_Ric(a->next);
		free(a);
	}

	void Free() 
	{   // Free Memory
		int y=0;
		vet_graph *tmp,*tmp1;
		tmp=this;
		do {
			if(tmp->num_graph!=NULL)
				free(tmp->num_graph);
			if(tmp->num_pattern!=NULL)
				free(tmp->num_pattern);
			tmp->num_graph = NULL;
			tmp->num_pattern = NULL;
			tmp->collision=0;
			tmp->pat="";
			tmp->cont_graphs=0;
			tmp1=tmp;
			tmp=tmp->next;
			tmp1->next=NULL;
			if(y==1)
				if(tmp1!=NULL) 
				{
					free(tmp1);
					tmp1=NULL;
				}
				y=1;
		}
		while(tmp!=NULL);
	}

	void Merge(int *a,int *b,int *temp1,int *temp2,int first,int last) { // Merge procedure of Mergesort
		int mid,i1,i2,i3;
		mid = (first + last)/2;
		i1 = i2 = first;
		i3 = mid + 1;
		while(i2<=mid && i3<=last)
			if(a[i2] < a[i3]) 
			{
				temp1[i1] = a[i2];
				temp2[i1++] = b[i2++];
			}
			else 
			{
				temp1[i1] = a[i3];
				temp2[i1++] = b[i3++];
			}
			while(i2<=mid) 
			{
				temp1[i1] = a[i2];
				temp2[i1++] = b[i2++];
			}
			while(i3<=last) 
			{
				temp1[i1] = a[i3];
				temp2[i1++] = b[i3++];
			}
			i1=first;
			for(i2=first;i2<=last;i2++) 
			{
				a[i2]=temp1[i1];
				b[i2]=temp2[i1++];
			}
	}

	void Sort(int *a,int *b,int *temp1,int *temp2,int first,int last) 
	{  // Mergesort to sort temp1 and temp2
		int mid;
		if(first<last) 
		{
			mid = (first + last)/2;
			Sort(a,b,temp1,temp2,first,mid);
			Sort(a,b,temp1,temp2,mid+1,last);
			Merge(a,b,temp1,temp2,first,last);
		}
	}

	// insert procedure in hash table
	void Insert(string s,int gn,int npt,int flag) 
	{  // Stores s,gn,npt
		if(pat=="") { // First entry. Empty
			pat = s;
			num_graph = (int *)realloc(num_graph,sizeof(int)*(cont_graphs+1));
			if(flag==0) {
				mem += (long int)sizeof(int)*(cont_graphs+1);
				mem += (long int)sizeof(int)*(cont_graphs+1);
			}
			num_graph[cont_graphs] = gn;
			num_pattern = (int *)realloc(num_pattern,sizeof(int)*(cont_graphs+1));
			num_pattern[cont_graphs++] = npt;
		}
		else 
		{ // busy entry.
			if(pat == s) 
			{ // no collision. If gn exists then increment num_pattern
				for(int i=0;i<cont_graphs;i++) 
				{
					if(num_graph[i] == gn) 
					{
						(num_pattern[i])+=npt;
						return;
					}
				} // else insert a new graph gn for the pattern
				num_graph = (int *)realloc(num_graph,sizeof(int)*(cont_graphs+1));
				if(flag==0)
					mem += (long int)sizeof(int)*(cont_graphs+1);
				num_graph[cont_graphs] = gn;
				num_pattern = (int *)realloc(num_pattern,sizeof(int)*(cont_graphs+1));
				if(flag==0)
					mem += (long int)sizeof(int)*(cont_graphs+1);
				num_pattern[cont_graphs++] = npt;
				int temp1[cont_graphs];
				int temp2[cont_graphs];
				Sort(num_graph,num_pattern,temp1,temp2,0,cont_graphs-1);
				for(int ij=0;ij<cont_graphs;ij++) 
				{
					num_graph[ij]=temp1[ij];
					num_pattern[ij]=temp2[ij];
				}
			}
			else 
			{ // collision.
				// manage collision by using a linked list
				vet_graph *link,*link1;
				link = this->next;
				if(this->next == NULL) { // If it's the first collision
					collision++;
					vet_graph *tmp = new vet_graph();
					if(flag==0)
						mem += (long int)sizeof(vet_graph);
					tmp->pat = s;
					tmp->num_graph = (int *)malloc(sizeof(int)*(tmp->cont_graphs+1));
					if(flag==0)
						mem += (long int)sizeof(int)*(tmp->cont_graphs+1);
					tmp->num_graph[tmp->cont_graphs] = gn;
					tmp->num_pattern = (int *)malloc(sizeof(int)*(tmp->cont_graphs+1));
					if(flag==0)
						mem += (long int)sizeof(int)*(tmp->cont_graphs+1);
					tmp->num_pattern[tmp->cont_graphs++] = npt;
					this->next = tmp;
					return;
				}
				while(link!=NULL) 
				{ // If already exists a pattern pat and the graph gn then increment
					if(link->pat == s) 
					{
						for(int i=0;i<link->cont_graphs;i++) 
						{
							if(link->num_graph[i] == gn) 
							{
								(link->num_pattern[i])+=npt;
								return;
							}
						} // The graph gn doesn't exist. So create e new graph entry
						link->num_graph = (int *)realloc(link->num_graph,sizeof(int)*(link->cont_graphs+1));
						if(flag==0)
							mem += (long int)sizeof(int)*(link->cont_graphs+1);
						link->num_graph[link->cont_graphs] = gn;
						link->num_pattern = (int *)realloc(link->num_pattern,sizeof(int)*(link->cont_graphs+1));
						if(flag==0)
							mem += (long int)sizeof(int)*(link->cont_graphs+1);
						link->num_pattern[link->cont_graphs++] = npt;
						int temp1[link->cont_graphs];
						int temp2[link->cont_graphs];
						Sort(link->num_graph,link->num_pattern,temp1,temp2,0,link->cont_graphs-1);
						for(int ij=0;ij<link->cont_graphs;ij++) 
						{
							link->num_graph[ij]=temp1[ij];
							link->num_pattern[ij]=temp2[ij];
						}
						return;
					}
					link1 = link;
					link = link->next;
				}
				// if doesn't exist the pattern pat
				collision++;
				vet_graph *tmp = new vet_graph();
				if(flag==0)
					mem += (long int)sizeof(vet_graph);
				tmp->pat = s;
				tmp->num_graph = (int *)malloc(sizeof(int)*(tmp->cont_graphs+1));
				if(flag==0)
					mem += (long int)sizeof(int)*(tmp->cont_graphs+1);
				tmp->num_graph[tmp->cont_graphs] = gn;
				tmp->num_pattern = (int *)malloc(sizeof(int)*(tmp->cont_graphs+1));
				if(flag==0)
					mem += (long int)sizeof(int)*(tmp->cont_graphs+1);
				tmp->num_pattern[tmp->cont_graphs++] = npt;
				link1->next = tmp;
			}
		}
	}

	int get_cont_graph() 
	{
		return cont_graphs;
	}

	int * Compose(int j,int &siz) 
	{ // Called when storing information into the DB. Called in two ways:
		// 1) when mem > MEM_THRESHOLD
		// 2) after reading the last graph in graphbuild.cpp in readNextGraph
		Dbt key,data;
		int *str = NULL,ret;
		char d[10];

		strcpy(d,"");
		sprintf(d,"%d",j);
		int size_str = 0;
		key.set_data((char *)d);
		key.set_size(strlen(d)+1);
		ret = _dbHASH->get(0,&key, &data, 0);

		if(ret==DB_NOTFOUND) 
		{ // not found the record with key 'j'
			str = (int *)malloc(sizeof(int)*2);
			size_str = 2;
			str[0] = collision;
			str[1] = pat.size();
			int i=0;
			while((pat.c_str())[i]!='\0') 
			{
				str = (int *)realloc(str,sizeof(int)*(size_str+1));
				str[size_str] = ((pat.c_str())[i++]);
				size_str++;
			}
			str = (int *)realloc(str,sizeof(int)*(size_str+1));
			str[size_str] = cont_graphs;
			size_str++;
			for(i=0;i<cont_graphs;i++) 
			{
				str = (int *)realloc(str,sizeof(int)*(size_str+2));
				str[size_str] = num_graph[i];
				size_str++;
				str[size_str] = num_pattern[i];
				size_str++;
			}

			vet_graph *link;
			link = next;

			while(link!=NULL) 
			{
				str = (int *)realloc(str,sizeof(int)*(size_str+1));
				str[size_str] = link->pat.size();
				size_str++;
				i = 0;
				while((link->pat.c_str())[i]!='\0') 
				{
					str = (int *)realloc(str,sizeof(int)*(size_str+1));
					str[size_str] = ((link->pat.c_str())[i++]);
					size_str++;
				}
				str = (int *)realloc(str,sizeof(int)*(size_str+1));
				str[size_str] = link->cont_graphs;
				size_str++;
				for(i=0;i<link->cont_graphs;i++) 
				{
					str = (int *)realloc(str,sizeof(int)*(size_str+2));
					str[size_str] = link->num_graph[i];
					size_str++;
					str[size_str] = link->num_pattern[i];
					size_str++;
				}
				link = link->next;
			}
			siz = size_str;
			return str;
		}
		else 
		{ // record with key 'd' already exists and it is stored in data
			int col,i,j1=0,k,cont,gg,ngg;
			char *pt=NULL;
			int *punt=(int *)data.get_data();
			col = punt[0];
			for(i=0;i<=col;i++) 
			{
				pt = (char *)malloc(sizeof(char)*(punt[j1+1]+1));
				for(k=0;k<punt[j1+1];k++) 
				{
					pt[k]=punt[j1+2+k];
				}
				pt[k]='\0';
				j1+=k;
				cont=punt[j1+2]; // cont graphs
				for(k=0;k<2*cont;k+=2) 
				{
					gg = punt[j1+2+1+k];
					ngg = punt[j1+2+2+k];
					Insert(pt,gg,ngg,1);
				}
				free(pt);
				j1 = k+2+j1;
			}
			ret = _dbHASH->del(0,&key,0);
			if(ret == DB_NOTFOUND) 
			{
				cout << "Error!!!";
				exit(1);
			}
			return Compose(j,siz);
		}
	}
};


// Global bucket array of the GraphGrep filter. vDFS_GraphBuild::outputPath()
// indexes it with a hash of the label path reduced modulo HASHP.
vet_graph *vet_map=new vet_graph[HASHP]; // HASHP objects of vet_graph

////////////////////////////////////////////
//
// Class: vDFS_GraphBuild
// Usage: GraphBuild main class. Performs a DFS traversal of the graph and
//        stores every pattern (label path) found.
//
/////////////////////////////////////////////

template <class GraphADT> class vDFS_GraphBuild 
{
	GraphADT &G;			//The Graph reference need to be visited
	DEGREE<GraphADT> degree;	//Degree information of this graph

	//data used in DFS travseral
	int cnt,LP,curLP;
	int pathcount,backEdge;

	NodeType* pathstack;		//Stack contains nodes of the current path
	//NodeType* NodeDegree;		//Stack contains degree of nodes in the current path
	string strNodeLabel;		//String contains lable of nodes in the current path
	string strNodeDegree;		//String contains degree of nodes in the current path

	fingerprint fp;				//fingerprint of this graph

	int DegreeCount;			//tmp variable for degree count
	int tablecount;				//tmp variable for table count

	//Checking if this edge already visited in the current path
	bool checkEdgeVisited(Edge e)
	{
		if(curLP>3)
		{  //no possible looping(1-2-3-1) for nodes less than 4(lp<=3), because already check parent-edge before
			for(int i=1;i<curLP;i++)
			{	//checking all nodes in the path-stack
				NodeType n1,n2;

				n1=pathstack[i-1];
				n2=pathstack[i];

				if(e.v==n1 && e.w==n2)
					return true;
				else if(e.w==n1 && e.v==n2)
					return true;
			}
		}
		return false;
	}


	//Data-structure store all the Label-Path(pattern) data
	Map_PathTableWithDegree* myDegreePathMap;
	Map_PathTable* myPathMap;
	//Map_PathTable* EdgeMap;

	//output a path of given LengthPath
	void outputPath(int lp) 
	{
		pathcount++;

		string tablename(G.getName());

		beg = clock();
		if(FILTERING2==true) 
		{ // GRAPHGREP FILTERING
			// hash-table init computation
			int h=0;
			int c_st = 0;
			while((strNodeLabel.c_str())[c_st]!='\0') 
			{
				h = (64*h + (strNodeLabel.c_str())[c_st++])%HASHP;
			}

			int gn=0;
			int dec=1;
			for(int ign=tablename.size()-1;ign>1;ign--) 
			{
				gn += ((tablename.c_str())[ign]-48)*dec;
				dec *= 10;
			}
			vet_map[h].Insert(strNodeLabel,gn,1,0);
		}
		end = clock();
		time_to_remove += (float)(end-beg)/CLOCKS_PER_SEC;
		tablename+=strNodeLabel;

		if(FILTERING2 == false) // FROWNS FILTERING
			unsigned int FP=fp.addPattern(strNodeLabel);

		beg = clock();

		Map_PathTable::iterator it=(*myPathMap).find(tablename);
		if( it!=(*myPathMap).end())
		{
			(*it).second->insertData(pathstack);
		}else{
			PathTable *myPathTab=new PathTable(tablename,lp);
			myPathTab->insertData(pathstack);
			(*myPathMap).insert(Map_PathTable::value_type(tablename,myPathTab));
			tablecount++;
		}


		end = clock();
		time_to_remove += (float)(end-beg)/CLOCKS_PER_SEC;
	}

	//show the element of input vector
	void show(const vector<int>& out)
	{
		cout<<"#path:";

		for(int j=0;j<out.size();j++)
		{
			cout<<out[j]<<" ";
		}
		cout<<endl;	
	};

	//DFS traversal(right side)
	void dfsR(Edge e);

public:
	//Constructor
	// Params: 1. GraphADT Object 
	//         2. Lengthpath 
	vDFS_GraphBuild(GraphADT&,int LP);

	//Destructor
	~vDFS_GraphBuild();

	//Get the PathTable(with degree) of this graph
	Map_PathTableWithDegree* getMapDegreePathTable(){return myDegreePathMap;};
	//Get the PathTable of this graph
	Map_PathTable* getMapPathTable(){return myPathMap;};
	//Map_PathTable* getEdgePathTable(){return EdgeMap;};
	//Get the fingerprint of this graph
	unsigned int* getFingerPrint(){return fp.getFP();};
};

/////////////////////////////////////////////////////////////
//	
//	Implementation details of the vDFS_GraphBuild template class.
//	Placed here because template definitions must be visible to their users.
//
/////////////////////////////////////////////////////////////

//DFS traversal of this graph, continuing from a given edge.
// e.v is the node we came from, e.w the node being entered. The node is
// pushed on the path (position, degree and label), its neighbours are
// explored, the resulting path is output where required, and the path state
// is restored before returning (curLP is decremented for the caller).
template <class GraphADT>
void vDFS_GraphBuild<GraphADT>::dfsR(Edge e)
{
	int w=e.w;

	//Node position information
	pathstack[curLP]=w;

	//Degree information. The buffer holds any int in decimal, sign and
	//terminator included.
	char num[32];
	//NodeDegree[curLP]=degree[w];
	sprintf(num,"%d",degree[w]);

	const int oldDegreeLen=strNodeDegree.size();
	strNodeDegree +=" ";
	strNodeDegree +=num;

	//degree count
	DegreeCount+=degree[w];

	//Label information
	const int oldLabelLen=strNodeLabel.size();
	strNodeLabel +=G.getLabel(w);

	typename GraphADT::adjIterator A(G,w);

	int ichild=0;			//neighbours considered (all except the parent)
	bool isOverLP=false;		//the path reached the length limit
	bool isEdgeExist=false;		//the last edge tried is already on the path

	for(int t=A.beg();!A.end();t=A.nxt())
	{
		if(t==e.v)
			continue;	//never walk straight back to the parent

		Edge x(w,t);

		ichild++;
		curLP++;

		if(curLP>=(LP+1))
		{	//over the LP limit: output the path and stop exploring
			outputPath(curLP);
			isOverLP=true;
			isEdgeExist=false;

			curLP--;
			break;
		}

		if(checkEdgeVisited(x))
		{	//edge already on the current path: try the next sibling
			isEdgeExist=true;
			curLP--;
		}
		else
		{
			dfsR(x);	//go deeper; the callee undoes the curLP++ above
		}
	}

	//Check whether the path ending at this node must be output
	if(ichild==0 && curLP>=1)
	{	//leaf node
		outputPath(curLP+1);
	}
	else if(isEdgeExist && !isOverLP)
	{	//loop: output it and count the back edge
		outputPath(curLP+1);
		backEdge++;
	}
	else if(ichild>0 && !isOverLP)
	{
		outputPath(curLP+1);
	}

	//Pop this node from the path state
	curLP--;
	DegreeCount -=degree[w];

	strNodeLabel.erase(oldLabelLen);
	strNodeDegree.erase(oldDegreeLen);
}

//Constructor: allocates the path state and runs the whole DFS traversal.
// Params: 1. GraphADT Object
//         2. Lengthpath (maximum path length LP)
// Every member is initialised, in declaration order. myDegreePathMap is not
// built by this class, so it starts out NULL rather than indeterminate
// (getMapDegreePathTable() returns it).
template <class GraphADT>
vDFS_GraphBuild<GraphADT>::vDFS_GraphBuild(GraphADT &G,int lp)
	:G(G),degree(G),
	 cnt(0),LP(lp),curLP(0),
	 pathcount(0),backEdge(0),
	 pathstack(NULL),
	 fp(NUM_INTS),
	 DegreeCount(0),tablecount(0),
	 myDegreePathMap(NULL),myPathMap(NULL)
{
	//allocate the path stack (indices 0..LP are written by dfsR)
	pathstack=new NodeType[LP+1];
	//NodeDegree=new NodeType[LP+1];

	//create pathtable
	myPathMap=new Map_PathTable();

	//run the DFS traversal from every node in turn
	for(int v=0;v<G.V();v++)
	{
		if(degree[v]>0)
		{
			dfsR(Edge(v,v));
		}
		else if(degree[v]==0)
		{	//isolated node: output it as a single-node path
			strNodeLabel +=G.getLabel(v);
			pathstack[0]=v;
			outputPath(1);

			strNodeLabel=string("");
		}
		curLP=0;
	}

	//screen out
	if(!QUIET_MODE)
	{
		cout<<"total path:"<<pathcount<<"\n";
		cout<<"total backEdge:"<<backEdge<<"\n";
		cout<<"total table:"<<tablecount<<"\n";
	}
}

//Destructor: releases the path stack, every PathTable stored in the map and
//the map itself.
template <class GraphADT>
vDFS_GraphBuild<GraphADT>::~vDFS_GraphBuild()
{
	//pathstack was allocated with new[] in the constructor
	delete[] pathstack;

	for(Map_PathTable::iterator it=myPathMap->begin();it!=myPathMap->end();++it)
	{
		delete it->second;
	}

	delete myPathMap;
}

