/* M E D I C A L  L A N G U A G E  P R O C E S S I N G, LLC
   (c) 2005 All rights reserved.
   Read Terms of Use at http://mlp-xml.sourceforge.net.
   Contact medical_language_processing@gmail.com
*/
#define LPAREN 0
#define RPAREN 1
#define LBRACK 2
#define RBRACK 3
#define PERIOD 4
#define EOFMK 5
#define QOTMRK 6
#define QUOTE 7

#include <iostream.h>
#include <fstream.h>
#include <stdlib.h>
#include <string.h>
#include "lispdefs.fcm"
#include "symtab.h"
#include "nodefs.fcm"
#include "common.fcm"

// Integer truth values used for the FALSE comparisons/returns in this file.
#define FALSE 0
#define TRUE 1

// Helpers that are declared here but not defined in the visible part of
// this file.
extern char * nodnam(int); //debug
// Called below to build list cells and numeric heads.
// NOTE(review): the roles of d, s and a are not visible here -- confirm
// against the definition of gcons.
extern int gcons(int d,int s,int a);
// Called by rdtok() with the text of a symbol; the int it returns is used
// as the token value.
extern int symbol(char*);
// Called by rdtok() with the text of a quoted string; the int it returns
// is used as the token value.
extern int litral(char*);

// Stream the trees are read from; assigned from getstptr(trefli) at the
// start of every rdparscont() call.
fstream *treefile;
fstream *getstptr(int);
// Work stack used by push()/pop() while tokenProc() builds nested lists;
// sptr is the index of the next free slot and is reset to 0 by rdlist().
#define STCKSIZE 200
static int stck[STCKSIZE],sptr; //make this a NEW

// Store x in the next free slot of the work stack and advance the stack
// index.  No overflow check is made here.
inline void push(int x)
{
	stck[sptr] = x;
	sptr++;
}

// Step the stack index back by one and return the value stored there.
// No underflow check is made here.
inline int pop(void)
{
	sptr--;
	return stck[sptr];
}

// Parser state used by rdlist() and tokenProc(): the result of the most
// recent tokenProc() step, the current token, and the list cell most
// recently appended at the current nesting level.
static int value, token, last;

// Read the next character from treefile without skipping whitespace.
// A newline is reported as a blank; '\0' is returned when the read fails
// (for example at end of file).
static char getAchr(){
	char nextCh;
	treefile->unsetf(ios::skipws);	// blanks are significant to the tokenizer
	*treefile>>nextCh;
	if(!(*treefile)) return '\0';
	return (nextCh=='\n') ? ' ' : nextCh;
	}

// One-character lookahead: the next input character that has been read
// from the file but not yet consumed by rdtok().
static char chr;
// Forward declarations of the reader functions defined further down in
// this file (listToTree is not defined in the visible part of the file).
extern int tokenProc(void);
extern int rdtok(void);
extern int rdlist(void);
extern int rdparscont(int);
extern int listToTree(int, int);

// This code is executed when a new symbol table is read in: it looks up the
// special token names with symbol() and caches the results in symn[].
// Symbol values of the eight special tokens, indexed by LPAREN .. QUOTE.
static int symn[8];
// Fill symn[] by passing each special token name to symbol().
void rdlistinit(void){
	const static char* tokenNames[]={"(",")","[","]",".", "$EOF$", "'",  "QUOTE"};
	int slot=0;
	while(slot<8){
		symn[slot]=symbol((char *) tokenNames[slot]);
		slot++;
	}
}

/*
 * Classify a character for the tokenizer.
 *
 * Returns 0 when c can be part of a symbol or number (letters, digits
 * and '-') and 1 when c is a special character that ends a token.
 *
 * Characters below ' ' -- control characters, the '\0' that marks end of
 * input, and negative values on platforms where char is signed -- are
 * reported as special.  They must not reach the table lookup, because
 * c-32 would be a negative index.
 */
static int symtype(char c) {
	/* Classification of the characters ' ' (32) through '@' (64). */
	static const char sytptab[]={
		1,1,1,1,1,1,1,1,1,1,1,1,1,	/* ' ' .. ','            */
		0,				/* '-'                   */
		1,1,				/* '.' '/'               */
		0,0,0,0,0,0,0,0,0,0,		/* '0' .. '9'            */
		1,1,1,1,1,1,1			/* ':' .. '@'            */
	};
	if(c<' ') return 1;
	if(c>'Z'){
		if(c>'z') return 1;
		if(c>='a') return 0;
		return 1;		/* '[' .. '`' */
	}
	if(c>='A') return 0;
	return sytptab[c-' '];
}

// File position immediately after the most recently read tree; updated
// from tellg() in rdparscont() and recorded by rdpars() as the start of
// the tree it is about to read.
//static streampos nextTreePosition=0;//where to read the next tree from
static long nextTreePosition=0;//where to read the next tree from
// Number of tree start positions that are remembered.
#define PREVTREESTARTSIZE 5
// Start positions of the most recently read trees, oldest first; the last
// used entry belongs to the tree read by the latest rdpars() call.
//static streampos prevTreeStart[PREVTREESTARTSIZE];
static long prevTreeStart[PREVTREESTARTSIZE];
// Count of valid entries in prevTreeStart[].
static int numTreesInPrevArr=0;
// Index into prevTreeStart[] of the tree readPrevTree() will read next.
// rdpars() sets it to numTreesInPrevArr-2 and readPrevTree() decrements
// it; a negative value means no further previous tree is available.
static int indexPrevTreeRead=0;// 1= the first prev tree has been read
// Not referenced anywhere in the visible part of this file.
static char *wordTextP;

// Destination for the text following "SID=" on a header line; rdpars()
// points it at sentid before each read.
static char *sentidP;

// read in the next parse tree
/*
 * Because a previous parse tree may have to be read again later, the file
 * position at which each tree starts is remembered for the last
 * PREVTREESTARTSIZE trees.  The position recorded for the tree about to
 * be read is nextTreePosition, i.e. the position reached after the
 * preceding tree.
 *
 * Returns the result of rdparscont(0): -1 at end of file, 1 when a tree
 * was read, 0 when the tree was rejected.
 */
int rdpars(){
	if(numTreesInPrevArr<PREVTREESTARTSIZE){
		prevTreeStart[numTreesInPrevArr++] = nextTreePosition;
	}
	else {
		// History is full: drop the oldest start position and append
		// the new one in the last slot.
		for(int i=0;i<PREVTREESTARTSIZE-1;i++)
			prevTreeStart[i] = prevTreeStart[i+1];
		prevTreeStart[PREVTREESTARTSIZE-1] = nextTreePosition;
	}
	sentidP=sentid;
	// The last entry is the tree read now, so the entry before it is the
	// first candidate for readPrevTree().
	indexPrevTreeRead = numTreesInPrevArr-2;
	return rdparscont(0);
}

int rdparscont(int prevTrRead) {
	char linebuf[121];
	treefile=getstptr(trefli);
//skip over any blank lines at the beginning and end and between the trees
	while(1){
		if((chr = getAchr()) == '\0') return -1;// end-of file
		if(chr != ' ') break;
		}

// read SID and sentence if any if EOF encounted end of file of trees
	while(1){
		if(chr != '*')break;
		treefile->getline(linebuf,100);
		if(strncmp(linebuf,"SID=",4)==0){
			strcpy(sentidP,&linebuf[4]);
			} // if
		if((chr = getAchr()) == '\0') return -1;// end-of file
		} // while

	int parsno=1;
	int it;
	if((it=rdlist())>0){
		nextTreePosition = treefile->tellg();//get file pointer of next read
		if(listToTree(it, prevTrRead) == FALSE) return FALSE;
		wordstart=1; wordend=nwordSent;
		return 1;
		}
	else {
		if(it == -1)*coutP<< "*** Error in list expression"<<endl;
		if(it == -2)*coutP<< "*** Unmatched '"<<endl;
	*coutP<<"Errors in parse tree. Tree will be ignored"<<endl;
	return 0;
		}//else
}

// Buffer of 400 chars allocated by readPrevTree() before a previous tree
// is read.
// NOTE(review): nothing in this file releases it; confirm which code owns
// and frees the buffer.
char *prevSenWordTextP=0;

/*
 * Read a tree that precedes the one read by the last rdpars() call.
 *
 * The file is positioned at the recorded start of the tree before the
 * current "previous" tree, so successive calls step further back.  One
 * cannot move back further than the number of start positions currently
 * saved in prevTreeStart[].
 *
 * Returns 0 when no earlier start position is available (or no tree file
 * has been opened yet); otherwise the result of rdparscont(1).
 *
 * NOTE(review): the file position is left after the tree that was read
 * and nextTreePosition is overwritten by rdparscont(); confirm that
 * callers expect this before the next rdpars().
 */
int readPrevTree(){
	if(indexPrevTreeRead < 0 || treefile == 0){
// too far back, cannot do the request
		return 0;
		}
	// Allocate only when a tree is actually going to be read, so the
	// failure path above does not leave an unused buffer behind.
	prevSenWordTextP = new char[400];
	prevSenWordTextP[0] = '\0';
//move to the start of a previous tree in file
	// A read that ran into end of file leaves eof/fail bits set, and a
	// seek on a failed stream is ignored; reset the state first.
	treefile->clear();
	treefile->seekg(prevTreeStart[indexPrevTreeRead--]);
	return rdparscont(1);//read and build the previous tree
}

/*
 ************************************************************************
     RDLIST reads the next list expression from the input file and
     returns in -S- a pointer to it.  The list expression can be
     either a single symbol or literal or a list of the form

           (CAR [CDR] CAR [CDR] ... CAR [CDR] . CSR)

     where CAR, CDR, and CSR are themselves list expressions.  If
     a CDR is NIL, it can be omitted, together with its surrounding
     brackets;  similarly, if the CSR is NULL, the CSR field and its
     preceding period can be omitted.
 ************************************************************************
*/

// Read one complete list expression starting at the current lookahead
// character.  Returns the value produced by tokenProc() when it is
// positive, and 0 otherwise (empty list or any error code).
int rdlist(){
	// start with an empty work stack and no current list cell
	sptr=0;
	last=0;

	token = rdtok();
	value = tokenProc();
	//treefile->ignore(200,'\n');
	return (value>0) ? value : 0;
}

int tokenProc(){
	if(token == symn[LPAREN]) {

//                   recur to read in CAR

	push(last);
	push(0);
	token = rdtok();

	while(1){
		if(token == symn[EOFMK]) return -2;
		if(token == symn[RPAREN]) {
			value=pop();
			last=pop();
			return value;
			} // RPAREN
		value=tokenProc();
		if(value<0) return value;

//                   build list element

		int	p=gcons(0,0,value);
		int pp;
		if((pp=pop())==0) pp=p;
		else CDR(last)=p;
		push(pp);
		last=p;

//            if a CDR is present, recur to read it in

		token = rdtok();
		if(token == symn[LBRACK]) {
			token = rdtok();
			value=tokenProc();
			if(value<0) return value;
			token = rdtok();
			if(token != symn[RBRACK]) return -1;
			CSR(last)=value;
			token = rdtok();
			}//LBRACK

//                   if CSR is present, recur to read it in
		if(token == symn[RPAREN]) {
			value=pop();
			last=pop();
			return value;
			} //RPAREN

		if(token == symn[PERIOD]){
			token = rdtok();
			value = tokenProc();
			if(value<0) return value;
			CDR(last)=value;
			token = rdtok();
			if(token != symn[RPAREN]) return -1;
			value=pop();
			last=pop();
			return value;
			}//PERIOD
		}// while
	}
	if(token == symn[QOTMRK]) {

//       procsess quote mark
		token = rdtok();
		value = tokenProc();
		if(value<0) return value;
		value=gcons(gcons(0,0,value),0,symn[QUOTE]);
		return value;
		} // quote
	if(token == symn[RPAREN] || token == symn[PERIOD] ||
	token == symn[LBRACK] || token == symn[RBRACK]) return -1;
	value=token;
//cout<<"tokproc value "<<value<<" ig "<<ig<<endl;
	return token;
}

/*
************************************************************************
     RDTOK reads the next token from the input file and returns a pointer
     to the head for that token. RDTOK skips leading blanks and commas.
     RDTOK recognizes five classes of tokens:

        1. A special character is returned as a symbol whose name is
           that one character.
        2. A sequence of digits terminated by a special character is
           returned as a numeric symbol (not entered in symbol table)
        3. A 'quoted string' is returned as a literal.
           (Two consecutive ' in the string become a single ' in the
           literal)
        4. Any other sequence of characters terminated by a special
           character is returned as a symbol
        5. If an END-OF-FILE is encountered, the symbol $EOF$ is
           returned
************************************************************************
*/

int rdtok(void){
	char tokenName[50];
	char* tokenst=tokenName;
//                   skip leading blanks and commas, check for EOF
	while(chr == ' ' || chr == ',') chr = getAchr();

	if(chr == '\'') {
//                   pack quoted string
		while(1){
 			chr=getAchr();
			if(chr == '\''){
 				chr=getAchr();
				if(chr != '\'')  break;
				}
			*tokenst++=chr;
			} //while
		*tokenst++='\0';
//cout<<"literal "<<tokenName<<endl;// **debug**
//if token name contains a blank make it a bang(!)
char * tokpt=tokenName;
while(*tokpt != '\0'){if(*tokpt==' ')*tokpt='!';tokpt++;}
		return litral(tokenName);
		}//if

	if(symtype(chr) == 0){

//                   pack symbol, compute value if numeric
int	numflg=1;

	while(1){
		numflg = numflg && (chr <= '9' && chr >= '0');
		*tokenst++=chr;
 		chr=getAchr();
		if(symtype(chr) != 0) break;
		} //while

	*tokenst++='\0';
	if(numflg){ //  numeric symbol -- create head
	int	rtt=atoi(tokenName);
		return gcons(rtt,(HEAD+CNSTBT),0);
		}// numflg
//             non-numeric symbol: create head and symbol table entry
	else{
//cout<<"symbol "<<tokenName<<endl;// **debug**
 		return symbol(tokenName);
		}//else
		}//if

//                   special character -- pack one character as symbol

	*tokenst++=chr;
	*tokenst++='\0';
 	chr=getAchr();
	if(tokenName[0] == '(') return symn[LPAREN];
	if(tokenName[0] == ')') return symn[RPAREN];
	if(tokenName[0] == '[') return symn[LBRACK];
	if(tokenName[0] == ']') return symn[RBRACK];
	if(tokenName[0] == '.') return symn[PERIOD];
//cout<<"special char "<<tokenName<<endl;// **debug**
	return symbol(tokenName);

}
