/* M E D I C A L  L A N G U A G E  P R O C E S S I N G, LLC
   (c) 2005 All rights reserved.
   Read Terms of Use at http://mlp-xml.sourceforge.net.
   Contact medical_language_processing@gmail.com
*/
#include <iostream.h>
#include <fstream.h>
#include <iomanip.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "symtab.h"
#include "common.fcm"
#include "gencom.fcm"
#include "lispdefs.fcm"

// Boolean values used for the lexerr flag.
#define TRUE 1
#define FALSE 0
// Number of slots in the subpart table (sbpttbla).
#define SBPLIM 25
// Record-type codes found in the record-type field of a definition
// control word. Only WDWITHATTRIBREC_TYPE is tested in this file;
// scanwd_dose treats every other type as a canonical-form record.
#define SUBPARTREC_TYPE 7
#define WDWITHCANFORMREC_TYPE 8
#define WDWITHATTRIBREC_TYPE 6

// Routines defined in other translation units.
// getstptr: returns the stream for a file handle (used to read the WD extract).
extern fstream *getstptr(int);
// load: called with the global ig after iniget(); returns the location of
// the loaded head, or a value <= 0 that is passed to scerrmsg as an error code.
extern int load(int);
// iniget: handed a buffer of object words and its length before load().
extern void iniget(unsigned int *,int);
// List-cell helpers; all operate on integer cell addresses.
extern int cat(int, int);
extern int append(int, int);
extern int litral(char *);
extern int gcons(int,int,int);
extern void exitr(const char * text);
// Pile that holds the NUL-terminated text of the sentence tokens.
// senToksP points at the next free byte; scanwd_dose resets it to the
// start of the pile on every call.
#define SENTOKARLEN 1500
static char senToksAr[SENTOKARLEN], *senToksP;
extern void errmgp(int,int,int,int);
// Symbol table shared with the rest of the program.
extern SymbTable sytab;

// Forward declaration; not referenced in this part of the file.
void plist(int,int,int);
//         Table for subparts.
//         isbp is the number of entries in use; scanwd_dose resets it to 0
//         for every definition group. endWordProc searches the table for an
//         entry whose sbpsti equals the negated symbol-table index, takes
//         sbpadr as the address and sets sbpref to mark the entry referenced.
static int isbp;
struct sbptstr{ char * sbpnamp; int sbpadr; short sbpsti;
	char sbpref; char sbpdef; };
static struct sbptstr sbpttbla[SBPLIM];
static struct sbptstr *sbpttbl=sbpttbla;

// Set to TRUE by scerrmsg whenever an error is reported; cleared at the
// start of scanwd_dose.
static int lexerr;
//         Canonical (special) form table.
//         storeCanonData_dose appends entries: one header word
//         (len + (index<<8)) followed by len object words. ncsf counts the
//         words stored so far and csftabP points at the next free word.
static int ncsf=0;
#define CSFTABLEN 100
static unsigned int csftab[CSFTABLEN];
static unsigned int *csftabP=csftab;

// ----------------------------------------------------

// One row of the "definition to sentence word" table read for each
// definition group: wordInSen is the sentence word index (used with the
// SENTEn macros) and groupPos is the 1-based ordinal of the definition
// inside the group that applies to that word.
struct def_ptr_st{int wordInSen; int groupPos;};
void storeCanonData_dose(unsigned int *canfobj, int len, int indexInFile){
	int index=sytab.getTransStIdx(indexInFile);	
	ncsf++;
	sytab.setstcanf(index, ncsf);
	*csftabP++ = len + (index<<8);
	for(int i=0; i<len; i++){
		*(csftabP++)=*(canfobj++); ncsf++;
		if(ncsf>CSFTABLEN){
cerr<<"csftable overflow"<<endl;
cerr<<"the last value read was "<<hex<<*csftabP<<endl;
exit(12);
}
		}//for
	return ;
}

// ----------------------------------------------------

static void scerrmsg(int type, char * nameP, char *dwordP){
	lexerr=TRUE;
	if(type <= 0){
		if(type < 0)
	*coutP <<" *** Invalid interpret field in "<< nameP<<endl;
		else
	*coutP << " *** No head in record "<<nameP<<endl;
		}

	else {
	switch (type){

	case 1:
		*coutP <<"*** Undefined canonical form "<<nameP;
		if(dwordP != (char *)NULL) *coutP<<" in "<<dwordP;
		*coutP<<endl;
		break;

	case 2:
		*coutP <<"*** Undefined special form "<<nameP;
		if(dwordP != (char *)NULL) *coutP<<" in "<<dwordP;
		*coutP<<endl;
		break;

	case 3:
		*coutP <<"*** Undefined subpart symbol "<<nameP
		<<" in defn of "<< dwordP <<endl;
		break;

	case 4:
		*coutP <<"*** Undefined symbol "<< nameP
		<<" in defn of "<< dwordP <<endl;

		}//switch
		}//else
	return;
}//func end

// - - - - - - - - - - - - - - - - - - - - - - - - - - 

// Range of grammar cells produced by loading one definition: frstgw is the
// first cell and lastgw the last cell (inclusive). scanwd_dose fills it from
// the global ig before and after load(); endWordProc walks the range.
struct wdgrpst{int frstgw; int lastgw;};
// Finish a loaded word definition: rewrite every grammar cell in the ranges
// listed in info[0..nent-1] so that fields which hold WD-file symbol
// references hold addresses instead.
//
//   headp   not used by this routine
//   info    cell ranges (frstgw..lastgw inclusive) to process
//   nent    number of entries in info
//   dwordP  word being defined; used only in diagnostics
//
// Returns  0  when all cells were converted,
//         -1  when a subpart or symbol has no address (a message is printed),
//          1  when a literal reference is exactly SOL, i.e. a zero
//             symbol-table pointer (a message is printed).
// On a non-zero return the remaining cells are left unconverted.
static int endWordProc(int headp, struct wdgrpst *info, int nent, char * dwordP){
	int addr,ksym, j, idxInFile;

//                   Change entries in grammar for this word
//                   from symbol table pointers to addresses
//cout<<"word processing "<<dwordP<<endl;
	for(int lp=0;lp < nent;lp++){
	for(int lgrp=info[lp].frstgw;lgrp <= info[lp].lastgw;lgrp++){
		ksym=CSRINT(lgrp);
		if((ksym & HEAD)){
//                   head cell: only the CAR field is converted, and only
//                   when it refers to a symbol whose translated index is
//                   not negative
	 		if((idxInFile=(CARINT(lgrp)-SOS))>0){
				if((sytab.getTransStIdx(idxInFile))<0)continue; //** get rid
				CARADDR(lgrp)=STADDR(sytab.getTransStIdx(idxInFile));
//cout<<"swd:head "<< lgrp<<" "<<STNAME(sytab.getTransStIdx(idxInFile))<<endl;
				}//if
			} //if head

		else { // not a head

//                   pass 0 converts the CSR field (already in ksym),
//                   pass 1 converts the CAR field
        		for(int i=0;i<2;i++) {
        			if(i==1)ksym=CARINT(lgrp);

//                   values below SOS are not symbol references: leave as is
//                   NOTE(review): the head branch above tests ">0" while this
//                   test lets an offset of exactly 0 through -- confirm intent
				if((idxInFile = ksym-SOS)<0)continue;
				if(ksym<SOL){ // a symbol
					ksym=sytab.getTransStIdx(idxInFile);
					if(ksym < 0){//  subpart--look up on subpart table

					addr=0;
					for(j=0;j<isbp;j++) {
					if(sbpttbl[j].sbpsti == -ksym) {
						addr = sbpttbl[j].sbpadr;
						sbpttbl[j].sbpref=1;
						break;
						} // if
						} // for

//                   addr is still 0 either because the subpart was found
//                   with a zero address (j<isbp) or was not found at all
					if(!addr) {
					if(j<isbp)
					scerrmsg(3, sbpttbl[j].sbpnamp, dwordP);
					else *coutP<<"Undefined subpart symbol with "
						<<"WD file index "<<idxInFile
						<<" in word "<<dwordP<<endl;
						return -1;
						}//if no addr
						} // if subpart

					else { //not subpart
//                         other symbol--get address from staddr
					if(!(addr=STADDR(ksym))){
						scerrmsg(4,STNAME(ksym),dwordP);
						return -1; }
						}//else other sym no subpart
					}//if symbol

          			else {// literal: if undefined create new literal
					if(!(ksym = ksym-SOL)){
// This word is not in the grammar. Report it and return 1 (not -1) so
// that the caller goes forward and reads in the rest of the definitions
// for this dose string.
		*coutP<<"Error in grammar, zero symbol table pointer"<<endl;
return 1;
					}
				ksym=sytab.getTransStIdx(ksym);
				if(!(addr = STREFC(ksym))){
					addr=gcons(0,(HEAD+ATOMIC+LTOMIC),ksym);
//                                   save address of literal in STREFC
				   	SETSTREFC(ksym,addr);
					} //if undefined
					}//else- literal

//                               store address in grammar
				if(i!=1) CSRADDR(lgrp)=addr;
				else CARADDR(lgrp)=addr;
				} // for i
			} // else not head
		} //for on lgrp
		} //for on lp
	return 0 ;
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - 

static char * appendSymToSentPile(char *sword){
	char * svp;
	svp=strcpy(senToksP,sword);
	senToksP += strlen(sword)+1;
	return svp;
	}

// - - - - - - - - - - - - - - - - - - - - - - - - - - 

// Withdraw a definition from the sentence: for every row of ptr2defs
// (ntoksdef rows) that refers to definition number defInGroup, set
// SENTE6 of the corresponding sentence word to -1.
static void clearDef(int defInGroup,int ntoksdef,
		struct def_ptr_st ptr2defs[]){
	int row = 0;
	while(row < ntoksdef){
		struct def_ptr_st *entry = &ptr2defs[row];
		row++;
		if(entry->groupPos != defInGroup)
			continue;
		SENTE6(entry->wordInSen)=-1;
	}
}
// Names of the canonical forms known to the dose scanner. scanwd_dose
// indexes this table with (canonical index from the control word) - 1 and
// looks the name up in the symbol table.
static const char *canfNameTab[1]={"NUM"};

// - - - - - - - - - - - - - - - - - - - - - - - - - - 

// Description of one dose string, filled in by scanwd_dose.
//   sentNumInDoc   sentence number, read from the WD extract
//   wordsInStr     number of words in the string, read from the WD extract
//   medPos, rightOrLeft, isOF, startWordInDoc, endWordInDoc
//                  header values read from the WD extract as ints and then
//                  stored here
//   startWord, endWord
//                  first and last word (1-based) of the span selected by
//                  scanwd_dose
// NOTE(review): the header values are read as int but stored in char
// fields, so anything outside the range of char is silently narrowed.
struct doseInst{short sentNumInDoc; short wordsInStr;
	char medPos; char rightOrLeft; char isOF;
	char startWordInDoc; char endWordInDoc;
	char startWord; char endWord;};

// Read the word definitions of one dose string from the temporary WD
// extract file and attach them to the sentence words.
//
//   wdextf   handle of the WD extract file (resolved with getstptr)
//   doseInp  receives the header values of the string and, on return 1,
//            the selected word span in startWord/endWord
//
// Input consumed, in order:
//   number of words in the string
//   sentence number, med position, right/left flag, OF flag,
//     start word and end word in the document
//   the words of the string
//   number of definition groups, then for each group:
//     count + (sentence word, definition ordinal) pairs
//     count + record names (word, followed by its idiom words if any)
//     count + object words in hexadecimal
//
// Returns -1  end of file reached while reading the word count
//          2  the string has no words
//          0  right-side string with no usable run of defined words
//          1  otherwise
//
// NOTE(review): the counts read from the file are used to fill the fixed
// size arrays below (and the sentence token pile) without bounds checks.
// The file is produced by an earlier stage of the program; confirm that
// stage guarantees the limits, or add checks with a suitable recovery.
int scanwd_dose(int wdextf, struct doseInst *doseInp){

	int idexOfCan, idexOfSymb, isente, ctlw;
	struct wdgrpst wdinfo[40];
	int retValue=1;
	int wdobjlen, numDefGrp;
	int defInGroup,ntoksdef;
	struct def_ptr_st ptr2defs[30];
	unsigned int wdobject[200], *wdbfP, *wdbfEndP;
	char recNameAr[300], *recNameP;
	char *currWord=(char *)NULL;
	char * recnamParr[80], *recnamP, **wdtokP;
	int numInfoEnt=0, kat, nsymwd,grpProc,nwdbuf;
	int lochd=0, icf, categ,idiomWordCount,tost,frmlst,form;

	senToksP=senToksAr;

	fstream *wdextFileP=getstptr(wdextf);
//                                    read WD data for sentence

// At the start of reading in the sentence data the temp WD file is
// positioned after the canonical forms and the number of sentences.

	*wdextFileP>>doseInp->wordsInStr; //number of words in string
	if(wdextFileP->eof())return -1;
	int nwstr = doseInp->wordsInStr; //number of words in string
	if(nwstr>149)*coutP<<"sentence too long "<<nwstr;
	wdextFileP->ignore(50,'\n'); //skip over newline

//                     read in string tokens, identification
	lexerr=FALSE;
	int sw, ew, mp, ofn, rol;
	*wdextFileP>>doseInp->sentNumInDoc>>mp>>rol>>ofn>>sw>>ew;
	doseInp->medPos = mp; doseInp->isOF=ofn; doseInp->rightOrLeft = rol;
	doseInp->startWordInDoc = sw; doseInp->endWordInDoc = ew;
	wdextFileP->ignore(50,'\n'); //skip over newline
	if(!nwstr){
		return 2;
		}

// make a sentence ID for diagnostic purposes: the sentence number is
// written right-justified ending at column 15, the start word ending at
// column 19, and the string is cut at column 20.
	strcpy(sentid,"Dose Sent #                                  ");
	char *cp;
	int dv;
	cp=sentid+15;
	dv=doseInp->sentNumInDoc;
	while(dv){
		*cp-- = (dv % 10) + '0';
		dv /= 10;
		}
	cp = sentid + 15 +5;
	*cp-- = '\0';
	dv = sw;
	while(dv){
		*cp-- = (dv % 10) + '0';
		dv /= 10;
		}

	for(int p=0;p<nwstr;p++) {// put sen tokens in pile
		*wdextFileP>>senToksP;
		wdextFileP->setf(ios::skipws);
// put sent word text into word pile and put address into symbol table
		SENTE1(p+1)=senToksP;
		SENTE2(p+1)=0;
		SENTE3(p+1)=0;
		SENTE4(p+1)=0;
		SENTE5(p+1)=0;
		SENTE6(p+1)=-1;         // -1 = no definition attached yet

		senToksP += strlen(senToksP)+1;
		}

	*wdextFileP >> numDefGrp; // read number of groups of defs

	grpProc=0;
	while(grpProc++ < numDefGrp) {
//   Read in a unit of input. This consists of the material for
//   a single group of word definitions

		*wdextFileP >>ntoksdef; // lines in def to word pointer table
		for(int i=0;i<ntoksdef;i++)
			*wdextFileP >> ptr2defs[i].wordInSen>>ptr2defs[i].groupPos;

//                 Read in record names, idioms (if any), and compiled code
		*wdextFileP >> nsymwd;
		recNameP=recNameAr;
//					1. put rec names in pile
		for(int i=0;i<nsymwd;i++){
			*wdextFileP>>recNameP;
// put def word into word pile and put address of word into array
			recnamParr[i]=recNameP;
			recNameP += strlen(recNameP)+1 ;
			}
		wdtokP=recnamParr;

//				Read object data
		*wdextFileP>>nwdbuf;
		*wdextFileP>>hex;
		wdbfEndP=wdobject;
		for(int i=0;i<nwdbuf;i++) *wdextFileP>>*(wdbfEndP++);
		*wdextFileP>>dec;

		defInGroup=0;
		isbp=0;
		frmlst=0;
// Start every group with no pending cell ranges, so that a group without
// records does not hand the previous group's ranges to endWordProc again.
		numInfoEnt=0;

//                             Unpack one record from buffer
		wdbfP=wdobject;
		while(wdbfP<wdbfEndP){
//				Decode control word:
//				  bits 26-31  number of idiom words
//				  bits 16-25  canonical form index
//				  bits 12-15  record type
//				  bits  0-11  length of the object that follows
			ctlw = *(wdbfP++);
#define CW_NIDWD 26
#define CW_NIDWDLEN 6
#define CW_CANIDX 16
#define CW_CANIDXLEN 10
#define CW_RECTYP 12
#define CW_RECTYPLEN 4
#define CW_OBJLEN 12
			idiomWordCount = (ctlw>>CW_NIDWD)&((1<<CW_NIDWDLEN)-1);
			idexOfSymb = (ctlw>>CW_CANIDX) & ((1<<CW_CANIDXLEN)-1);
			tost = (ctlw>>CW_RECTYP) & ((1<<CW_RECTYPLEN)-1);
			wdobjlen = ctlw & ((1<<CW_OBJLEN)-1);

// New definition
			if(defInGroup!=0){ //  Complete processing of previous word
				retValue=endWordProc(lochd, wdinfo, numInfoEnt, currWord);
				if(retValue<0)
					clearDef(defInGroup,ntoksdef,ptr2defs);
				}//if

			numInfoEnt=0;
			recnamP=*(wdtokP++) ;
			currWord = recnamP;

			if(tost == WDWITHATTRIBREC_TYPE){ //  Unpack category list
// these will be defined as literals
// for the dose case the definition is incomplete. We must put a pointer
// to the symbol in the definition read in.
				int wdst=LOOKST(currWord);
				wdbfP[1] |= (wdst<<20);
				if(!wdst){
// this word does not have a definition and will be marked as undefined.
// When all the definitions are read in, the longest span of defined
// words is parsed for dose recognition.
					wdbfP += wdobjlen;
					continue;
					}
				iniget(wdbfP,wdobjlen);
				wdbfP += wdobjlen;
				} //if tost 6

			else { // Unpack canonical form for special forms def
				wdbfP += wdobjlen;
// The index comes from a 10-bit field; reject anything that does not
// name an entry of canfNameTab instead of reading outside the table.
				if(idexOfSymb < 1 ||
				   idexOfSymb > (int)(sizeof(canfNameTab)/sizeof(canfNameTab[0]))){
					lexerr=TRUE;
					*coutP<<"*** Invalid canonical form index "<<idexOfSymb
						<<" in "<<recnamP<<endl;
					continue;
					}
				const char *canfname = canfNameTab[idexOfSymb-1];
				idexOfSymb=LOOKST(canfname);
				idexOfCan = idexOfSymb;
				if(!(icf=sytab.retstcanf(idexOfCan)))
					{scerrmsg(1,STNAME(idexOfCan),recnamP);continue;}
// icf is the position of the first object word; the header word in
// front of it carries the length in its low 8 bits.
				iniget(&csftab[icf],(csftab[icf-1]&0xff));
				}//else for canonical form def

			wdinfo[numInfoEnt].frstgw=ig+1;
			lochd = load(ig);
			wdinfo[numInfoEnt++].lastgw=ig;
			if(lochd <= 0)
				{scerrmsg(lochd,recnamP,(char *)NULL);continue;}
			defInGroup++;

//           build the word form: the word followed by its idiom words
			char ** idmst=wdtokP;   // first idiom word, for messages
			int frmwd;
			frmwd = form = gcons(0,0,litral(recnamP));
			for(int i=0; i<idiomWordCount; i++) {
// create the new cell first, then link it; the original combined both
// in one expression that read and wrote frmwd at the same time
				int nextwd = gcons(0,0,litral(*(wdtokP++)));
				CDR(frmwd) = nextwd;
				frmwd = nextwd;
				}

//           complete category list and add to FORMS list
//                1. kat = category list without head
			kat=CDR(lochd);
//                2. prefix kat with word FORM and add to forms list
//                   [note:  it is important that FRMLST be extended
//                       by modifying the existing list, because it
//                       is extended after pointers to frmlst have
//                       been placed in the list of category lists]

			frmlst=cat(frmlst,gcons(0,0,gcons(kat,0,form)));

//                   3. categ = kat prefixed word form
//                       [forms list], for insertion into SENTE2(.) list
			categ=gcons(kat,frmlst,form);

//            look for word in sentence
			for(int i=0 ; i<ntoksdef;i++){
				if(defInGroup != ptr2defs[i].groupPos) continue;
				isente = ptr2defs[i].wordInSen;

//            insert category list from word defn
//              into category list of sentence word
				if(!SENTE2(isente)){
//                                   1. word has no prior category list
					SENTE2(isente)=categ;
					SENTE6(isente)=idiomWordCount+1;
					}//if
				else {
//                                   2. sentence word has other category
//                                      sublists. Append current category
//                                      list to old category lists.
					SENTE3(isente)++;
					*coutP<<"*** "<<recnamP;
// index from idmst rather than advancing it, so the idiom words are
// printed correctly for every sentence word that reports a homograph
					for(int ix=0; ix<idiomWordCount; ix++)
						*coutP<<' '<<idmst[ix];
					*coutP<<" appears more than once in WD (a homograph)"<<endl;

					SENTE2(isente)=append(SENTE2(isente),categ);
					} //else
				} // for on def used table
			} // while on defs in the groups

// 				   complete processing of last word
		retValue=endWordProc(lochd, wdinfo, numInfoEnt, currWord);
		if(retValue<0)
			clearDef(defInGroup,ntoksdef,ptr2defs);

		} // while on number of def groups for sentence

//    - - - - - - - - - - - - - - - - - - - - - - - - - - - -

//                       scan for words not defined
	if(!rol){//a right side med
// Start with the last word and look for the last defined word. Isolated
// defined words are rejected by requiring that the preceding word is
// defined as well.
		int frstDef, lastDef, iword;
		if(nwstr==1){//only one word in string
			if(SENTE6(1) <= 0) return 0;

			doseInp->startWord=1; doseInp->endWord=1;
			return 1;
			}//if one word

// Word 1 has no predecessor, so the scan stops at word 2; this also keeps
// the index from running below zero when an isolated word is skipped.
		lastDef=0;
		for(iword=nwstr;iword>1;iword--){
			if(SENTE6(iword) > 0) {
				if(SENTE6(iword-1) > 0) {
					lastDef=iword;
					break;
					}
				iword--;    // predecessor is undefined: skip it too
				}//if
			}//for
		if(!lastDef){//no defined words return a skip string indicator
			return 0;
			}

// First word, from the left, that is defined and followed by a defined word.
		frstDef=lastDef;
		for(iword=1;iword<lastDef;iword++){
			if(SENTE6(iword) > 0 && SENTE6(iword+1) > 0) {
				frstDef=iword;
				break;
				}
			} // for on words
		if(lastDef-frstDef == 0)return 0;
		doseInp->startWord=frstDef; doseInp->endWord=lastDef;
		}
	else {
// Left side: take the run of words that ends at the last word (or at the
// one before it when the last word is undefined) and extends leftwards
// until an undefined word is met.
// NOTE(review): for a single undefined word this yields startWord 0 and
// endWord 0 with return value 1 -- confirm that callers expect this.
		int lastDef, iword;
		lastDef=nwstr;
		if(SENTE6(lastDef)<0)lastDef--;
		for(iword=lastDef-1;iword>0;iword--){
			if(SENTE6(iword)<0)break;
			}//for
		doseInp->startWord=iword+1; doseInp->endWord=lastDef;
		}//else lhs list
	return 1;

}
