#! /bin/sh
#*    M E D I C A L  L A N G U A G E  P R O C E S S I N G, LLC
#*    (c) 2005 All rights reserved.
#*    Read Terms of Use at http://mlp-xml.sourceforge.net.
#*    Contact medical_language_processing@gmail.com
#*
#    compile  dictionaries from source
# the first parameter is the basename of the source file of the main
# dictionary. It is assumed that the extension is .src
# the second and following parameter(s) are the basenames of the various
# special dictionaries or nothing. The special dictionary object files
# (.wdo) will not contain any symbol table or "top" material, ie BNF etc 
# The machinery for using "special dictionaries" has the appearance of being
# a separate dictionary. This is in fact deceiving.
#
# For a given dictionary run (dlook) there is, logically speaking, only one
# dictionary because there is only one symbol table. The separation of the
# definitions into two files is simply a device for producing a selection
# procedure wherein the special dictionary definitions take priority.
#
# To guarantee that the symbol table contained in the main dictionary is
# correct for all the special dictionaries all the dictionaries must be 
# recompiled each time.
#
# This script does that.
#
# Directory holding the dictionary sources.  The trailing slash matters:
# file names are built as "${SRCDIR}name.src".
SRCDIR="$PWD/"
# Directory for the temporary compiler input and object files.
TEMPDIR=/usr/tmp
# Root of the compiler tree; the helper programs are run from $BINS/utils.
BINS=/home/lsp/mlp/src/compiler
# Force the C locale so that sort(1) orders the definitions byte-wise.
# LC_ALL takes precedence over the individual LC_* categories, so it is set
# too; otherwise an LC_ALL inherited from the caller would silently override
# LC_COLLATE and the dictionaries would be sorted in the wrong order.
LC_CTYPE="C"; export LC_CTYPE
LC_NUMERIC="C"; export LC_NUMERIC
LC_COLLATE="C"; export LC_COLLATE
LC_ALL="C"; export LC_ALL
#the name of the BNF and LISTS top file ie top.$TOPBNF
# TOPBNF="emgrm_100"
# Print the banner followed by a blank line.  printf is used because echo's
# treatment of "\n" differs between shells (some print it literally, others
# turn it into a newline).
printf '             Generating a Dictionary Set\n\n'
# Number of command-line arguments; at least one file name is required.
nargs=$#
if [ "$nargs" -lt 1 ]
then
  echo "There must be at least one file argument"
  exit 1
fi

# Option parsing.  Two leading options are recognised; they cannot be combined:
#   -n  no strip on the main dictionary
#   -d  compile only a single special dictionary (debug type run)
# Every option consumed is shifted away and nargs is decremented, so that
# afterwards $1 is the first file argument and nargs counts files only.
# Exit codes: 3 for -n together with -d, 2 for more than one file with -d,
# 1 when an option is given without any file argument.
DEBUG=0
NOSTRIP=0
ERR=0
# check for a no strip on main
# ($1 is quoted so the test stays valid when no argument is left)
if [ "$1" = "-n" ]
then
  nargs=`expr "$nargs" - 1`
  NOSTRIP=1
  shift
fi

#check for a compile only of a spec dict
if [ "$1" = "-d" ]
then
  if [ "$NOSTRIP" -gt 0 ]
  then
    echo " -d and -n options illegal"
    exit 3
  fi
  nargs=`expr "$nargs" - 1`
  DEBUG=1
  if [ "$nargs" -gt 1 ]
  then
    echo "Cannot have more than 1 file in a debug type run"
    exit 2
  fi
  shift
fi

# An option on its own leaves no file to compile: say so directly instead of
# carrying on with an empty file name.
if [ "$NOSTRIP" -gt 0 ] || [ "$DEBUG" -gt 0 ]
then
  if [ "$nargs" -lt 1 ]
  then
    echo "There must be at least one file argument"
    exit 1
  fi
fi

# Check that every dictionary source is readable.  All missing files are
# reported before giving up, so ERR is only set here and acted upon later.
if [ ! -r "${SRCDIR}$1.src" ]
then
  echo "File: ${SRCDIR}$1.src does not exist"
  ERR=1
fi

# The first file argument is the main dictionary.
MAIN=$1
if [ "$nargs" -gt 1 ]
then
  spn=1
  # Drop the main dictionary: from here on the positional parameters are
  # exactly the special dictionaries, which every "for spf" loop iterates.
  shift
  for spf
  do
    if [ ! -r "${SRCDIR}$spf.src" ]
    then
      echo "File: ${SRCDIR}$spf.src does not exist"
      ERR=1
    fi
    # Record the name as SPECDICT<n>.  Only the variable name is expanded
    # before eval sees the line; the value is expanded by eval itself, so a
    # name containing blanks or shell metacharacters is stored, not executed.
    eval "SPECDICT${spn}=\$spf"
    spn=`expr "$spn" + 1`
  done
fi

# Give up (status 1) if any source file was found to be unavailable.
if [ "$ERR" -ne 0 ]; then exit 1; fi

#this is where the input to the compiler is built
COMPIN="$TEMPDIR/dictcomp.inp$$"
# Start from a clean slate.  -f is needed because the file normally does not
# exist yet and rm reports that on stderr, which a stdout redirection to
# /dev/null does not silence.
rm -f "$COMPIN"
#put the control images at the top
# cp $SRCDIR/top.control $COMPIN
#contains the BNF and lists
# cat $SRCDIR/top.$TOPBNF >> $COMPIN
# NOTE(review): neither gramtop nor EPAR is defined in this script; presumably
# gramtop is found on PATH and EPAR comes from the environment - confirm.
gramtop "$EPAR.obg" >> "$COMPIN"
# This "top" contains canonical forms.
cat "$SRCDIR/top.canforms" >> "$COMPIN"

#The .src file of definitions is transformed into one in which each
#definition is on one line. This is done so that the definitions may be
#sorted. The output is fed to a program which translates the .src form of
#the dictionary entries to that required by the compiler.
#Another file is produced with the .alf extension. This is a sorted
#version of the original .src file.

# Append the special dictionaries (if any) to the compiler input and choose
# the names used for the object file (WDOFILE) and the listing (LISTNAME).
# The sort key is the first tab-delimited field of each one-line definition.
tab=`printf '\t'`
if [ "$nargs" -gt 1 ]
then
  for spf
  do
    # Separator image naming the dictionary whose definitions follow.
    # Plain echo, no eval: eval would re-parse the line and glob-expand the
    # leading "*" against the files in the current directory.
    echo "*XZMARKZX $spf" >> "$COMPIN"
    # -k1,1 is the standard spelling of the obsolescent "+0 -1" key.
    "$BINS/utils/mkoneline" < "${SRCDIR}$spf.src" |
      sort -t"$tab" -k1,1 |
      "$BINS/utils/transsrc" -a "$spf.alf" >> "$COMPIN"
    # -f: a previous object file may or may not exist.
    rm -f "$spf.wdo"
  done

  # The main dictionary definitions are appended after this image.
  echo "*XZMARKZX $MAIN" >> "$COMPIN"
  BASENAME="dictcomp"
  # The listing is named main-spec1-spec2-...
  LISTNAME=${MAIN}
  for spf
  do
    LISTNAME=${LISTNAME}-$spf
  done
  WDOFILE=$TEMPDIR/$BASENAME.wdo
else
  BASENAME="$MAIN"
  LISTNAME="$MAIN"
  # Only a no-strip, non-debug run writes the final object file directly;
  # every other run compiles into TEMPDIR and is post-processed afterwards.
  if [ "$DEBUG" -eq 0 ] && [ "$NOSTRIP" -gt 0 ]
  then
    WDOFILE=$BASENAME.wdo
  else
    WDOFILE=$TEMPDIR/$BASENAME.wdo
  fi
fi

# -- sorting routine set in SOLARIS is based on locale
#    LANG=
#    LC_CTYPE="C"
#    LC_COLLATE="C"
#    LC_NUMERIC="C"
#    LC_TIME="C"
#    LC_MONETARY="C"
#    LC_MESSAGES="C"
#    LC_ALL=
# However, default in Linux is "en_US.iso8859.15" for all except
# LC_ALL.
# This sorting order affects SOBS, SOBBED, SOBBING, SOB. which
# cause parser to report errors.
# Put the main dictionary on one line per definition, sort it on the first
# tab-delimited field and translate it into compiler input; transsrc also
# writes the sorted source to $MAIN.alf.  -k1,1 is the standard spelling of
# the obsolescent "+0 -1" sort key.
tab=`printf '\t'`
"$BINS/utils/mkoneline" < "${SRCDIR}$MAIN.src" |
  sort -t"$tab" -k1,1 |
  "$BINS/utils/transsrc" -a "$MAIN.alf" >> "$COMPIN"
# Terminating image of the compiler input.
echo "*CLOSE()" >> "$COMPIN"
echo "Transformation  completed"

#compile the dictionary
# COMPILER is not assigned anywhere in this script, so it has to come from the
# environment.  With it unset the command line below would reduce to a bare
# redirection: an empty listing would be created and nothing compiled.
if [ -z "$COMPILER" ]
then
  echo "COMPILER is not set: cannot compile the dictionary"
  rm -f "$COMPIN"
  exit 1
fi
# The file names are handed over through exported variables; the compiler is
# started without arguments.  NOTE(review): presumably PARINP is its input,
# GRAMIN the grammar object and GRAMOT its output - confirm.
PARINP=$COMPIN ; export PARINP
GRAMIN="/home/lsp/compiler/canfrls.obg"; export GRAMIN
GRAMOT=$WDOFILE ; export GRAMOT
# -f: there may be no object file left over from an earlier run.
rm -f "$GRAMOT"
# $COMPILER is left unquoted on purpose so that it may carry arguments.
$COMPILER > "$LISTNAME.lis"
rm -f "$COMPIN"

# Post-process the compiler output ($WDOFILE) into the final .wdo file(s).
# Object lines of interest start with "@"; the awk programs test field 5 of
# those lines to skip the leading BNF/LISTS material.
if [ "$nargs" -gt 1 ]
then
  # Remove stale objects first: the awk program below appends (>>) to them.
  # -f keeps rm quiet when a file does not exist.
  rm -f "$MAIN.wdo"
  for spf
  do
    rm -f "$spf.wdo"
  done
  #remove the *XZMARKZX images and make separate files of the object dict
  #also strip the top but not the canonical forms
  # Output starts in the main dictionary object; each separator image
  # switches output to the .wdo file it names.  With NOSP (no strip) set,
  # nothing is skipped at the top.
  awk -v fstfile="$MAIN.wdo" -v NOSP="$NOSTRIP" '
BEGIN{strip=NOSP; curfile=fstfile}
{
if (strip == 0){
if(substr($0,1,1) != "@")next
if($5 < 6)next #skip BNF and LISTS
strip=1#turn off stripping
}
#search for *XZMARKZX
if($1 != "*XZMARKZX"){print $0 >> curfile; next}
#a separator image was read switch output file
curfile=$2 ".wdo"
}' "$WDOFILE"
  rm -f "$WDOFILE"
else
  #only one file either a debug or a main only
  #for a debug run strip the tops and the symbol table
  if [ "$DEBUG" -ne 0 ]
  then
    #strip the spec dict (only one) in a debug type run
    # Copying stops at the first line beginning with "S".
    # NOTE(review): presumably that is where the symbol table starts - confirm.
    awk 'BEGIN{noskp=0}
{if(substr($0,1,1) == "S")exit
if(!noskp){
if(substr($0,1,1) != "@")next
if($5 < 6)next #skip BNF and LISTS
if($5 == 9)next #skip canonical forms
noskp=1}
print $0 }' "$WDOFILE" > "$MAIN.wdo"
    rm -f "$WDOFILE"
  else
    # A no-strip run wrote $MAIN.wdo directly, so there is nothing to do.
    if [ "$NOSTRIP" -eq 0 ]
    then
      #a strip main only generation
      # The flag is named strip consistently; BEGIN used to initialise an
      # unrelated name while the body tested strip.
      awk 'BEGIN{strip=0}
{ if (strip == 0){
if(substr($0,1,1) != "@")next
if($5 < 6)next #skip BNF and LISTS
strip=1}#turn off stripping
print $0
}' "$WDOFILE" > "$MAIN.wdo"
      rm -f "$WDOFILE"
    fi
  fi
fi
echo " ... done ..."
# restore locale
# NOTE(review): these are fixed values, not the caller's saved settings, and
# as exports of this process they can only reach the commands below - confirm
# whether they are still wanted.
LC_CTYPE="en_US.iso885915"; export LC_CTYPE
LC_NUMERIC="en_US.iso885915"; export LC_NUMERIC
LC_COLLATE="en_US.iso885915"; export LC_COLLATE
# Make the main dictionary files writable for everybody.  $MAIN is used
# rather than $1: in a multi-dictionary run the main dictionary has been
# shifted away and the positional parameters hold only the special
# dictionaries.
chmod gou+w "$MAIN.wdo" "$MAIN.src" "$MAIN.alf"
echo "--- You can run /griffin.c/lsp/bin/chkcomp $LISTNAME"
if [ "$nargs" -eq 1 ]
then
  echo "    to see how $LISTNAME was compiled."
else
  echo "    to see how combined was compiled."
  # Every special dictionary gets the same treatment, not just one of them.
  for spf
  do
    chmod gou+rw "$spf.wdo" "$spf.src" "$spf.alf"
  done
fi
echo "--- Run $LSPBIN/exportMLPdicts for SHML and wdatt"
