/ download K at www.kx.com.
/ Given 
/ prodsubstrate(id, name)
/ reactionsubstrate( reaction id, substrateid, stoich, importance)
/ reactionproduct( reaction id , productid, stoich, importance)
/ directedge( reaction id, substrateid, productid)
/ reactionenergy( reactionid, enzyme,  free energy symbolic, major path)
/ affygene( affy, gene)
/ geneenzyme( gene, enzyme)
/ experdesc( experiment, treat, conditions)
/ expervalue( experiment, treat, replica, affyid, value)
/ synpath(path, start, end)
/ mile(milestones)

/ Infer pathways: given starting substrate and ending value,
/ infer pathway between.
/ Find the affys that are there.
/ 
/ Queries:
/ 1. given start and end, find path
/ 2. given end, how to produce


\l db

/ PROCESS ARGUMENTS

filename: "peter.k"
allroutesflag: 0
mode: `default
okflag: 1
/ +q says give me a query after this
/ +p says stop after generating matrix
/ +pmin says don't regenerate matrix and keep going
/ +allroutes says all routes flag
processargs:{[args]
  i: 0
  while[(i < #args) & okflag
	okflag:: 0
	addextra: 0
	if[args[i] ~ "+q"
		filename:: args[i+1]
		mode:: `query
		addextra: 1
		okflag:: 1
	] 
	if[args[i] ~ "+p"
		mode:: `prepare
		okflag:: 1
	] 
	if[args[i] ~ "+pmin"
		mode:: `preparemin
		okflag:: 1
	] 
	if[args[i] ~ "+allroutes"
		allroutesflag:: 1
		okflag:: 1
	] 
	i+: 1 + addextra
	if[0 = okflag
		` 0: "format is k transclos [+q filename | +p] [+allroutes]\n"
		` 0: "filename is file for querying (e.g. peter.k)\n"
		` 0: "+p says to read in csv and recreate tcmat matrix \n"
		` 0: "+pmin says to read in csv, but don't recreate tcmat\n"
		` 0: "+allroutes says to consider all routes, not just shortest routes. \n"
		. "\\\\"
	]
  ]
}

/ END PROCESS ARGUMENTS
processargs[_i]
if[ mode = `query
  expression: 1: "expression"
  affygene: 1: "affygene"
  oldaffygene: 1: "oldaffygene"
  reactionenergy: 1: "reactionenergy"
  geneenzyme: 1: "geneenzyme"
  prodsubstrate: 1: "prodsubstrate"
  reactionproduct: 1: "reactionproduct"
  reactionsubstrate: 1: "reactionsubstrate"
  directedge: 1: "directedge"
  directedgelocal: 1: "directedgelocal"
  mile: 1: "mile"
  transporterbase: 1: "transporterbase" 
	/ #transporterbase|transportid|protein|base|target
  transportmolecule: 1: "transportmolecule"
	/ #transportmolecule|transportid|molecule^M
]

/ BASIC ROUTINES


spit:{[list] (-1) _ ,/ ($list) ,\: ("|")}


/ find difference between list[0] and list[1]
listdiff:{[list]
  :differ[list[0]; list[1]]
}

/ returns one if x is a subset of y
subset:{[x; y]
  i: y ?/: x
  : ~ (#y) _in i
}

/ returns one if x is a subset of y
subset:{[x;y] (#y) > |/ y ?/: x}        


differ:{[x;y]
  x,: ()
  y,: ()
  i: y ?/: x
  j: & i = #y
  :?x[j]
}

/ A faster difference, yielding indexes in x that differ from y
differindexes:{[x;y]
  i: y ?/: x
  j: & i = #y
  :j
}


/finds intersection of two lists
/ fastest of all
intersect: {[x;y]
  x,: ()
  y,: ()
  i: x ?/: y
  :x[(?i) _dv #x]
}

/finds intersection of two lists
/ fastest of all
intersect: {[x;y]
  x,: ()
  y,: ()
  if[(#x) < (#y)
    i: x ?/: y
    j: & i < #x
    :x[?i[j]]
  ]
  i: y ?/: x
  j: & i < #y
  :y[?i[j]]
}

/finds intersection of two lists
/ fastest of all
hasintersect: {[x;y]
  x,: ()
  y,: ()
  i: x ?/: y
  : (&/i) < #x
}

/ x is a proper subset of y
propersubset:{[x;y]
  x,: ()
  y,: ()
  if[~ (#x) < (#y); :0] / must be smaller
  :(#x) = (#intersect[x;y])
}

/ x is a proper subset of y
propersubset:{[x;y]
  x,: ()
  y,: ()
  if[~ (#x) < (#y); :0] / must be smaller
  :subset[x;y]
}

/finds indexes in x and y that intersect
/ If x and y are both sets, then the results will be of the same length
/ fastest of all
intersectindexes: {[x;y]
  i: x ?/: y / where each y hits
  j: & i < #x / those ys that hit
  :(i[j];j)
}

/finds indexes in x that intersect with y
intersectleftindexes: {[x;y]
  i: x ?/: y / where each y hits
  j: & i < #x / those ys that hit
  :i[j]
}

/finds indexes in x that intersect with y
/ even if there are dups in the left
intersectleftindexesdup: {[x;y]
  i: y ?/: x
  j: & i < #y
  :j
}

even:{[x] (x % 2) = (_ (x % 2))}

/finds intersection of two lists
/ and returns index pairs of matches. Assumes no duplicates
/ in either list
intersectbothindexes: {[x;y]
  x,: ()
  y,: ()
  i: x ?/: y
  pairs: (i ,' (!#y))
  k: & pairs[;0] < #x
  :pairs[k]
}

intersectbothindexesdup:{[x;y]
     x,: ()
     y,: ()
     uniqx: ?x
     partx: = x
     uniqy: ?y
     party: = y
     uniqpairs: intersectbothindexes[uniqx; uniqy]
     k: 0
     outpairs: ()
     while[k < #uniqpairs
        mypair: uniqpairs[k]
        outpairs,: ,/ partx[mypair[0]] ,/:\: party[mypair[1]]
        k+: 1
     ]
     :outpairs
}

multiintersectallindexes:{[lists]
  x: multiintersect[lists]
  out: ()
  i: 0
  while[i < #lists
        y: lists[i]  ?/: x
        out,: ,y
        i+: 1
  ]
  :+out
}


/ intersect many lists
multiintersect:{[lists]
 size: #lists
 if[2 > size; :lists]
 first: lists[0],()
 jj: ,/ ?:' first (?/:)/: lists[1+ !(size-1)] / find indexes in first
 x: @[(1+#first) # 0; jj; + ; 1]
 x: (-1) _ x / delete missing entry
 kk: & x = size - 1
 :first[kk]
}

/ this is a set intersection so we remove duplicates
multiintersect:{[lists]
 size: #lists
 if[2 > size; :lists]
 first: lists[0],()
 jj:  first ?/: (,/ ?:' lists[1+ !(size-1)]) / find indexes in first
 x: @[(1+#first) # 0; jj; + ; 1]
 x: (-1) _ x / delete missing entry
 kk: & x = size - 1
 :first[kk]
}

avg:{(+/ x) % # x}

var:{avg[_sqr x] - _sqr avg[x]}
std:{_sqrt var[x]}
cov:{avg[x * y] - avg[x] * avg[y]}
corr:{ (cov[x;y])%((std[x]) * (std[y]))}
/ delay based search
corrdelay:{[delay;x;y] 
  x: (-delay) _ x
  y: delay _ y
  (cov[x;y])%((std[x]) * (std[y]))}

 
/ END BASICS


/ FILE INPUT
/ Table schemas are written:
/ with a leading number sign, so # R| A |B|  Car |Salary
/ for a table R(A,B,Car,Salary).
/ The values are written without a number sign
/ Interior blanks are significant, but leading and trailing blanks in
/ a field are not.
/ Null values should be represented by having only blanks in a field.
/ Tables are separated by a single blank line


inputfromfile:{[filename]
  a: 0: filename
  globalvals:: ()
  oldglobalvals:: () / should not be necessary
  justempty:: 1 / as if we've just seen an empty line. should process table
  a,: ,,""
  processline'a
}

/ a list is numeric if each member is either empty, a period
/ or a digit
isnumeric:{[list]
  :0 / HACK: always return 0 here
  s: ,/list
  nums: "-1234567890."
  ii: & s = "."
  if[1 < #ii; :0]
  x: nums ?/: s
  :(#nums) > |/x / if greater then everything in s met its match
}

/ parses a field based on vertical bars
getfields:{[line]
  i: line = "|"
  j1: &i
  j2: &~i
  line @:j2
  size: #j1
  :(0,(j1 - !size)) _ line
}

/ get rid of blanks at either end of the string
delendblanks:{[string]
  if[0 = #string; :""]
  if[string ~ ,"" ; :""]
  i: & ~ string = " "
  if[(#string) = (#i); :string]
  if[0 = (#i); :""]
  string: (- ((#string)  - (1 + *|i))) _ string
  :(*i) _ string
}
  
/ convert all characters to lower case
lower:{[let]
 if[0 ~ #let
        :let]
 v: _ic let
 if[(64 < v) & (v < 91)
	v+: (97-65)
	:_ci v
 ]
 :let
}
  

/ Handles one line of input at a time according to table schema above.
/ A little more space efficient than the previous version.
processline:{[line]
 emptyorblank:{[line] (0 = #delendblanks[line])}
 emptyflag: emptyorblank[line]
 if[~emptyflag
  justempty:: 0
  if[line[0] = "#"
    newline: delendblanks'getfields[1 _ line]
    if[(1 < #newline) 
	globaltable:: *newline
	globalatt:: 1 _ newline
	globalvals:: () / initialize
    ] / newline
  ] / table declaration
  if[~ line[0] = "#"
	newline: delendblanks'getfields[line]
	if[(1 < #newline) | (0 < #newline[0]) 
		globalvals,: ,newline
	]
  ] / end of test on data line
 ] / end of test on line is non-empty
 if[emptyflag & (0 = justempty)
	justempty:: 1
	/ if[ ((#globalatt) > 1) | (~ (#globalatt) = (^globalvals)[0])
		/ globalvals:: + globalvals / take transpose
	/ ]
	if[~ (#globalatt) = (#globalvals[0]); !-1]
	numericflag:()
	hh: 0
	while[hh < #globalatt
		numericflag,: isnumeric[globalvals[;hh]] 
		hh+: 1
	]
	inindex:: 0
	while[inindex < #globalatt
	  / table.att[2] :: ` $ vals[3]
		string1: globaltable, (".")
		string1,: globalatt[inindex]
		teststring: ("0 = #"), string1
		/ For big sizes, uncomment following two statements
		/ User will get an error message, but it will work.
		/ x: @[.: ; teststring; :]
		/ currentlyempty: *x / error means empty
		/ For big sizes, comment following
		currentlyempty: 1
		/ if empty, then assign, else append
		string2: :[currentlyempty; (":: "); (",: ")] 
		string3: :[0 = numericflag[inindex]
			"` $ "
			:[1 = numericflag[inindex]; "0.0 $ "; "0 $ "]]
		string3,: "globalvals[;inindex]"
		. string1, string2, string3
		if[1 = numericflag[inindex]
			globalindexes:: & 0N = . string1
			string3: string1, ("[globalindexes] :: `")
			. string3
		]
		inindex+: 1
	]
 ]
}

/ DUMP TABLE dumptable


/ formstring takes a list and makes a string
formstring:{[list]
  list,: ()
  : (-1) _ ,/ ($list) ,\: (" ")
}

formstringvertbar:{[list]
  list,: ()
  : (-1) _ ,/ ($list) ,\: ("|")
}

formstringcomma:{[list]
  list,: ()
  : (-1) _ ,/ ($list) ,\: (",")
}

/ Output a table (a variable) to a text file outfile (string)
/ e.g.   output[`guide; guide; "foobar"]
dumptable:{[tablename; table; outfile]
  out: ,("# "), ($tablename), ("|"), formstringvertbar[!table]
  first: *!table
  numofelements: . ("#"), ($tablename), ("."), ($first)
  i: 0
  while[i < numofelements
	list: table[;i]
	x: formstring'list
	out,: , (-1) _ ,/x ,\: ("|")
	i+: 1
  ]
  outfile 0: out
}

dumptablecsv:{[tablename; table; outfile]
  out: , formstringcomma[!table]
  first: *!table
  numofelements: . ("#"), ($tablename), ("."), ($first)
  i: 0
  while[i < numofelements
	list: table[;i]
	x: formstring'list
	out,: , (-1) _ ,/x ,\: (",")
	i+: 1
  ]
  outfile 0: out
}

/ END OF DUMP TABLE


/ APPLICATION SPECIFIC

/ return 1 if x is a prefix of y (not necessarily proper)
prefix:{[x;y] 
  if[ (#x) < (#y)
	:x ~ y[!#x]
  ]
  :0
}

/ eliminate prefixes from a list
squeezeprefix:{[list]
  if[2 > #list; :list]
  ind: !#list
  cross: ,/ ind ,/:\: ind
  cross@: & ~ cross[;0] = cross[;1]
  jj: & prefix'[list[cross[;0]]; list[cross[;1]]]
  badones: ?cross[;0][jj]
  :list[differ[ind;badones]]
}

/ return 1 if x is a suffix of y (not necessarily proper)
suffix:{[x;y] 
  if[(#x) < (#y)
	:x ~ |((|y)[!#x])
  ]
  :0
}

/ eliminate prefixes from a list
squeezesuffix:{[list]
  if[2 > #list; :list]
  ind: !#list
  cross: ,/ ind ,/:\: ind
  cross@: & ~ cross[;0] = cross[;1]
  jj: & suffix'[list[cross[;0]]; list[cross[;1]]]
  badones: ?cross[;0][jj]
  :list[differ[ind;badones]]
}

/  Build the matrix from a set of source-destination pairs.
build:{[all; directedge]
  mat: ()
  mat: ((#all);(#all)) # maxnum / extreme distance
  i: 0
  while[i < #mat
	mat[i;i]: 0
	i+: 1
  ]
  jsource: all _bin/: directedge.substrateid
  jdest: all _bin/: directedge.productid
  j: 0
  while[j < #jsource
	mat[jsource[j]; jdest[j]]: 1
	j+: 1
  ]
  :mat
}

/ find the transitive closure of a matrix, giving the minimum
/ length of a path
tc:{[mat]
  old: mat
  while[1
	mat: square[mat]
	if[old ~ mat; :mat]
	old: mat
  ]
}

/ must get rid of edgesleaving milestones after the initial
/ establishment of edges
tcstop:{[mat; milenumbers]
  increment: mat
  i: 0
  while[i < #milenumbers
	myindex: milenumbers[i]
	increment[myindex]: (#increment[0]) # maxnum
	increment[myindex;myindex]: 0
	i+: 1
  ]
  while[1
	mat: matmult[mat; increment]
	if[old ~ mat; :mat]
	old: mat
  ]
}

matmultold:{[mat; mat2]
  new: &/'' mat +/:\: +mat2
  :new &'' mat
}


matmult:{[mat; mat2]
  tmat: +mat2
  new: mat
  i: 0
  while[i < #mat
        new[i]:  &/' (mat[i]) +/: tmat
        i+: 1
  ]
  :new &'' mat
}


/ testing data
xmat: (0 1 9 9 1 9 9
9 0 1 9 9 9 9
9 9 0 1 9 9 9
9 9 9 0 1 9 9 
9 9 9 9 0 1 9 
9 9 9 9 9 0 1 
1 9 9 9 9 9 0) 

/ xmiles: 0 2 3 6
/ maxnum: 90
/ xmattc: tcstop[xmat; xmiles]
/ end of testing data

/ Finding the square of matrix and including the path length
squareold:{[mat]
  new: &/'' mat +/:\: +mat
  :new &'' mat
}

square:{[mat]
  tmat: +mat
  new: mat
  i: 0
  while[i < #mat
	new[i]:  &/' (mat[i]) +/: tmat	
	i+: 1
  ]
  :new &'' mat
}
  
squarenew:{[mat]
  :mat(&/+)/:\:+mat
}
  

/ Find shortest routes from a source to a destination
/ Input is the transitive closure as numbers.

findroutesold:{[source; dest; excluded; tcmat]
  out: ()
  dist: tcmat[source;dest]
  if[dist=maxnum; :()]
  / if[dist = 1; :,(source; dest)]
  directroute: ()
  if[dist = 1; directroute,:,(source; dest)]
  intermediates: & (tcmat[;dest] = 1) 
  intermediates: differ[intermediates _dv source; excluded] 
    / to cut down infinite loop
  intermediates: intersect[intermediates; & tcmat[source;] < maxnum]
  if[0 = #intermediates; :()]
  excluded,: intermediates
  x: (findroutes[source;;excluded;tcmat]'intermediates) 
  / attempt to find only routes that have no nulls
  qq: ,/x
  zz: 4::' ,/x
  izz: & zz = -1
  qq@: izz
  if[0 = #qq; :directroute]
  y: (qq) ,\: dest
  / y: (*:'x) ,\: dest
/ if[dest = 3; !-2]
  / :y
   :directroute,y
}

/ finds shortest routes
findroutes:{[source; dest; excluded; tcmat]
  out: ()
  dist: tcmat[source;dest]
  if[dist=maxnum; :()]
  if[dist = 1; :,(source; dest)]
  intermediates: & (tcmat[;dest] = 1) 
  intermediates: differ[intermediates _dv source; excluded] 
    / to cut down infinite loop
  intermediates: intersect[intermediates; & tcmat[source;] < maxnum]
  if[0 = #intermediates; :()]
  excluded,: intermediates
  x: (findroutes[source;;excluded;tcmat]'intermediates) 
  / attempt to find only routes that have no nulls
  qq: ,/x
  zz: 4::' ,/x
  izz: & zz = -1
  qq@: izz
  if[0 = #qq; :()]
  y: (qq) ,\: dest
  / y: (*:'x) ,\: dest
/ if[dest = 3; !-2]
  :y
}

nodup:{[list] (#list) = #?list}

/ find all non-redundant routes
findallroutes:{[source; dest;  tcmat]
  out: ()
  dist: tcmat[source;dest]
  if[dist=maxnum; :()]
  if[dist = 1; out,: ,(source; dest)]
  i: 0
  flag: 1
  while[flag
	i+: 1
  	intermediates: & (tcmat[;dest] = i) 
  	intermediates: intermediates _dv source
	flag: 0
	if[0 < #intermediates
		x: findroutes[source;;();tcmat]'intermediates
		z:  findroutes[;dest;();tcmat]'intermediates
		jj: 0
		new: ()
		while[jj < #x
			if[0 < #x[jj]
				myz: 1 _' z[jj]	
				toadd: ,/x[jj] ,/:\: myz
				zz: & nodup'toadd
				new,: toadd[zz]
			]
			jj+: 1
		]
		if[0 < #new
			y: differ[new;out]
			if[0 < #y
				flag: 1
				out,: y
			]
		]
	]
  ]
  :out
}

/ find routes through transitive closure matrix
alphafindroutes:{[alphasource; alphadest]
  / if[alphadest = `"00108";!-1]
  source: all _bin alphasource
  dest: all _bin alphadest
  out: :[allroutesflag
  	findallroutes[ source; dest;  tcmat]
  	findroutes[ source; dest; (); tcmat]]
  if[0 = #out; :()]
  :all[out]
}


/ looks for routes among the important ones
/ This looks in tcmatimportant (which are the paths whose
/ product and substrates are all important)
alphafindroutesimportant:{[alphasource; alphadest]
  / if[alphadest = `"00108";!-1]
  source: all _bin alphasource
  dest: all _bin alphadest
  out: :[allroutesflag
  	findallroutes[ source; dest;  tcmatimportant]
  	findroutes[ source; dest; (); tcmatimportant]]
  if[0 = #out; :()]
  :all[out]
}

/ don't specify the locations of source and dest
alphafindrouteslocalgen:{[alphasource; alphadest]
  ii: & ($alllocal) _sm\: ($alphasource),("*") 
  sourcelocs: alllocal[ii]
  ii: & ($alllocal) _sm\: ($alphadest),("*") 
  destlocs: alllocal[ii]
  out: ()
  i: 0
  while[i < #sourcelocs
	j: 0
	while[j < #destlocs
		out,: alphafindrouteslocal[sourcelocs[i]; destlocs[j]]
		j+: 1
	]
	i+: 1
  ]
  :out
}

/ find routes through transitive closure matrix
alphafindrouteslocal:{[alphasourceloc; alphadestloc]
  / if[alphadest = `"00108";!-1]
  source: alllocal _bin alphasourceloc
  dest: alllocal _bin alphadestloc
  out: :[allroutesflag
  	findallroutes[ source; dest;  tcmatlocal]
  	findroutes[ source; dest; (); tcmatlocal]]
  if[0 = #out; :()]
  :alllocal[out]
}

/ looks for routes but stop at the first milestone
/ This looks in tcmatimportant (which are the paths whose
/ product and substrates are all important)
alphafindroutesstop:{[alphasource; alphadest]
   z: ()
   x: alphafindroutes[alphasource; alphadest]
   relmiles:mile.milestones _dv alphasource
   i: 0
   while[i < #x
        jj: &/ x[i] ?/: relmiles
        if[jj < #x[i]
                x[i]@: !1+jj
        ]
        if[(|,/x[i])[0] = alphadest
          z,: ,x
        ]        
        i+: 1
   ]  
   :?z
} 

/ looks for routes but stop at the first milestone
/ This looks in tcmatimportant (which are the paths whose
/ product and substrates are all important)
alphafindroutesimportantstop:{[alphasource; alphadest]
   x: alphafindroutesimportant[alphasource; alphadest]
   relmiles: mile.milestones _dv alphasource
   i: 0
   z: ()
   while[i < #x
	jj: &/ x[i] ?/: relmiles
	if[jj < #x[i]
		x[i]@: !1+jj
	]
	if[(|,/x[i])[0] = alphadest
          z,: ,x
        ]
        i+: 1
        
   ]
   :?z
}

/ ??? Peter: this looks only at milestones, but may not capture
/ the leaves properly
alphafindroutesimportantstop:{[alphasource; alphadest]
  / if[alphadest = `"00108";!-1]
  source: all _bin alphasource
  dest: all _bin alphadest
  out: :[allroutesflag
  	findallroutes[ source; dest;  tcmatimportantstop]
  	findroutes[ source; dest; (); tcmatimportantstop]]
  if[0 = #out; :()]
  :all[out]
}

/ find all routes from all sources to a destination
alphahowproduce:{[alphadest]
  dest: all _bin alphadest
  out: & tcmat[;dest] < maxnum
  :all[out]
}


/ find all substrate predecessors of the destination
/ Always on the complete matrix
alphaallroutesold:{[alphadest]
  dest: all _bin alphadest
  out: & tcmat[;dest] < maxnum / all ways to reach dest
  myroots: intersect[out; allroots]
  :,/alphafindroutes[;all[dest]]'all[myroots]
}

/ get all those that lead to some alphadest
alphaallsources:{[alphadest]
  dest: all _bin alphadest
  out: & tcmat[;dest] < maxnum / all ways to reach dest
  outnum: tcmat[out;dest]
  :all[out[>outnum]] / from farthest to closest
}

/ get all those that lead to some alphadest but are important
alphaallsourcesimportant:{[alphadest]
  dest: all _bin alphadest
  out: & (tcmatimportant[;dest] < maxnum) & (~0 = tcmatimportant[;dest])  / all ways to reach dest
  outnum: tcmatimportant[out;dest]
  :all[out[>outnum]] / from farthest to closest
}

/ get all those that start from some source
alphaalldests:{[alphasource]
  source: all _bin alphasource
  out: & tcmat[source] < maxnum / all ways to get to a dest
  outnum: tcmat[source;out] / actual numbers
  :all[out[>outnum]] / from farthest to closest
}

/ get all those that start from some source up to some limit
alphaalldestslimit:{[alphasource; limit]
  source: all _bin alphasource
  out: & tcmat[source] < limit+1 / all ways to get to a dest
  outnum: tcmat[source;out] / actual numbers
  :all[out[>outnum]] / from farthest to closest
}

/ get all those that start from some source
alphaalldestsimportant:{[alphasource]
  source: all _bin alphasource
  out: & tcmatimportant[source] < maxnum / all ways to get to a dest
  outnum: tcmatimportant[source;out] / actual numbers
  :all[out[>outnum]]
}


/ limit is some number. Want only limit levels before alphadest
alphaallrouteslimit:{[alphadest; limit]
  paths: alphaallroutes[alphadest]
  rpaths: |:' paths
  out: ()
  i: 0
  while[i < #rpaths
	count: (#rpaths[i]) & limit
	out,: ,|rpaths[i;!count]
	i+: 1
  ]
  out?:
  :out
}


/ Is there some route from a root to this
alphahasroute:{[alphadest]
  dest: all _bin alphadest
  out: & tcmat[;dest] < maxnum / all ways to reach dest
  :hasintersect[out; allroots]
}

/ find roots in the transitive closure matrix
findroots:{[mat]
  myroots: ()
  i: 0
  while[i < #mat[0] / all columns
	col: mat[;i]
	col: col _dv 0 / take away self-loop
	if[maxnum = &/ col
		myroots,: i
	]
	i+: 1
  ]
  :myroots
}

/ find leaves in the transitive closure matrix
findleaves:{[mat]
  myleaves: ()
  i: 0
  while[i < #mat[;0] / all rows
	row: mat[i]
	row: row _dv 0 / take away self-loop
	if[maxnum = &/ row
		myleaves,: i
	]
	i+: 1
  ]
  :myleaves
}

/ find the name of the product
findname:{[symbol]
  ii: & prodsubstrate.id = symbol
  :prodsubstrate.name[ii]
}

findfirstname:{[symbol]
  ii: & prodsubstrate.id = symbol
  if[0 = #ii; :()]
  :prodsubstrate.name[*ii]
}


findcommonname:{[symbol]
  ii: & (prodsubstrate.id = symbol)
  if[0 = #ii; :()]
  jj: & (prodsubstrate.id = symbol) & (prodsubstrate.common = `yes)
  if[0 = #jj; :prodsubstrate.name[*ii]] / no common one, return first one
  :prodsubstrate.name[*jj] / return common one
}

/ This outputs the
/the name that matches the symbol
findcommonnamesymbol:{[symbol]
   name: findcommonname[symbol]
   :(symbol;name)
}      

/ When bringing in prodsubstrate, create a new column
/ prodsubstrate.lowername: lower'' prodsubstrate.name
/ find substrate ids having name as a substring
findsymbol:{[name]
  ii: & prodsubstrate.lowername _sm\: ("*"),(lower'$name),("*")
 :((` $ prodsubstrate.name[ii]),' prodsubstrate.id[ii])
}


/ find substrate ids having name as a substring
findsymbolold:{[name]
  ii: & prodsubstrate.name _sm\: ("*"),($name),("*")
 :((` $ prodsubstrate.name[ii]),' prodsubstrate.id[ii])
}


/ find the gene from the affy
findgene:{[affyid] affygene.gene[&affygene.affy = affyid]}

/ findaffy finds the affyid from the gene
findaffy:{[gene] affygene.affy[& affygene.gene = gene]}

/ findreactionsthatuse finds reactions that use molecule
/ as a substrate
findenzymesthatuse:{[symbol]
   ii: & reactionsubstrate.substrateid = symbol
   jj: & reactionenergy.reactionid _in\: reactionsubstrate.reactionid[ii]
   :reactionenergy.enzyme[jj]
}
/findenzymesthatproduce finds enzyme that have a molecule as a product
findenzymesthatproduce:{[symbol]
    ii: & reactionproduct.productid = symbol
    jj: & reactionenergy.reactionid _in\: reactionproduct.reactionid[ii]
    :reactionenergy.enzyme[jj]
}


/ find enzyme(s) with gene geneenzyme( gene, enzyme)
findenzyme:{[gene]
  ii: & geneenzyme.gene = gene
  :geneenzyme.enzyme[ii]
}

findgenefromenzyme:{[enzyme]
  ii: & geneenzyme.enzyme = enzyme
  :geneenzyme.gene[ii]
}

/ reactionenergy( reactionid, enzyme, freeenergysymbolic)
/ find reactionid from enzyme
findreaction:{[enzyme]
  ii: & reactionenergy.enzyme = enzyme
  :reactionenergy.reactionid[ii]
}

findsubstratefromreaction:{[reactionid]
  ii: & reactionsubstrate.reactionid = reactionid
  :reactionsubstrate.substrateid[ii]
}


findproductfromreaction:{[reactionid]
  ii: & reactionproduct.reactionid = reactionid
  :reactionproduct.productid[ii]
}

/finds the expression value for a gene given the affyid using expression.csv

findexpressionvalue:{[affyid]
  ii: & expression.affyid = affyid
  :expression.value[ii]
}
/findexpressionfromgene find the expression value for a gene directly
findexpressionfromgene:{[gene]
   ii: & affygene.gene = gene
  jj: & expression.affyid _in\: affygene.affy[ii]
  : (expression.value[jj] ,\: gene)
}

/findexpressionfromenzyme find the expression value from an enzyme name
findexpressionfromenzyme:{[enzyme]
    ii: & geneenzyme.enzyme = enzyme
  jj: & affygene.gene _in\: geneenzyme.gene[ii]
  kk: & expression.affyid _in\: affygene.affy[jj]
 : (expression.value[kk] ,\: enzyme)
}

/ I've added a if statement (currently deactivated by a /).  This statement
/will return all reactionids, myenzymes, and genes if every gene doesn't
/have an affy, but still returns info on genes w/ an affy.  This creates 
/redundant info in the output.  I'd like that redundancy eliminated:  that
/is out put only the complete set of info (including expression value for
/gene with an affy
 findexpressionfromreactionold:{[reactionid]
  hh: & reactionenergy.reactionid = reactionid
  out: ()
  i: 0
  while[i < #hh
 	myenzyme: reactionenergy.enzyme[hh[i]]
 	ii: & geneenzyme.enzyme = myenzyme
 	jj: & affygene.gene _in\: geneenzyme.gene[ii]
 	noaffygene: differ[geneenzyme.gene[ii]; affygene.gene[jj]]
 	if[0 <  #noaffygene 
		out,: reactionid,/: myenzyme,/: noaffygene
	]
	j: 0
	while[j < #jj
  		mygene: affygene.gene[jj[j]]
 		qq: & affygene.gene = mygene
     		kk: & expression.affyid _in\: affygene.affy[qq]
  		out,: reactionid,/: myenzyme,/:mygene,/:(expression.value[kk] )
  		j+: 1
	]
	i+: 1
  ]
  :out
}
/ finds regulated things (proteins, genes, molecules, proteins from genes) based on the x.ysymbol (e.g. proteins.prosymbol) 
/ generated by createcombofiles
findregtypeweb:{[reg; type; filename]
     ii: ()
     out: ()
     out,: createheader[filename]
     if[(type = `"gene") | (type = `"protein_from_gene")
         if[reg = `I
            reg_type: `"Up Regulated" 
            ii: & expression.expsymbol = `UR
         ]
         if[reg = `R
            reg_type: `"Down Regulated"
            ii: & expression.expsymbol = `DR
         ]
         if[reg = `NC
            reg_type: `"Not Changed"
            ii: & expression.expsymbol = `NC
          ]
     ]
     if[~0 = #ii
         if[type = `"gene"
            affyid: expression.affyid[ii]
            call: expression.call[ii]
            value: expression.value[ii]
            expsymbol: expression.expsymbol[ii]
            gene: findgene' affyid
            jj: #:'gene 
            ll: & jj = 0
            gene[ll]: `"none entered"
            protein: findenzyme' ,/gene
            jj: #:'gene
            ll: & jj = 0
            gene[ll]: `"none entered"
            if[~0 = (#proteins.protein)
                mm: intersectleftindexes[proteins.protein]'protein
            ]
            jj: #:'protein
            ll: & jj = 0
            protein[ll]: `"none entered"
            ll: & jj = 2
            protein_2: spitand' protein[ll]
            protein_2: ` $ protein_2
            protein[ll]: protein_2
            gene: ,/gene
            protein: ,/protein
            if[0 = (#proteins.protein)
               out,: ,($STB1), ($SR), ($SC), "Affymetrix ID", ($EC), ($SC), "Gene Expression Call", ($EC), ($SC), "P/A Call", ($EC), ($SC), "Gene Value", ($EC), ($SC), "NCBI gene name", ($EC), ($SC), "Protein(s)", ($EC), ($ER)
               out,: ,(SR),/:(SC),/:(affyid),'(EC),/:(SC),/:(expsymbol),'(EC),/:(SC),/:(call),'(EC),/:(SC),/:(value),'(EC),/:(SC),/:(gene),'(EC),/:(SC),/:(protein),\:(EC)
               out,: ,($ET) 
            ]
            if[~0 = (#proteins.protein)
              jj: #:'mm 
              ll: & jj = 0
              proteinvalue: protein
              proteinsymb: protein
              proteinvalue[ll]: `"no data"
              proteinsymb[ll]: `"no data"
              kk: & jj > 0
              proteinvalue[kk]: proteins.provalue[mm[kk]]
              proteinsymb[kk]: proteins.prosymbol[mm[kk]]
              kk: & jj > 1
              temp_value: spitand' proteins.provalue[kk]
              temp_symb: spitand' proteins.prosymbol[kk]
              temp_value: ` $ temp_value
              temp_symb: ` $ temp_symb
              proteinvalue[kk]: temp_value
              proteinsymb[kk]: temp_symb
              out,: ,($STB1), ($SR), ($SC), "Affymetrix ID", ($EC), ($SC), "Gene Expression Call", ($EC), ($SC), "P/A Call", ($EC), ($SC), "Gene Value", ($EC), ($SC), "NCBI Gene Name", ($EC), ($SC), "Protein(s)", ($EC), ($SC), "Protein Value", ($EC), ($SC), "Protein Expression Call", ($EC), ($ER)
              out,: ,(SR),/:(SC),/:(affyid),'(EC),/:(SC),/:(expsymbol),'(EC),/:(SC),/:(call),'(EC),/:(SC),/:(value),'(EC),/:(SC),/:(gene),'(EC),/:(SC),/:(protein),'(EC),/:(SC),/:(proteinvalue),'(EC),/:(SC),/:proteinsymb,\:(EC)                            
              out,: ,($ET)
          ]
          out,: createtrailer[] 
          out: ` $ ($out)
          out: ,//out
          out: ,:'out
          :spit' out
       ]   
       if[type = `"protein_from_gene"
          jj: intersectbothindexesdup[affygene.affy; expression.affyid[ii]]
          jj: ?jj[;0]
          kk: intersectbothindexesdup[geneenzyme.gene; affygene.gene[jj]]
          kk: ?kk[;0]
          enzyme: geneenzyme.enzyme[kk]
          ll: intersectleftindexesdup[geneenzyme.enzyme; enzyme]
          total_enzyme: geneenzyme.enzyme[ll]
          total_uniqs: ?total_enzyme
          total_counts: #:' = total_enzyme
          uenzyme: ?enzyme
          counts: #:' = enzyme
          mm: < uenzyme
          uenzyme@: mm
          counts@: mm
          nn: < total_uniqs
          total_counts@: nn
          if[0 = (#proteins.protein)
             out,: ,($STB1), ($SR), ($SC), "Protein", ($EC), ($SC), "Number of Genes in Genome", ($EC), ($SC), "Number of", ($reg_type), "Genes", ($EC), ($ER)
             out,: ,(SR),/:(SC),/:(uenzyme),'(EC),/:(SC),/:(total_counts),'(EC),/:(SC),/:(counts),\:(EC)
             out,: ,($ET)
         ]
         if[~0 = (#proteins.protein)
            ll: intersectbothindexes[proteins.protein; uenzyme]
            oo: intersectleftindexes[uenzyme; uenzyme]
            proteinsymb: uenzyme
            proteinsymb[oo]: `"no data"
            proteinvalue: uenzyme
            proteinvalue[oo]: `"no data"
            temp_value: proteins.provalue[ll[;0]]
            temp_symbol: proteins.prosymbol[ll[;0]]
            proteinvalue[ll[;1]]: temp_value
            proteinsymb[ll[;1]]: temp_symbol
            out,: ,($STB1), ($SR), ($SC), "Protein", ($EC), ($SC), "Number of Genes in Genome", ($EC), ($SC), "Number of", ($reg_type), "Genes", ($EC), ($SC), "Protein Expression", ($EC), ($SC), "Protein Value", ($EC), ($ER)                
            out,: ,(SR),/:(SC),/:uenzyme,'(EC),/:(SC),/:(total_counts),'(EC),/:(SC),/:(counts),'(EC),/:(SC),/:(proteinsymb),'(EC),/:(SC),/:(proteinvalue),\:(EC)         
         ]
         out,: createtrailer[]
         out: ` $ ($out)
         out: ,//out
         out: ,:'out
         :spit' out        
      ]
   ]
}

findexpressionfromreaction:{[reactionid]
  hh: & reactionenergy.reactionid = reactionid
  out: ()
  i: 0
  while[i < #hh                             
    myenzyme: reactionenergy.enzyme[hh[i]]
    myenzyme,: ()
    ii: & geneenzyme.enzyme _in\:  myenzyme
    if[0 = #ii
       mygene: `"there is no gene in Arabidopsis||"
       out,: spit'reactionid,/: myenzyme,'mygene
    ]
    jj: & affygene.gene _in\: geneenzyme.gene[ii]
    noaffygene: differ[geneenzyme.gene[ii]; affygene.gene[jj]]
    if[0 < #noaffygene
       hold: `"|"
       out,: spit'reactionid,/: myenzyme,/: noaffygene,'hold
    ]
   j: 0
   genes: ?affygene.gene[jj]
   while[j < #genes
    mygene: genes[j]
     qq: & affygene.gene = mygene
     kk: & expression.affyid _in\: affygene.affy[qq]
     out,: spit'reactionid,/:myenzyme,/:mygene,/:(expression.value[kk]),/:(expression.expsymbol[kk])
     j+: 1
   ]
  i+: 1
 ]
 :out
}                              


/ findroutesfromaffy
/ starting from the affyid, find the gene, enzyme, reaction
/ Look at substrates and find the destinations of all those substrates.
findroutesfromaffy:{[af]
 x: findsubstratefromreaction',/findreaction'findenzyme[findgene[af]]
 i:0
 out: ()
 while[i < #x
   zz: ,/ x[i],\:/: allleavesalpha
   routes: alphafindroutes'[zz[;0];zz[;1]]
   jj: & 0 < #:'routes
   / out,: ,(findname'x[i]; findname'''*:'routes[jj])
   out,: ,(findfirstname'x[i]; findfirstname'''*:'routes[jj])
   i+: 1
 ]
:out
}


findreactionfrompair:{[pair]
ii: & (directedge.substrateid = pair[0]) & (directedge.productid = pair[1])
:,/(directedge.reactionid[ii]; pair[0]; pair[1])
}

/ given a path of substrate, product pairs, find the reaction ids
findreactionsfrompath:{[path]
pairs: ((-1) _ path) ,' (1 _ path)
:findreactionfrompair'pairs
}

findenzymefromreaction:{[reactionid]
ii: &reactionenergy.reactionid = reactionid
:reactionenergy.enzyme[ii]
}

/Finds info on affys with expression below a value
findaffysbelow:{[value]
  hh: & expression.value < value
  :(expression.affyid[hh] ,' expression.value[hh])
}

/Finds info on affys with expression below a value
findaffysabove:{[value]
  hh: & expression.value > value
  :(expression.affyid[hh] ,' expression.value[hh])
}

/ second argument is 1 if you want detail and 0 otherwise
findenzymesabove:{[value; detailflag]
  :findenzymesval[value; `above; detailflag]
}

/ second argument is 1 if you want detail and 0 otherwise
findenzymesbelow:{[value]
  :findenzymesval[value; `below; detailflag]
}


/ call findaffysbelow to see whether there are affys
/ with values below the argument.
/ Then see whether there are genes and enzymes. 
/ If so, then present those.
/ e.g. findenzymes[2;`above]
/ third argument is 1 if you want detail and 0 otherwise
findenzymesval:{[value; abovebelow; detailflag]
  if[abovebelow = `below
  	x: findaffysbelow[value]
  ]
  if[abovebelow = `above
  	x: findaffysabove[value]
  ]
  y: differ[x[;0]; affygene.affy]
  ii: intersectleftindexes[x[;0]; y] / those for which we have no gene
  out: x[ii]
  jj: intersectleftindexes[x[;0]; affygene.affy]
  if[0 = #jj; :out]
  x@: jj / only those with a gene
  kk: intersectleftindexes[affygene.affy; x[;0]]
  goodgenes: affygene.gene[kk] / find those genes
  x: goodgenes ,' x
  / now find genes that are not in enzymes
  y: differ[x[;0]; geneenzyme.gene]
  ii: intersectleftindexesdup[x[;0]; y] / those for which there is no enzyme
  out,: x[ii]
  jj: intersectleftindexesdup[x[;0]; ?geneenzyme.gene]
	/ if there are duplicate genes on the left side, we will find them
  if[0 = #jj; :out]
  x@: jj
  kk: intersectleftindexesdup[geneenzyme.gene; ?x[;0]] 
	/ genes that have an enzyme
  goodenzymes: geneenzyme.enzyme[kk] / the enzymes
  goodgenes: geneenzyme.gene[kk]
  xout: ()
  i: 0
  while[i < #x
	jj: & goodgenes = x[i;0]
	xout,: goodenzymes[jj],\: x[i]
	i+: 1
  ]
  x: xout
  / now find enzymes that are not in reactionenergy.enzyme
  y: differ[(?x[;0]); reactionenergy.enzyme]
  ii: intersectleftindexesdup[x[;0]; y] / those having no enzyme
	/ in reactionenergy.enzyme
  out,: x[ii]
  jj: intersectleftindexesdup[x[;0]; ?reactionenergy.enzyme]
  if[0 = #jj; :out]
  x@: jj
  kk: intersectbothindexesdup[reactionenergy.enzyme; x[;0]] 
	/ enzymes that have a reactionenergy
  goodrxns: reactionenergy.reactionid[kk[;0]]
  zz: !#x
  zz: differ[zz; kk[;1]]
  out,: x[zz]
  x@: kk[;1]
  pathout: ()
  i: 0
  while[(i < #x) & detailflag
	qq: findimportantinfofromreaction[goodrxns[i]]
	pathout,: ,x[i]
	pathout,: ,,goodrxns[i]
	pathout,: qq
	i+: 1
  ]
  out,: :[detailflag; pathout; x]
  :spit'out
}


/New by PMP find symbol that exactly matches name.  Seems to be working.
findsymbolexact:{[name]
  ii: & prodsubstrate.name _sm\: name
  : prodsubstrate.id[ii] 
}


/finds a common name of a gene from gene
findgene_common:{[gene]
  ii: & geneenzyme.gene = gene
  :geneenzyme.gene_common[ii]
}

/find gene from gene_common
findgenefromgene_common:{[gene_common]
  ii: & geneenzyme.gene_common = gene_common
  :geneenzyme.gene[ii]
}

/find gene from gene_common
findgenefromgene_common:{[gene_common]
  ii: & geneenzyme.gene_common = gene_common
  :geneenzyme.gene[ii]
}

/finds the localization of an enzyme from gene
findlocfromgene:{[gene]
  ii: & geneenzyme.gene = gene
  :geneenzyme.localization[ii]
}

/finds the localization of an enzyme from gene
findlocfromgene:{[gene]
  ii: & geneenzyme.gene = gene
  :geneenzyme.localization[ii]
}

/find the localization of an enzyme from gene_common
findlocfromgene_common:{[gene_common]
  ii: & geneenzyme.gene_common = gene_common
  :geneenzyme.localization[ii]
}

recordtime:{[string]
  x: _ltime _t
  y: (string) , (": "), ($x[1])
  :y
}

/ New from PMP 2/28/02


/finds expression symbol
findexpressionsymbol:{[affyid]
  ii: & expression.affyid = affyid
  :expression.expsymbol[ii]
}

findcall:{[affyid]
  ii: & expression.affyid = affyid
  :expression.call[ii]
}


whatcanbemadelimit:{[symbol;limit]
  paths: whatcanbemade[symbol]
  out: ()
  i: 0
  while[i < #paths
	count: (#paths[i]) & limit
	out,: ,paths[i;!count]
	i+: 1
  ]
  out?:
  :out
}


/ NEW 3/15/02


/ NEW 3/22/02

/ finds routes to a molecule using tcmat important
alphaallroutesimportant:{[alphadest]
  dest: all _bin alphadest
  out: & tcmatimportant[;dest] < maxnum / all ways to reach dest
  myroots: intersect[out; allroots]
  :squeezesuffix ,/alphafindroutesimportant[;all[dest]]'all[myroots]
}

/ Is there some route from a root to this
/ NOT SURE STILL USEFUL
alphahasrouteimportant:{[alphadest]
  dest: all _bin alphadest
  out: & tcmatimportant[;dest] < maxnum / all ways to reach dest
  :hasintersect[out; allroots]
}


 / finds routes to a molecule using alphaallroutesimportant, but then cuts
/ the routes off at a milestone if there is one in the route
alphaallroutesimportantstopold:{[alphadest]
  paths: alphaallroutesimportant[alphadest]
  rpaths: |:' paths
  i: 0
  out: ()
  while[i < #rpaths
       jj: intersectleftindexes[1 _ rpaths[i]; mile.milestones]
       if[0=#jj
                out,: ,paths[i] / if there is no milestone, give me path
       ]
       if[0<#jj
               jj: (&/jj) + 1
               out,: ,|rpaths[i;!jj+1]
       ]
       i+: 1
  ]
  out?: / eliminate dups
  :out
}

 / finds routes to a molecule using alphaallroutesimportant, but then cuts
/ the routes off at a milestone if there is one in the route
alphaallroutesimportantstop:{[alphadest]
  dest: all _bin alphadest
  out: & tcmatimportantstop[;dest] < maxnum / all ways to reach dest
  myroots: intersect[out; allroots,allmilesnumbers]
  / y:,/alphafindroutesimportant[;all[dest]]'all[myroots]
  y:,/alphafindroutesimportantstop[;all[dest]]'all[myroots]
  out: ()
  i: 0
  while[i < #y
	kk: mile.milestones ?/: y[i]
	ind: &kk < #mile.milestones
	mymiles: (y[i])[ind]
	if[((#?mymiles) = #mymiles) & (3 > #mymiles) 
		out,: ,y[i]
	]
	i+: 1
  ]
  / :squeezesuffix[?,/out]
  :squeezesuffix[?out]
}


/ finds expression info on the enzymes in the paths generated by
/ alphaallroutesimportantstop, but only enzymes in which the prod and
/ substrate were given an importance of 1
findexpinfobiosynold:{[alphadest]
       out: ()
      x: alphaallroutesimportantstop[alphadest]
      i: 0
      while[i < #x
 	  out,: ,"New Path "
          ii: ()
          pairs: ((-1) _ x[i]) ,' (1 _ x[i])
          j: 0
          while[j < #pairs
                       ii: & (reactionsubstrate.substrateid =  pairs[j;0]) & (reactionsubstrate.importance = 1)
                       goodsub:  reactionsubstrate.reactionid[ii]
                       jj: & (reactionproduct.productid =  pairs[j;1]) & (reactionproduct.importance = 1)
                      goodprod: reactionproduct.reactionid[jj]
                      goodrxn: intersect[goodprod; goodsub]
			/ for this substrate-product pair, may be several
			/ reaction ids where the pair is important.
                      expinfo: findexpressionfromreaction' goodrxn
                      subname: findcommonname[pairs[j;0]]
                       prodname: findcommonname[pairs[j;1]]
                       out,: ,($subname), (" to "), ($prodname)
                      out,: spit'expinfo
                       j+: 1
                      ]
                  out,: ,"New Path"
                 i+: 1
           ]
   :out
}

/ finds expression info on the enzymes in the paths generated by
/ alphaallroutesimportantstop, but only enzymes in which the prod and
/ substrate were given an importance of 1

findregfromgenelist:{[list; reg; method; val1; val2; use_or_not]
     out: ()
     ii: intersectleftindexes[affygene.gene; list]
     if[method = `affy
       if[use_or_not = `not
          if[reg = `I
            jj: & (expression.expsymbol = `I) | (expression.expsymbol = `MI)
            nn: & affygene.affy _in\: expression.affyid[jj]
            oo: intersect[ii; nn]  
            out: ?affygene.gene[oo]
          ]
          if[reg = `D
            jj: & (expression.expsymbol = `D) | (expression.expsymbol = `MD)
            nn: & affygene.affy _in\: expression.affyid[jj]
            oo: intersect[ii; nn]
            out: ?affygene.gene[oo]
          ]
          if[reg = `NC
             jj: & (expression.expsymbol = `NC)
              nn: & affygene.affy _in\: expression.affyid[jj]
            oo: intersect[ii; nn]
            out: ?affygene.gene[oo]
          ]
       ]
       if[use_or_not = `use 
          if[reg = `I
            jj: & (expression.expsymbol = `I) | (expression.expsymbol = `MI) 
            kk: & expression.value > val1
            ll: intersect[jj; kk]
            nn: & affygene.affy _in\: expression.affyid[ll]
            oo: intersect[ii; nn]
            out: ?affygene.gene[oo]
          ]
          if[reg = `D
            jj: & (expression.expsymbol = `D) | (expression.expsymbol = `MD)
            kk: & expression.value < val2
            ll: intersect[jj; kk]
            nn: & affygene.affy _in\: expression.affyid[ll]
            oo: intersect[ii; nn]
            out: ?affygene.gene[oo]
          ]
          if[reg = `NC
            jj: & expression.expsymbol = `NC
            kk: & (expression.value > val2) | (expression.value = val2)
            nn: & (expression.value < val1) | (expression.value = val1)
            kk: intersect[kk; nn]
            ll: intersect[jj; kk]
            nn: & affygene.affy _in\: expression.affyid[ll]
            oo: intersect[ii; nn]
            out: ?affygene.gene[oo]
          ]
       ]
    ]
    if[method = `fold_change
       if[reg = `I
          jj: & expression.value > val1
          kk: &  affygene.affy _in\: expression.affyid[jj]
          kk: intersect[ii; kk]
          out: ?affygene.gene[kk]
       ]
       if[reg = `D
          jj: & expression.value < val2
          kk: &  affygene.affy _in\: expression.affyid[jj]
          kk: intersect[ii; kk]
          out: ?affygene.gene[kk]
       ]
       if[reg = `NC
         jj: & (expression.value < val1) | (expression.value = val1)
         ll: & (expression.value > val2) | (expression.value = val2)
         jj: intersect[jj; ll]
         kk: &  affygene.affy _in\: expression.affyid[jj]
          kk: intersect[ii; kk]
          out: ?affygene.gene[kk]
       ]
  ]
  :out
}


findexpinfobiosyn:{[alphadest; method; val1; val2; use_or_not]   
      firstsub: ()
      lastprod: ()
      out: ()
      num_steps: ()
      num_rxn: ()
      tg: ()
      indgene: ()
      ncgene: ()
      repgene: ()
      x: alphaallroutesimportantstop[alphadest]
      i: 0
      while[i < #x
          firstsub,:` $ findcommonname[x[i;0]]
          rx: |x[i]
          lastprod,: ` $ findcommonname[rx[0]]
          j: 0
          out,: , "Pathway ", ($(i+1)), ": ", ($firstsub[i]), " to ", ($lastprod[i])
          rxns: findreactionsfrompathimportant[x[i]]
          num_steps,: #rxns
          frxns: intersect[reactionenergy.reactionid; ,/rxns]
          num_rxn,: #frxns
          enz: findenzymefromreaction' frxns 
          gene: ?,/findgenefromenzyme' ,/enz
          indgene,: ,(findregfromgenelist[gene; `I; method; val1; val2; use_or_not])
          repgene,: ,(findregfromgenelist[gene; `D; method; val1; val2; use_or_not])
          ncgene,: ,(findregfromgenelist[gene; `NC; method; val1; val2; use_or_not])         
          tg,: ,(gene)
          while[j < #rxns
             sub: findcommonname' rxns[j;0]
             prod: findcommonname' rxns[j;1]
             out,: sub " to " prod,
             k: 2
               while[k < #rxns[j]
                  expinfo: findexpressionfromreaction' rxns[j;k]
                  out,: ,expinfo
                  k+: 1
               ]
             j+: 1  
          ]
          i+: 1
      ]
   i: 0
   out1: , ("Pathway #|# of steps in pathway|# of reactions in pathway|# of genes that encode enzymes in the pathway|# of induced genes|# of repressed gene|#not changed genes")
   while[i < #num_steps
      out1,: , ($(i+1)), ("|"), ($num_steps[i]), ("|"), ($num_rxn[i]), ("|"), ($(#tg[i])), ("|"), ($(#indgene[i])), ("|"), ($(#repgene[i])), ("|"), ($(#ncgene[i]))
      i+: 1
   ]
   out1,: out
   :out1
}

/finds expression  info for enzymes that make a molecule for an entire path
findreactionsfrompathimportant:{[path]
  out: ()
  pairs: ((-1) _ path) ,' (1 _ path)
  j: 0
    while[j < #pairs
      ii: & (reactionsubstrate.substrateid =  pairs[j;0]) & (reactionsubstrate.importance = 1)
      goodsub:  reactionsubstrate.reactionid[ii]
      jj: & (reactionproduct.productid =  pairs[j;1]) & (reactionproduct.importance = 1)
      goodprod: reactionproduct.reactionid[jj]
      goodrxn: intersect[goodprod; goodsub]
      out,: ,pairs[j], goodrxn
      j+: 1
    ]
:out
}
/finds expression info for enzymes on where a molecule can go for an entire path
findexpinfobioprod:{[alphadest; method; val1; val2; use_or_not]
      firstsub: ()
      lastprod: ()
      out: ()
      num_steps: ()
      num_rxn: ()
      tg: ()
      indgene: ()
      ncgene: ()
      repgene: ()
      x: whatcanbemadeimportantstop[alphadest]
      i: 0
      while[i < #x
          firstsub,:` $ findcommonname[x[i;0]]
          rx: |x[i]
          lastprod,: ` $ findcommonname[rx[0]]
          j: 0  
          out,: ,"Pathway ", ($(i+1)), ": ", ($firstsub[i]), " to ", ($lastprod[i])
          rxns: findreactionsfrompathimportant[x[i]]
          num_steps,: #rxns
          frxns: intersect[reactionenergy.reactionid; ,/rxns]
          num_rxn,: #frxns
          enz: findenzymefromreaction' frxns
          gene: ?,/findgenefromenzyme' ,/enz
          indgene,: ,(findregfromgenelist[gene; `I; method; val1; val2; use_or_not])
          repgene,: ,(findregfromgenelist[gene; `D; method; val1; val2; use_or_not])
          ncgene,: ,(findregfromgenelist[gene; `NC; method; val1; val2; use_or_not])
          tg,: ,(gene)
          while[j < #rxns
             sub: findcommonname' rxns[j;0]
             prod: findcommonname' rxns[j;1]
             out,: sub " to " prod,
             k: 2
               while[k < #rxns[j]
                  expinfo: findexpressionfromreaction' rxns[j;k]
                  out,: ,expinfo
                  k+: 1
               ]
             j+: 1
          ]
          i+: 1
      ]
      i: 0
      out1: , ("Pathway #|# of steps in pathway|# of reactions in pathway|# of genes that encode enzymes in the pathway|# of induced genes|# of repressed gene|#not changed genes")
      while[i < #num_steps
        out1,: , ("Pathway "), ($(i+1)), ("|"), ($num_steps[i]), ("|"), ($num_rxn[i]), ("|"), ($(#tg[i])), ("|"), ($(#indgene[i])), ("|"), ($(#repgene[i])), ("|"), ($(#ncgene[i]))
        i+: 1
   ]
   out1,: out
   :out1
}


/ get the stoichiometry of a reaction
viewreaction:{[rxn]
  out: ()
  ii: & reactionsubstrate.reactionid = rxn
  jj: & reactionproduct.reactionid = rxn
  subs: ($reactionsubstrate.stoich[ii]),' ("X"),/: (findcommonname'reactionsubstrate.substrateid[ii])
  strsub: (-3) _ ,/subs ,\: (" + ")
  prods: ($reactionproduct.stoich[jj]),' ("X"),/: (findcommonname'reactionproduct.productid[jj])
  strprod: (-3) _ ,/prods ,\: (" + ")
  :strsub, (" --> "), strprod
}


/ finds what can be made from a molecule, but only uses alphafindroutesimportant
whatcanbemadeimportant:{[substrate]
  routes: alphafindroutesimportant[substrate]'allleavesalphaimportant
  ii: & 0 < #:' routes
  :squeezeprefix ,/routes[ii]
}


/ same as above, but stops at first milestone
whatcanbemadeimportantstop:{[alphasource]
  source: all _bin alphasource
  out: & tcmatimportantstop[source] < maxnum / all ways to leave from source
  mydests: intersect[out; allleavesimportant,allmilesnumbers]
  / y:,/alphafindroutesimportant[all[source]]'all[mydest]
  y:,/alphafindroutesimportantstop[all[source]]'all[mydest]
  out: ()
  i: 0
  while[i < #y
	kk: mile.milestones ?/: y[i]
	ind: &kk < #mile.milestones
	mymiles: (y[i])[ind]
	if[((#?mymiles) = #mymiles) & (3 > #mymiles) 
		out,: ,y[i]
	]
	i+: 1
  ]
  :squeezeprefix[?out]
}

/ Find the pathways for important reactions


findreactionsthatuseimportant:{[symbol]
   ii: & (reactionsubstrate.substrateid = symbol) & (reactionsubstrate.importance = 1)
   :reactionsubstrate.reactionid[ii]
}

findreactionsthatuse:{[symbol]
   ii: & reactionsubstrate.substrateid = symbol
   :reactionsubstrate.reactionid[ii]
}

findreactionsthatproduce:{[symbol]
    ii: & reactionproduct.productid = symbol
    :reactionproduct.reactionid[ii]
}

/findreactionsthatproduce finds enzyme that have a molecule as a product
findreactionsthatproduceimportant:{[symbol]
    ii: & (reactionproduct.productid = symbol) & (reactionproduct.importance = 1)
    :reactionproduct.reactionid[ii]
}


importantsubstratesfromreaction:{[rxn]
     ii: & (reactionsubstrate.reactionid = rxn) & (reactionsubstrate.importance = 1)
     :reactionsubstrate.substrateid[ii]
}

importantproductsfromreaction:{[rxn]
     ii: & (reactionproduct.reactionid = rxn) & (reactionproduct.importance = 1)
     :reactionproduct.productid[ii]
}


/ for reaction rxn, find the sources of each substrate and 
/ the end products of each product and put them out.
findimportantinfofromreaction:{[rxn]
        subs: importantsubstratesfromreaction[rxn]
        i: 0
	outsubs: ()
	while[i < # subs
	  sub_routes: alphaallroutesimportantstop[subs[i]]
	  if[0 < # sub_routes
		outsubs,:(subs[i]),/:(sub_routes[;0]),'(#:'sub_routes)
	  ]
	  i+: 1
	]
	if[0 < #outsubs
		outsubs[;0]: findcommonname'outsubs[;0]
		outsubs[;1]: findcommonname'outsubs[;1]
	]
	prods: importantproductsfromreaction[rxn]
	i: 0
	outprods: ()
	while[i < #prods
	  prod_routes: whatcanbemadeimportantstop[prods[i]]
	  if[0 < # prod_routes
	   outprods,:(prods[i]),/:(*:'|:'prod_routes),'(#:'prod_routes)
	  ]
	  i+: 1
	]
	if[0 < #outprods
		outprods[;0]: findcommonname'outprods[;0]
		outprods[;1]: findcommonname'outprods[;1]
	]
	x: (,,`"Substrates: "), outsubs, (,,`"Products: "), outprods
	:x
}


/ finds all the enzymes that are in the same path as a given enzyme
findpathenzymes:{[enz]
   enzymesubs: ()
   enzymeprods: ()
   rxnids: findreaction[enz]
   i: 0
   while[i < #rxnids
     subs: findsubstratefromreaction[rxnids[i]]
     j: 0
     while[j < #subs
       path: alphaallroutesimportantstop[subs[j]]
       k: 0
       while[k < #path
         rxns: findreactionsfrompathimportant[path[k]]
         rxns: ,/rxns
         ii: intersectleftindexes[reactionenergy.reactionid; rxns]
         enzymesubs,: reactionenergy.enzyme[ii]
         k+: 1
       ]
      j+: 1
     ]
    i+: 1
   ]
   i:0
   while[i < #rxnids
     prods: findproductfromreaction[rxnids[i]]
     j: 0
     while[j < #prods
       path: whatcanbemadeimportantstop[prods[j]]
       k: 0
       while[k < #path
         rxns: findreactionsfrompathimportant[path[k]]
         rxns: ,/rxns
         ii: intersectleftindexes[reactionenergy.reactionid; rxns]
         enzymeprods: reactionenergy.enzyme[ii]
         k+: 1
       ]
     j+: 1
     ]
    i+: 1
   ]
x: (,,`"Substrate enzymes: "), enzymesubs, (,,`"Products enzymes: "),enzymeprods
:x
}

/finds out what it is `affy; `geneid, `genename, `protein, `moleculename
/requires everything to match exactly what's in the database
identifyit:{[input]
    type: `unknown
    affy: intersect[expression.affyid; input]
    geneid: intersect[affygene.gene; input]
    geneid,: intersect[geneenzyme.gene; input]
    genename: intersect[geneenzyme.gene_common; input]
    protein: intersect[geneenzyme.enzyme; input]
    reactionid: intersect[reactionenergy.reactionid; input]
    moleculename: findsymbolexact' input
    localization: intersect[geneenzyme.localization; input]
    if[~0 = #affy
       type: `affy
    ]
    if[~0 = #geneid
       type: `geneid
    ]
    if[~0 = #genename
       type: `genename
    ]
    if[~0 = #protein
       type: `protein
    ]
    if[~0 = #reactionid
       type: `reactionid  
    ] 
    if[(~0 = #moleculename) & (0 = #protein)
       type: `moleculename
    ]   
    if[~0 = #localization
       type: `localization
    ]
    :type
}

findencoding:{[input]
   ii: & input = geneenzyme.gene
   :geneenzyme.DNA[*ii]
}

findsubunit:{[input]
   ii: & input = geneenzyme.gene
   :geneenzyme.subunit[ii]
}

findfx_unit:{[input]
   out: `
   ii: & reactionenergy.enzyme = input
   if[~0 = #ii
     out: reactionenergy.fx_unit[*ii]
   ]
   :out
}

linkfunction:{[input]
   type: identifyit' input
   chrom: ()
   out: ()
   if[~0 = #type
     if[type = `affy
        expsymbol: findexpressionsymbol' input
        call: findcall' input
        geneid: findgene' input
        protein: findenzyme'' findgene' input
        protein: ,/protein
        genename: *findgene_common'' *findgene' input
        exp: findexpressionvalue' input
        loc: *findlocfromgene'' *findgene' input
        out: , ("Affy ID: "), ($input), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ),($*expsymbol), ("; "), ("fold change: "), ($*exp)
        if[~0 = #geneid
         if[*genename = `
            genename: `"none entered"
         ]
         if[*loc = `
            loc: `"none entered"
         ]
         chrom: *findencoding'' *findgene' input
         rxn: `
         vrxn: `
         out,: , ("NCBI gene name: "), ($*geneid), ("; "), ("common gene name: "), ($*genename)
         out,:  , ("    protein sub-cellular localization: "), ($*loc), ("; encoded by: "), ($*chrom)
         protein,: ()
         i: 0
         if[0 = #protein
            out,: , ("no protein entered")
         ]
         while[i < #protein
            unit: ()
            subunit: ()
            ii: & (geneenzyme.gene = *geneid) & (geneenzyme.enzyme = protein[i])
            subunit: geneenzyme.subunit[*ii]
            if[subunit = `
              out,: , ("protein name: "), ($protein[i])           
            ]
            if[~subunit = `
               out,: , ("protein name: "), ($protein[i]), (" "), ($geneenzyme.subunit[*ii]), (" subunit")
            ]
            unit: findfx_unit' protein[i]
            if[unit = `
              unit: `"none entered"
            ]
            out,: , ("   functional unit of protein: "), ($*unit)
            rxn: findreaction' protein[i]
            j: 0 
            if[0 =  #rxn
              out,: , ("no reaction for protein")     
            ]
            rxn: ,/rxn
            rxn,: ()
            while[j < #rxn
              vrxn: viewreaction' rxn[j]
              out,: , ("reaction id: "), ($rxn[j])
              out,: ,("reaction: "), ($vrxn)
              j+: 1
            ]
            i+: 1
         ]            
       ]
     ]
     if[type = `geneid
      protein: findenzyme' input
      protein: ,/protein
      genename: findgene_common' input
      affy: findaffy' input
      affy: ,/affy
      loc: findlocfromgene' input
      chrom: *findencoding' input
      if[*genename = `
           genename: `"none entered"
        ]
        if[*loc = `
           loc: `"none entered"
        ]
      out: , ("NCBI gene name: "), ($input), ("; "), ("common gene name: "), ($*genename)
      out,: , ("sub-cellular localization of protein: "), ($*loc), ("; encoded by: "), ($*chrom)     
      if[0 = #affy
        out,: , ("no Affy ID")
      ]
      i: 0
      affy,: ()
      while[i < #affy
         exp: findexpressionvalue' affy[i]
         expsymbol: findexpressionsymbol' affy[i]
         call: *findcall' affy
     out,: , ("Affy ID: "), ($affy[i]), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ), ($*expsymbol), ("; "), ("fold change: "), ($*exp)
         i+: 1
      ]
      i: 0
      if[0 = #protein
         out,: , ("no protein entered")
      ]
      while[i < #protein
            subunit: ()
            unit: ()
            ii: & (geneenzyme.gene = input) & (geneenzyme.enzyme = protein[i])
            subunit: geneenzyme.subunit[*ii]
            if[subunit = `
              out,: , ("protein name: "), ($protein[i])
            ]
            if[~subunit = `
               out,: , ("protein name: "), ($protein[i]), (" "), ($geneenzyme.subunit[*ii]), (" subunit")
            ]
            unit: findfx_unit' protein[i]
            if[unit = `
              unit: `"none entered"
            ]
            out,: , ("   functional unit of protein: "), ($*unit)
            rxn: findreaction' protein[i]
            j: 0
            if[0 =  #rxn
              out,: , ("no reaction for protein")
            ]
            rxn: ,/rxn
            rxn,: ()
            while[j < #rxn
              vrxn: viewreaction' rxn[j]
              out,: , ("reaction id: "), ($rxn[j])
              out,: ,("reaction: "), ($vrxn)
              j+: 1
            ]
         i+: 1
        ]
     ]
   if[type = `genename
      geneid: *findgenefromgene_common' input
      protein: findenzyme' geneid 
      protein: ,/protein
      affy: findaffy' geneid
      affy: ,/affy
      loc: findlocfromgene' geneid
      chrom: *findencoding' geneid
      
      if[*loc = `
           loc: `"none entered"
      ]
      out,: , ("NCBI gene name: "), ($*geneid), ("; "), ("common gene name: "), ($*genename)
      out,:  , ("    protein sub-cellular localization: "), ($*loc), ("; encoded by: "), ($*chrom)
      if[0 = #affy
        out,: , ("no Affy ID entered")
      ]
      i: 0
      affy,: ()
      while[i < #affy
         exp: findexpressionvalue' affy[i]
         expsymbol: findexpressionsymbol' affy[i]
         call: *findcall' affy
         out,: , ("Affy ID: "), ($affy[i]), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ), ($*expsymbol), ("; "), ("fold change: "), ($*exp)
         i+: 1
      ]
      i: 0  
      if[0 = #protein
         out,: , ("no protein entered")
      ]
      while[i < #protein
         unit: ()
         subunit: ()
         ii: & (geneenzyme.gene = *geneid) & (geneenzyme.enzyme = protein[i])
         subunit: geneenzyme.subunit[*ii]
         if[subunit = `                  
           out,: , ("protein name: "), ($protein[i])
         ]
         if[~subunit = `
           out,: , ("protein name: "), ($protein[i]), (" "), ($geneenzyme.subunit[*ii]), (" subunit")
         ]
         unit: findfx_unit' protein[i]
         if[unit = `
           unit: `"none entered"
         ]
         out,: , ("   functional unit of protein: "), ($*unit)
         rxn: findreaction' protein[i]
         rxn: ,/rxn
         j: 0
         if[0 =  #rxn
           out,: , ("no reaction for protein")
         ]
         rxn,: ()
         while[j < #rxn
           vrxn: viewreaction' rxn[j]
           out,: , ("reaction id: "), ($rxn[j])
           out,: ,("reaction: "), ($vrxn)
           j+: 1
         ]
         i+: 1
      ]
   ]
   if[type = `protein
     out: , ("protein name: "), ($input)
     unit: findfx_unit' input
     if[unit = `
       unit: `"none entered"
     ]
     out,: , ("   functional unit of protein: "), ($*unit)
     rxn: findreaction' input
     rxn: ,/rxn
     j: 0
     if[0 =  #rxn
         out,: , ("no reaction for protein")
       ]
      rxn,: ()
      while[j < #rxn
        vrxn: viewreaction' rxn[j]
        out,: , ("reaction id: "), ($rxn[j])
        out,: ,("reaction: "), ($vrxn)
        j+: 1 
      ]
     gene: findgenefromenzyme' input   
     gene: ,/gene
     i: 0
     while[i < #gene
      genename: findgene_common' gene[i]
      affy: findaffy' gene[i]
      affy: ,/affy
      loc: findlocfromgene' gene[i]
      if[*genename = `
           genename: `"none entered"
      ]
      if[*loc = `
           loc: `"none entered"
      ]
      chrom: *findencoding'' *findgene' input
      out,: , ("NCBI gene name: "), ($gene[i]), ("; "), ("common gene name: "), ($*genename) 
      out,: , ("sub-cellular localization of protein: "), ($*loc), ("; encoded by: "), ($*chrom) 
      if[0 = #affy
        out,: , ("no Affy ID")
      ]
      j: 0
      affy,: ()
        while[j < #affy 
           exp: findexpressionvalue' affy[j]    
           expsymbol: findexpressionsymbol' affy[j]
           call: *findcall' affy
   out,: , ("Affy ID: "), ($affy[j]), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ), ($*expsymbol), ("; "), ("fold change: "), ($*exp)
         j+: 1
      ] 
      i+: 1
    ]
    mols: *findsymbolexact' input
      if[~0 = #mols
        rxns: findreactionsthatuse' mols
        rxns,: findreactionsthatproduce' mols
        i: 0
        if[~0 = #rxns
           out,: , ("Enzymes that use or produce "), ($input)
           while[i < #rxns
             enz: findenzymefromreaction' rxns[i]
             enz: ,/enz
             out,: , ((-2) _ ,/($enz) ,\: (", ")) , ("; reaction ID: "), ($rxns[i])
             viewrxn: viewreaction' rxns[i]
             out,: , ("  reaction: "), viewrxn
             i+: 1
           ]
         ]
      ]
  ]   
  if[type = `reactionid
    vrxn: viewreaction' input
    out: , ("reaction id: "), ($input)
    out,: ,("reaction: "), ($vrxn)
    protein: findenzymefromreaction' input
    protein: ,/protein
    k: 0
    protein,: ()
    while[k < #protein
      unit: ()
      unit: *findfx_unit' protein[k]
      if[unit = `
         unit: `"none entered"
      ]
      out,: ,("protein name: "), ($protein[k]), ("; functional unit: "), ($unit)
      gene: findgenefromenzyme' protein[k]
      gene: ,/gene
      gene,: ()
      i: 0
      while[i < #gene
        genename: findgene_common' gene[i]
        affy: findaffy' gene[i]
        affy: ,/affy
        loc: findlocfromgene' input
        if[*genename = `
           genename: `"none entered"
        ]  
        if[*loc = `  
           loc: `"none entered"
        ]  
        out,: , ("NCBI gene name: "), ($gene[i]), ("; "), ("common gene name: "), ($*genename), ("; "), ("sub-cellular localization: "), ($*loc)
        if[0 = #affy
          out,: , ("no Affy ID")
        ]
        j: 0
        affy,: ()
        while[j < #affy
           exp: findexpressionvalue' affy[j]
           expsymbol: findexpressionsymbol' affy[j]
           call: *findcall' affy
   out,: , ("Affy ID: "), ($affy[j]), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ), ($*expsymbol), ("; "), ("fold change: "), ($*exp)
           j+: 1
        ]  
        i+: 1
      ]
      k+: 1
    ]
  ]
  if[type = `moleculename
    moleculeid: findsymbolexact' input
    jj: & prodsubstrate.id = *moleculeid
    class: prodsubstrate.classification[*jj]
    if[class = `small
      class: `"small molecule"
    ]
    ii: & mile.milestones = *moleculeid    
    yorn: `no
    if[#ii > 0
      yorn: `yes
    ]
  out: ,("molecule name: "), ($input), ("; "), ("molecule id: "), ($*moleculeid), ("; "), ("mile stone: "), ($yorn), ("; "), ("classification: "), ($class)
    ii: & reactionsubstrate.substrateid = *moleculeid
    ii,: ()
    out,: ,("reactions in which "), ($input), (" is a substrate:")
    i: 0
    while[i < #ii
      rxn: reactionsubstrate.reactionid[ii[i]]
      importance: reactionsubstrate.importance[ii[i]]
      out,: ,("   reaction ID: "), ($rxn), ("; "), ("importance: "), ($importance)
      vrxn: viewreaction' rxn
      out,: ,("reaction: "), ($vrxn)
      i+: 1
    ]
    ii: & reactionproduct.productid = *moleculeid
    ii,: ()
    out,: ,("reactions in which "), ($input), (" is a product")
    i: 0
    while[i < #ii
      rxn: reactionproduct.reactionid[ii[i]]
      importance: reactionproduct.importance[ii[i]]
      out,: ,("   reaction ID: "), ($rxn), ("; "), ("importance: "), ($importance)
      vrxn: viewreaction' rxn
      out,: ,("       reaction: "), ($vrxn)
      i+: 1
    ]         
]
  ]
   :out
}

showme:{[list; val1; val2]
ii: ()
i: val1
while[val2 > i
  ii,: i
  i+: 1
]
:list[ii]
}

searchdatabase:{[input; type]
  out: ()
  genecommon: `
  gene: `
  affy: `
  geneid: `
  protein: `
  exp: `
  reaction: `
  vrnx: `
  input: ` $ lower' ($input)
  if[type = `affy
     affyid: ` $ lower'' ($expression.affyid)
     ii: & affyid = input
     expsymbol: `
     call: `
     affy: expression.affyid[ii]
     exp: expression.value[ii]
     expsymbol: expression.expsymbol[ii]
     call: expression.call[ii]
     jj: & affygene.affy = input
     if[~0 = #jj
        geneid: affygene.gene[jj]
        kk: & geneenzyme.gene  _in\: affygene.gene[jj]
        if[~0 =#kk
             protein: geneenzyme.enzyme[kk]
             genecommon: geneenzyme.gene_common[kk]
          ]
     ]
     out: affy, geneid, genecommon, protein, exp, expsymbol, call
     :spit[out]
  ]
  if[type = `gene_systematic
     ii: & affygene.gene = `
     affygene.gene[ii]: `"?"
     lcgene: ` $ lower'' ($affygene.gene)
     ii: & lcgene = input
     if[~0 = #ii
        geneid: affygene.gene[ii]
        geneid: ?geneid
        affy: affygene.affy[ii]
        exp: findexpressionvalue' affy
     ]
     affy_exp: spit' affy,'exp
     ii: & geneenzyme.gene = `
     geneenzyme.gene[ii]: `"?"
     lcgene: ` $ lower'' ($geneenzyme.gene)
     jj: & lcgene = input
     if[~0 = #jj
       protein: geneenzyme.enzyme[jj]
       genecommon: geneenzyme.gene_common[jj]
       geneid: geneenzyme.gene[jj]
     ]
     out: geneid, genecommon, protein, affy_exp
     :spit[out]
     ]
  if[type = `gene_common
     ii: & geneenzyme.gene_common = `
     geneenzyme.gene_common[ii]: `"?"
     lcgene: ` $ lower'' ($geneenzyme.gene_common)
     ii: & lcgene _sm\: ("*"),($input),("*")
     ii,: ()
     i: 0
     while[i < #ii
        if[~0 = #ii
          genecommon: ,geneenzyme.gene_common[ii[i]]
          geneid: ,geneenzyme.gene[ii[i]]
          protein: ,geneenzyme.enzyme[ii[i]]
        ]
        jj: & affygene.gene = geneenzyme.gene[ii[i]]
        if[~0 = #jj
          affy: affygene.affy[jj]
          exp: findexpressionvalue' affy
        ]
        affy_exp: spit' affy,'exp
        affy_exp: ,affy_exp
        out,: spit' genecommon,'geneid,'protein,'affy_exp
        i+: 1
     ]
     ii: < out
     :out[ii]
  ]
  if[type = `protein
     tempout: ()
     ii: & geneenzyme.enzyme = `
     geneenzyme.enzyme[ii]: `"?"
     lcenzyme: ` $ lower'' ($geneenzyme.enzyme)
     ii: lcenzyme _sm\: ("*"),($input),("*")
     jj: & ~0 = ii
     protein: geneenzyme.enzyme[jj]
     protein: ?protein
     i: 0
     while[i < #protein
         reaction: findreaction' protein[i]
         reaction: ,/reaction
         rxn_vrxn: ()
         if[~0 = #reaction
             vrxn: viewreaction' reaction
         ]
         j: 0
         while[j < #reaction
             rxn_vrxn,: , spit[reaction[j], ,vrxn[j]]
             j+: 1
          ]
         tempout,: spit' (protein[i]) ,/: ` $ rxn_vrxn
         i+: 1
       ]
       ii: < tempout  
       :tempout[ii]
   ]
   if[type = `reactionid
     out: ()
     ii: & reactionenergy.reactionid _sm\: ("*"),($input),("*")
     reaction,: () 
     i: 0 
     reaction: ?reactionenergy.reactionid[ii]
     while[i < #reaction 
       protein: ,reactionenergy.enzyme[ii[i]]
       if[~0 = #ii
         vrxn: ,viewreaction' reaction[i]
       ]
       out,: spit' reaction[i],'(,vrxn),'protein
       i+: 1
     ]
     ii: < out
     :out[ii]
   ]
   if[type = `moleculename
      names: findsymbol' input
      if[~ 0 = (#names)
        jj: intersectleftindexes[names[;0]; reactionenergy.enzyme]
        names: differ[names; names[jj]]
        ii: < names[;0]
        :spit' names[ii;0],'names[ii;1]
      ] 
      if[0 = (#names)
        :"not found" 
      ]
   ]
   if[type = `moleculeid
      name: `"not in database"
      nametemp: ,findcommonname' input
      if[~0 = #nametemp
        nametemp: name
      ]
      :spit[input, name]
   ]
   if[type = `all
      ii: & expression.affyid _sm\: ("*"),($input),("*")
      out: ()
      out,: ,(expression.affyid[ii])
      ii: & affygene.gene _sm\: ("*"),($input),("*")
      out,: affygene.gene[ii]
      ii: & geneenzyme.gene_common = `
      geneenzyme.gene_common[ii]: `"?"
      lcgene: ` $ lower'' ($geneenzyme.gene_common)
      ii: & lcgene  _sm\: ("*"),($input),("*")
      out,: ,geneenzyme.gene_common[ii]
      ii: & geneenzyme.gene = `
      geneenzyme.gene[ii]: `"?"
      lcgene: ` $ lower'' ($geneenzyme.gene)
      ii: & lcgene _sm\: ("*"),($input),("*")
      out,: ,geneenzyme.gene[ii]
      ii: & geneenzyme.enzyme = `
      geneenzyme.enzyme[ii]: `"?"
      lcenzyme: ` $ lower'' ($geneenzyme.enzyme)
      ii: & lcenzyme _sm\: ("*"),($input),("*")
      out,: geneenzyme.enzyme[ii]
      ii: & reactionenergy.enzyme _sm\: ("*"),($input),("*")
      out,: reactionenergy.enzyme[ii]
      ii: & prodsubstrate.name _sm\: ("*"),($input),("*")
      out,: ` $ prodsubstrate.name[ii]
      ii: & prodsubstrate.id _sm\: ("*"),($input),("*") 
      out,: ,prodsubstrate.id[ii]
      ii: & reactionenergy.reactionid _sm\: ("*"),($input),("*")
      out,: ,reactionenergy.reactionid[ii]
      out: ?,/out
      out1: out
      ii: & out1 _in\: geneenzyme.enzyme
      out1[ii]: `protein
      ii: & out1 _in\: geneenzyme.gene
      out1[ii]: `"NCBI gene name"
      ii: & out1 _in\: geneenzyme.gene_common
      out1[ii]: `"common gene name"
      ii: & out1 _in\: reactionenergy.enzyme
      out1[ii]: `protein
      ii: & out1 _in\: reactionenergy.reactionid
      out1[ii]: `"reaction ID"
      ii: & out1 _in\: affygene.gene
      out1[ii]: `"NCBI gene name" 
      ii: & out1 _in\: expression.affyid
      out1[ii]: `"Affy ID"
      ii: & ($out1) _in\: prodsubstrate.name
      out1[ii]: `molecule
      ii: & out1 _in\: prodsubstrate.id
      out1[ii]: `"molecule ID"
      ii: < out
      out: out[ii]
      out1: out1[ii]
      :spit' out,'out1
      ]
}


/ to find the enzymes in a path with their counts.
/ zz: findpathenzymes'reactionenergy.enzyme[!5]
/ qq: ,/ zz
/ (?qq) ,' (#:' = qq)

/  to change records given an index set.
/ ii -- is the index set
/ reactionenergy.reactionid[ii]: ` $ ("p") ,/: ($reactionenergy.reactionid[ii])

/ Suppose you are putting in information from rows ii of 
/ reactionenergy into prodsubstrate
/ prodsubstrate.name,: reactionenergy.reactionid[ii]
/ prodsubstrate.commonname,: (#ii) #`yes


/ April 16, 2002. Eliminate roots and leaves. There are a lot of loops


/ find everything that symbol can lead to and then get
/ an irredundant subset
whatcanbemade:{[symbol]
  xx: alphaalldests[symbol]
  routes: alphafindroutesminimal[symbol; xx]
  ii: &0 < #:' routes
  :,/routes[ii]
}

whatcanbemadeimportant:{[symbol]
  xx: alphaalldestsimportant[symbol]
  routes: alphafindroutesminimalimportant[symbol; xx]
  ii: &0 < #:' routes
  :routes[ii]
}

/ looks at alphadests in order and finds the minimal
/ set of paths (if a dest is in a previous path then don't
/ compute the path to it).
alphafindroutesminimal:{[alphasource; alphadests]
  mydests: alphadests
  out: ()
  while[0 < #mydests
        xx: alphafindroutes[alphasource; *mydests]
        mydests: differ[mydests; ?,/xx]
        out,: xx
  ]
  :out
}


/finds important routes designed so that it will handle the cases
/where either dest or sources are list
/ Note that if both are lists of size greater than 1, this doesn't work.
alphafindroutesminimalimportant:{[alphasources; alphadests]
  mydests: alphadests
  mysources:  alphasources
  out: ()
  if[1 < #mydests
     while[0 < #mydests
         xx: alphafindroutesimportant[mysources; *mydests]
         mydests: differ[mydests; ?,/xx]
         out,: xx
     ]
  ]
 if[1 < #mysources
     while[0 < #mysources
         xx: alphafindroutesimportant[*mysources; mydests]
         mysources: differ[mysources; ?,/xx]
         out,: xx
     ]
  ]
 if[(1 = #mysources) & (1 = #mydests)
     xx: alphafindroutesimportant[*mysources; *mydests]
     out,: xx
 ]
  :out
}

/finds routes where only mysources is list
alphaallroutesalphafindroutesminimal:{[alphasource; alphadests]
  mysources: alphasource
  out: ()
  mydests: alphadests
  while[0 < #mysources
       xx: alphafindroutes[*mysources; *mydests]
       mysources: differ[mysources; ?,/xx]
       out,: xx
       ]
   :out
}

alphaallroutesimportant:{[alphadest]
  xx: alphaallsourcesimportant[alphadest]
  routes: alphafindroutesminimalimportant[xx; alphadest]
  ii: &0 < #:' routes
  :squeezesuffix ?routes[ii]
}

alphaallroutes:{[alphadest]
  xx: alphaallsources[symbol]
  routes: alphaallroutesalphafindroutesminimal[xx; symbol]
  routes: ?routes
  ii: &0 < #:' routes
  :squeezesuffix routes[ii]
}

/ Peter, ??? This should be faster if you use the function I just sent.
alphaallrouteslimit:{[alphadest; limit]
  paths: alphaallroutes[alphadest]
  rpaths: |:' paths
  out: ()
  i: 0
  while[i < #rpaths
        count: (#rpaths[i]) & limit
        out,: ,|rpaths[i;!count]
        i+: 1
  ]
  out?:
  :out
}


/ find the difference between unimportant and important
/ for each molecule
finddif:{[]
  mysources: alphaallsources'all
  mysourcesimportant: alphaallsourcesimportant'all
  out: all,' differ'[mysources;mysourcesimportant]
  :out
}


/ Peter wants this for a sanity check
checktcmat:{[]
   out: ()
   y: #all
   x: y * y
   ii: & ,/tcmat < maxnum
   if[(0 = #ii) | (x = #ii)
       out,: ,"tcmat error"
   ]
   ii: & ,/tcmatimportant < maxnum
   if[(0 = #ii) | (x = #ii)
       out,: ,"tcmatimportant error"
   ]
   :out
}              


/ LOCATION-DEPENDENT PATH EXPLORE

/ #geneenzyme|gene|enzyme|gene_common|localization|subunit|DNA
/ #reactionenergy|reactionid|enzyme|freeenergysymbolic|fx_unit|num_protein
findlocation:{[reactionid]
    ii: & reactionenergy.reactionid = reactionid
    myenzymes: reactionenergy.enzyme[ii]
    jj: myenzymes ?/: geneenzyme.enzyme
    kk: & jj < #myenzymes
    :?geneenzyme.localization[kk]
}

/ make a directedge that is localized
/ `productid `substrateid `reactionid
/ So, in the result, substrateid will have a substrateid plus a location
/ productid will have a productid plus a location
/ reaction id will be as before
/ except that it will be the transportid for transportation
constructlocal:{[directedge]
   reactions: directedge.reactionid
   localizations: findlocation'reactions
   x.substrateid: ()
   x.productid: ()
   x.reactionid: ()
   i: 0
   while[i < #directedge.substrateid
	myloc: localizations[i]
	x.substrateid,: ` $ ($directedge.substrateid[i]),/: ($myloc)
	x.productid,: ` $ ($directedge.productid[i]),/: ($myloc)
	x.reactionid,: (#myloc) # directedge.reactionid[i]
	i+: 1
   ]
   kk: transporterbase.transportid ?/: transportmolecule.transportid
   x.substrateid,:` $ ($transportmolecule.molecule),'($transporterbase.base[kk])
   x.productid,:` $ ($transportmolecule.molecule),'($transporterbase.target[kk])
   x.reactionid,: transportmolecule.transportid
   :x
}


/ DATA

/ reactionenergy.reactionid: `"1.1.1.23",() 
/ reactionenergy.enzyme:  ,`"aspartate aminotransferase"
/ reactionenergy.freeenergy:  ,0
/ reactionenergy.freeenergysymbolic:  ,`L


\l peter.k
/recordtime["first input"]


if[mode _in `default `prepare `preparemin
 inputfromfile["reactionproduct.csv"]
  reactionproduct.stoich: 0.0 $ $ reactionproduct.stoich
  reactionproduct.importance: 0.0 $ $ reactionproduct.importance
  "reactionproduct" 1: reactionproduct
  inputfromfile["reactionsubstrate.csv"]
  reactionsubstrate.stoich: 0.0 $ $ reactionsubstrate.stoich
  reactionsubstrate.importance: 0.0 $ $ reactionsubstrate.importance
  "reactionsubstrate" 1: reactionsubstrate
  inputfromfile["directedge.csv"]
  jj: & (~ directedge.substrateid = `) & (~ directedge.productid = `)
  directedge.reactionid@: jj
  directedge.substrateid@: jj
  directedge.productid@: jj
  "directedge" 1: directedge
  inputfromfile["milestones.csv"]
  / milestones will be used as starting and ending points of a path.
  "mile" 1:  mile
  inputfromfile["reactionenergy.csv"]
  "reactionenergy" 1: reactionenergy
  inputfromfile["geneenzyme.csv"]
  "geneenzyme" 1: geneenzyme
  inputfromfile["transporterbase.csv"]
  "transporterbase" 1: transporterbase
  inputfromfile["transportmolecule.csv"]
  "transportmolecule" 1: transportmolecule
]

  all: ?directedge.substrateid, directedge.productid
  all@: < all
  maxnum: 2 * #all

/ EXECUTION


if[mode _in `default `prepare 
  recordtime["build matrix"]
  mat: build[all; directedge]
  recordtime["do transitive closure"]
  tcmat: tc[mat]
  recordtime["find roots and leaves"]
  allroots: findroots[tcmat]
  allleaves: findleaves[tcmat]
  allrootsalpha: all[allroots]
  allleavesalpha: all[allleaves]
  / localization stuff
  directedgelocal: constructlocal[directedge]
  / end of localization stuff

	/ all remains the original directedge elements.
  inindexes: ()
  i: 0
  while[i < #directedge.substrateid
	jj: & (reactionsubstrate.substrateid = directedge.substrateid[i]) & (reactionsubstrate.reactionid = directedge.reactionid[i])
	if[0 = #jj; 
		` 0: ,"No matching substrate"
		` 0: ,($ directedge.substrateid[i])
		` 0: ,($ directedge.reactionid[i])
		/ !-1
	] / peter messed up
	if[1 < #jj 
		` 0: ,"Too many matching substrates"
		` 0: ,($ directedge.substrateid[i])
		` 0: ,($ directedge.reactionid[i])
		/ !-1
	] / peter messed up
	jj: * jj
	badflag: reactionsubstrate.importance[jj] > 1
	if[~ badflag
	  jj: & (reactionproduct.productid = directedge.productid[i]) & (reactionproduct.reactionid = directedge.reactionid[i])
	  if[0 = #jj; 
		` 0: ,"No matching products"
		` 0: ,($ directedge.productid[i])
		` 0: ,($ directedge.reactionid[i])
		/ !-2
	   ] / peter messed up
	  if[1 < #jj 
		` 0: ,"Too many matching products"
		` 0: ,($ directedge.productid[i])
		` 0: ,($ directedge.reactionid[i])
		/ !-2
	  ] / peter messed up
	  jj: * jj
	  badflag: reactionproduct.importance[jj] > 1
	]
	if[~ badflag; inindexes,: i]
	i+: 1
  ]
  directedgeimportant.substrateid: directedge.substrateid[inindexes]
  directedgeimportant.productid: directedge.productid[inindexes]
  directedgeimportant.reactionid: directedge.reactionid[inindexes]
  recordtime["build important matrix"]
  matimportant: build[all; directedgeimportant]
  recordtime["do transitive closure on important matrix"]
  tcmatimportant: tc[matimportant]
  allmiles: mile.milestones
  allmilesnumbers: all _bin/: allmiles
  tcmatimportantstop: tcstop[matimportant; allmilesnumbers]
]


if[mode _in `default `prepare `preparemin
  / localization
  alllocal: ?directedgelocal.substrateid,directedgelocal.productid
  alllocal@: < alllocal
  maxnum: 2 * #alllocal
  matlocal: build[alllocal; directedgelocal]
  recordtime["do transitive closure of local"]
  tcmatlocal: tc[matlocal]
  recordtime["find roots and leaves of local"]
  allrootslocal: findroots[tcmatlocal]
  allleaveslocal: findleaves[tcmatlocal]
  allrootsalphalocal: alllocal[allrootslocal]
  allleavesalphalocal: alllocal[allleaveslocal]
  maxnum: 2 * #all / for other purposes
  "directedgelocal" 1: directedgelocal
  / end of localization
  inputfromfile["expression.csv"]
  expression.value: 0.0 $ $ expression.value
  "expression" 1: expression
  inputfromfile["affygene.csv"]
  "affygene" 1: affygene
  inputfromfile["oldaffygene.csv"]
  "oldaffygene" 1: oldaffygene
  inputfromfile["prodsubstrate.csv"]
  prodsubstrate.name: $ prodsubstrate.name
  prodsubstrate.lowername: lower'' prodsubstrate.name 
"prodsubstrate" 1: prodsubstrate
]

if[mode _in `query `preparemin
  tcmat: 1: "tcmat"
  tcmatimportant: 1: "tcmatimportant"
  tcmatimportantstop: 1: "tcmatimportantstop"
  allmilesnumbers: 1: "allmilesnumbers"
  allroots: 1: "allroots"
  allleaves: 1: "allleaves"
  allrootsalpha: 1: "allrootsalpha"
  allleavesalpha: 1: "allleavesalpha"
  . ("\ \\l "), filename
]
allleavesimportant: findleaves[tcmatimportant]
allleavesalphaimportant: all[allleavesimportant]
  
if[mode _in `prepare `default
  "mat" 1: mat
  "tcmat" 1: tcmat
  "allroots" 1: allroots
  "allleaves" 1: allleaves
  "allrootsalpha" 1: allrootsalpha
  "allleavesalpha" 1: allleavesalpha
  "tcmatimportant" 1: tcmatimportant
  "tcmatimportantstop" 1: tcmatimportantstop
  "allmilesnumbers" 1: allmilesnumbers
  . "\\\\"
]


checktcmat[]

` 0: ,"Set-up is done. Go ahead and play."

/ \l peter

/ "tmpout" 0: findenzymesval[2;`above;1]
/ "tmpout2" 0: findenzymesval[2;`above;0]

/ thosewithroutes: all[& alphahasroute'all]
/ thosewithimportantroutes: all[& alphahasrouteimportant'all]
/ differ[thosewithroutes; thosewithimportantroutes]