/ download K at www.kx.com. / Given / prodsubstrate(id, name) / reactionsubstrate( reaction id, substrateid, stoich, importance) / reactionproduct( reaction id , productid, stoich, importance) / directedge( reaction id, substrateid, productid) / reactionenergy( reactionid, enzyme, free energy symbolic, major path) / affygene( affy, gene) / geneenzyme( gene, enzyme) / experdesc( experiment, treat, conditions) / expervalue( experiment, treat, replica, affyid, value) / synpath(path, start, end) / mile(milestones) / Infer pathways: given starting substrate and ending value, / infer pathway between. / Find the affys that are there. / / Queries: / 1. given start and end, find path / 2. given end, how to produce \l db / PROCESS ARGUMENTS filename: "peter.k" allroutesflag: 0 mode: `default okflag: 1 / +q says give me a query after this / +p says stop after generating matrix / +pmin says don't regenerate matrix and keep going / +allroutes says all routes flag processargs:{[args] i: 0 while[(i < #args) & okflag okflag:: 0 addextra: 0 if[args[i] ~ "+q" filename:: args[i+1] mode:: `query addextra: 1 okflag:: 1 ] if[args[i] ~ "+p" mode:: `prepare okflag:: 1 ] if[args[i] ~ "+pmin" mode:: `preparemin okflag:: 1 ] if[args[i] ~ "+allroutes" allroutesflag:: 1 okflag:: 1 ] i+: 1 + addextra if[0 = okflag ` 0: "format is k transclos [+q filename | +p] [+allroutes]\n" ` 0: "filename is file for querying (e.g. peter.k)\n" ` 0: "+p says to read in csv and recreate tcmat matrix \n" ` 0: "+pmin says to read in csv, but don't recreate tcmat\n" ` 0: "+allroutes says to consider all routes, not just shortest routes. \n" . "\\\\" ] ] } / END PROCESS ARGUMENTS processargs[_i] if[ mode = `query expression: 1: "expression" affygene: 1: "affygene" oldaffygene: 1: "oldaffygene" reactionenergy: 1: "reactionenergy" geneenzyme: 1: "geneenzyme" prodsubstrate: 1: "prodsubstrate" reactionproduct: 1: "reactionproduct" reactionsubstrate: 1: "reactionsubstrate" directedge: 1: "directedge" directedgelocal: 1: "directedgelocal" mile: 1: "mile" transporterbase: 1: "transporterbase" / #transporterbase|transportid|protein|base|target transportmolecule: 1: "transportmolecule" / #transportmolecule|transportid|molecule^M ] / BASIC ROUTINES spit:{[list] (-1) _ ,/ ($list) ,\: ("|")} / find difference between list[0] and list[1] listdiff:{[list] :differ[list[0]; list[1]] } / returns one if x is a subset of y subset:{[x; y] i: y ?/: x : ~ (#y) _in i } / returns one if x is a subset of y subset:{[x;y] (#y) > |/ y ?/: x} differ:{[x;y] x,: () y,: () i: y ?/: x j: & i = #y :?x[j] } / A faster difference, yielding indexes in x that differ from y differindexes:{[x;y] i: y ?/: x j: & i = #y :j } /finds intersection of two lists / fastest of all intersect: {[x;y] x,: () y,: () i: x ?/: y :x[(?i) _dv #x] } /finds intersection of two lists / fastest of all intersect: {[x;y] x,: () y,: () if[(#x) < (#y) i: x ?/: y j: & i < #x :x[?i[j]] ] i: y ?/: x j: & i < #y :y[?i[j]] } /finds intersection of two lists / fastest of all hasintersect: {[x;y] x,: () y,: () i: x ?/: y : (&/i) < #x } / x is a proper subset of y propersubset:{[x;y] x,: () y,: () if[~ (#x) < (#y); :0] / must be smaller :(#x) = (#intersect[x;y]) } / x is a proper subset of y propersubset:{[x;y] x,: () y,: () if[~ (#x) < (#y); :0] / must be smaller :subset[x;y] } /finds indexes in x and y that intersect / If x and y are both sets, then the results will be of the same length / fastest of all intersectindexes: {[x;y] i: x ?/: y / where each y hits j: & i < #x / those ys that hit :(i[j];j) } /finds indexes in x that intersect with y intersectleftindexes: {[x;y] i: x ?/: y / where each y hits j: & i < #x / those ys that hit :i[j] } /finds indexes in x that intersect with y / even if there are dups in the left intersectleftindexesdup: {[x;y] i: y ?/: x j: & i < #y :j } even:{[x] (x % 2) = (_ (x % 2))} /finds intersection of two lists / and returns index pairs of matches. Assumes no duplicates / in either list intersectbothindexes: {[x;y] x,: () y,: () i: x ?/: y pairs: (i ,' (!#y)) k: & pairs[;0] < #x :pairs[k] } intersectbothindexesdup:{[x;y] x,: () y,: () uniqx: ?x partx: = x uniqy: ?y party: = y uniqpairs: intersectbothindexes[uniqx; uniqy] k: 0 outpairs: () while[k < #uniqpairs mypair: uniqpairs[k] outpairs,: ,/ partx[mypair[0]] ,/:\: party[mypair[1]] k+: 1 ] :outpairs } multiintersectallindexes:{[lists] x: multiintersect[lists] out: () i: 0 while[i < #lists y: lists[i] ?/: x out,: ,y i+: 1 ] :+out } / intersect many lists multiintersect:{[lists] size: #lists if[2 > size; :lists] first: lists[0],() jj: ,/ ?:' first (?/:)/: lists[1+ !(size-1)] / find indexes in first x: @[(1+#first) # 0; jj; + ; 1] x: (-1) _ x / delete missing entry kk: & x = size - 1 :first[kk] } / this is a set intersection so we remove duplicates multiintersect:{[lists] size: #lists if[2 > size; :lists] first: lists[0],() jj: first ?/: (,/ ?:' lists[1+ !(size-1)]) / find indexes in first x: @[(1+#first) # 0; jj; + ; 1] x: (-1) _ x / delete missing entry kk: & x = size - 1 :first[kk] } avg:{(+/ x) % # x} var:{avg[_sqr x] - _sqr avg[x]} std:{_sqrt var[x]} cov:{avg[x * y] - avg[x] * avg[y]} corr:{ (cov[x;y])%((std[x]) * (std[y]))} / delay based search corrdelay:{[delay;x;y] x: (-delay) _ x y: delay _ y (cov[x;y])%((std[x]) * (std[y]))} / END BASICS / FILE INPUT / Table schemas are written: / with a leading number sign, so # R| A |B| Car |Salary / for a table R(A,B,Car,Salary). / The values are written without a number sign / Interior blanks are significant, but leading and trailing blanks in / a field are not. / Null values should be represented by having only blanks in a field. / Tables are separated by a single blank line inputfromfile:{[filename] a: 0: filename globalvals:: () oldglobalvals:: () / should not be necessary justempty:: 1 / as if we've just seen an empty line. should process table a,: ,,"" processline'a } / a list is numeric if each member is either empty, a period / or a digit isnumeric:{[list] :0 / HACK: always return 0 here s: ,/list nums: "-1234567890." ii: & s = "." if[1 < #ii; :0] x: nums ?/: s :(#nums) > |/x / if greater then everything in s met its match } / parses a field based on vertical bars getfields:{[line] i: line = "|" j1: &i j2: &~i line @:j2 size: #j1 :(0,(j1 - !size)) _ line } / get rid of blanks at either end of the string delendblanks:{[string] if[0 = #string; :""] if[string ~ ,"" ; :""] i: & ~ string = " " if[(#string) = (#i); :string] if[0 = (#i); :""] string: (- ((#string) - (1 + *|i))) _ string :(*i) _ string } / convert all characters to lower case lower:{[let] if[0 ~ #let :let] v: _ic let if[(64 < v) & (v < 91) v+: (97-65) :_ci v ] :let } / Handles one line of input at a time according to table schema above. / A little more space efficient than the previous version. processline:{[line] emptyorblank:{[line] (0 = #delendblanks[line])} emptyflag: emptyorblank[line] if[~emptyflag justempty:: 0 if[line[0] = "#" newline: delendblanks'getfields[1 _ line] if[(1 < #newline) globaltable:: *newline globalatt:: 1 _ newline globalvals:: () / initialize ] / newline ] / table declaration if[~ line[0] = "#" newline: delendblanks'getfields[line] if[(1 < #newline) | (0 < #newline[0]) globalvals,: ,newline ] ] / end of test on data line ] / end of test on line is non-empty if[emptyflag & (0 = justempty) justempty:: 1 / if[ ((#globalatt) > 1) | (~ (#globalatt) = (^globalvals)[0]) / globalvals:: + globalvals / take transpose / ] if[~ (#globalatt) = (#globalvals[0]); !-1] numericflag:() hh: 0 while[hh < #globalatt numericflag,: isnumeric[globalvals[;hh]] hh+: 1 ] inindex:: 0 while[inindex < #globalatt / table.att[2] :: ` $ vals[3] string1: globaltable, (".") string1,: globalatt[inindex] teststring: ("0 = #"), string1 / For big sizes, uncomment following two statements / User will get an error message, but it will work. / x: @[.: ; teststring; :] / currentlyempty: *x / error means empty / For big sizes, comment following currentlyempty: 1 / if empty, then assign, else append string2: :[currentlyempty; (":: "); (",: ")] string3: :[0 = numericflag[inindex] "` $ " :[1 = numericflag[inindex]; "0.0 $ "; "0 $ "]] string3,: "globalvals[;inindex]" . string1, string2, string3 if[1 = numericflag[inindex] globalindexes:: & 0N = . string1 string3: string1, ("[globalindexes] :: `") . string3 ] inindex+: 1 ] ] } / DUMP TABLE dumptable / formstring takes a list and makes a string formstring:{[list] list,: () : (-1) _ ,/ ($list) ,\: (" ") } formstringvertbar:{[list] list,: () : (-1) _ ,/ ($list) ,\: ("|") } formstringcomma:{[list] list,: () : (-1) _ ,/ ($list) ,\: (",") } / Output a table (a variable) to a text file outfile (string) / e.g. output[`guide; guide; "foobar"] dumptable:{[tablename; table; outfile] out: ,("# "), ($tablename), ("|"), formstringvertbar[!table] first: *!table numofelements: . ("#"), ($tablename), ("."), ($first) i: 0 while[i < numofelements list: table[;i] x: formstring'list out,: , (-1) _ ,/x ,\: ("|") i+: 1 ] outfile 0: out } dumptablecsv:{[tablename; table; outfile] out: , formstringcomma[!table] first: *!table numofelements: . ("#"), ($tablename), ("."), ($first) i: 0 while[i < numofelements list: table[;i] x: formstring'list out,: , (-1) _ ,/x ,\: (",") i+: 1 ] outfile 0: out } / END OF DUMP TABLE / APPLICATION SPECIFIC / return 1 if x is a prefix of y (not necessarily proper) prefix:{[x;y] if[ (#x) < (#y) :x ~ y[!#x] ] :0 } / eliminate prefixes from a list squeezeprefix:{[list] if[2 > #list; :list] ind: !#list cross: ,/ ind ,/:\: ind cross@: & ~ cross[;0] = cross[;1] jj: & prefix'[list[cross[;0]]; list[cross[;1]]] badones: ?cross[;0][jj] :list[differ[ind;badones]] } / return 1 if x is a suffix of y (not necessarily proper) suffix:{[x;y] if[(#x) < (#y) :x ~ |((|y)[!#x]) ] :0 } / eliminate prefixes from a list squeezesuffix:{[list] if[2 > #list; :list] ind: !#list cross: ,/ ind ,/:\: ind cross@: & ~ cross[;0] = cross[;1] jj: & suffix'[list[cross[;0]]; list[cross[;1]]] badones: ?cross[;0][jj] :list[differ[ind;badones]] } / Build the matrix from a set of source-destination pairs. build:{[all; directedge] mat: () mat: ((#all);(#all)) # maxnum / extreme distance i: 0 while[i < #mat mat[i;i]: 0 i+: 1 ] jsource: all _bin/: directedge.substrateid jdest: all _bin/: directedge.productid j: 0 while[j < #jsource mat[jsource[j]; jdest[j]]: 1 j+: 1 ] :mat } / find the transitive closure of a matrix, giving the minimum / length of a path tc:{[mat] old: mat while[1 mat: square[mat] if[old ~ mat; :mat] old: mat ] } / must get rid of edgesleaving milestones after the initial / establishment of edges tcstop:{[mat; milenumbers] increment: mat i: 0 while[i < #milenumbers myindex: milenumbers[i] increment[myindex]: (#increment[0]) # maxnum increment[myindex;myindex]: 0 i+: 1 ] while[1 mat: matmult[mat; increment] if[old ~ mat; :mat] old: mat ] } matmultold:{[mat; mat2] new: &/'' mat +/:\: +mat2 :new &'' mat } matmult:{[mat; mat2] tmat: +mat2 new: mat i: 0 while[i < #mat new[i]: &/' (mat[i]) +/: tmat i+: 1 ] :new &'' mat } / testing data xmat: (0 1 9 9 1 9 9 9 0 1 9 9 9 9 9 9 0 1 9 9 9 9 9 9 0 1 9 9 9 9 9 9 0 1 9 9 9 9 9 9 0 1 1 9 9 9 9 9 0) / xmiles: 0 2 3 6 / maxnum: 90 / xmattc: tcstop[xmat; xmiles] / end of testing data / Finding the square of matrix and including the path length squareold:{[mat] new: &/'' mat +/:\: +mat :new &'' mat } square:{[mat] tmat: +mat new: mat i: 0 while[i < #mat new[i]: &/' (mat[i]) +/: tmat i+: 1 ] :new &'' mat } squarenew:{[mat] :mat(&/+)/:\:+mat } / Find shortest routes from a source to a destination / Input is the transitive closure as numbers. findroutesold:{[source; dest; excluded; tcmat] out: () dist: tcmat[source;dest] if[dist=maxnum; :()] / if[dist = 1; :,(source; dest)] directroute: () if[dist = 1; directroute,:,(source; dest)] intermediates: & (tcmat[;dest] = 1) intermediates: differ[intermediates _dv source; excluded] / to cut down infinite loop intermediates: intersect[intermediates; & tcmat[source;] < maxnum] if[0 = #intermediates; :()] excluded,: intermediates x: (findroutes[source;;excluded;tcmat]'intermediates) / attempt to find only routes that have no nulls qq: ,/x zz: 4::' ,/x izz: & zz = -1 qq@: izz if[0 = #qq; :directroute] y: (qq) ,\: dest / y: (*:'x) ,\: dest / if[dest = 3; !-2] / :y :directroute,y } / finds shortest routes findroutes:{[source; dest; excluded; tcmat] out: () dist: tcmat[source;dest] if[dist=maxnum; :()] if[dist = 1; :,(source; dest)] intermediates: & (tcmat[;dest] = 1) intermediates: differ[intermediates _dv source; excluded] / to cut down infinite loop intermediates: intersect[intermediates; & tcmat[source;] < maxnum] if[0 = #intermediates; :()] excluded,: intermediates x: (findroutes[source;;excluded;tcmat]'intermediates) / attempt to find only routes that have no nulls qq: ,/x zz: 4::' ,/x izz: & zz = -1 qq@: izz if[0 = #qq; :()] y: (qq) ,\: dest / y: (*:'x) ,\: dest / if[dest = 3; !-2] :y } nodup:{[list] (#list) = #?list} / find all non-redundant routes findallroutes:{[source; dest; tcmat] out: () dist: tcmat[source;dest] if[dist=maxnum; :()] if[dist = 1; out,: ,(source; dest)] i: 0 flag: 1 while[flag i+: 1 intermediates: & (tcmat[;dest] = i) intermediates: intermediates _dv source flag: 0 if[0 < #intermediates x: findroutes[source;;();tcmat]'intermediates z: findroutes[;dest;();tcmat]'intermediates jj: 0 new: () while[jj < #x if[0 < #x[jj] myz: 1 _' z[jj] toadd: ,/x[jj] ,/:\: myz zz: & nodup'toadd new,: toadd[zz] ] jj+: 1 ] if[0 < #new y: differ[new;out] if[0 < #y flag: 1 out,: y ] ] ] ] :out } / find routes through transitive closure matrix alphafindroutes:{[alphasource; alphadest] / if[alphadest = `"00108";!-1] source: all _bin alphasource dest: all _bin alphadest out: :[allroutesflag findallroutes[ source; dest; tcmat] findroutes[ source; dest; (); tcmat]] if[0 = #out; :()] :all[out] } / looks for routes among the important ones / This looks in tcmatimportant (which are the paths whose / product and substrates are all important) alphafindroutesimportant:{[alphasource; alphadest] / if[alphadest = `"00108";!-1] source: all _bin alphasource dest: all _bin alphadest out: :[allroutesflag findallroutes[ source; dest; tcmatimportant] findroutes[ source; dest; (); tcmatimportant]] if[0 = #out; :()] :all[out] } / don't specify the locations of source and dest alphafindrouteslocalgen:{[alphasource; alphadest] ii: & ($alllocal) _sm\: ($alphasource),("*") sourcelocs: alllocal[ii] ii: & ($alllocal) _sm\: ($alphadest),("*") destlocs: alllocal[ii] out: () i: 0 while[i < #sourcelocs j: 0 while[j < #destlocs out,: alphafindrouteslocal[sourcelocs[i]; destlocs[j]] j+: 1 ] i+: 1 ] :out } / find routes through transitive closure matrix alphafindrouteslocal:{[alphasourceloc; alphadestloc] / if[alphadest = `"00108";!-1] source: alllocal _bin alphasourceloc dest: alllocal _bin alphadestloc out: :[allroutesflag findallroutes[ source; dest; tcmatlocal] findroutes[ source; dest; (); tcmatlocal]] if[0 = #out; :()] :alllocal[out] } / looks for routes but stop at the first milestone / This looks in tcmatimportant (which are the paths whose / product and substrates are all important) alphafindroutesstop:{[alphasource; alphadest] z: () x: alphafindroutes[alphasource; alphadest] relmiles:mile.milestones _dv alphasource i: 0 while[i < #x jj: &/ x[i] ?/: relmiles if[jj < #x[i] x[i]@: !1+jj ] if[(|,/x[i])[0] = alphadest z,: ,x ] i+: 1 ] :?z } / looks for routes but stop at the first milestone / This looks in tcmatimportant (which are the paths whose / product and substrates are all important) alphafindroutesimportantstop:{[alphasource; alphadest] x: alphafindroutesimportant[alphasource; alphadest] relmiles: mile.milestones _dv alphasource i: 0 z: () while[i < #x jj: &/ x[i] ?/: relmiles if[jj < #x[i] x[i]@: !1+jj ] if[(|,/x[i])[0] = alphadest z,: ,x ] i+: 1 ] :?z } / ??? Peter: this looks only at milestones, but may not capture / the leaves properly alphafindroutesimportantstop:{[alphasource; alphadest] / if[alphadest = `"00108";!-1] source: all _bin alphasource dest: all _bin alphadest out: :[allroutesflag findallroutes[ source; dest; tcmatimportantstop] findroutes[ source; dest; (); tcmatimportantstop]] if[0 = #out; :()] :all[out] } / find all routes from all sources to a destination alphahowproduce:{[alphadest] dest: all _bin alphadest out: & tcmat[;dest] < maxnum :all[out] } / find all substrate predecessors of the destination / Always on the complete matrix alphaallroutesold:{[alphadest] dest: all _bin alphadest out: & tcmat[;dest] < maxnum / all ways to reach dest myroots: intersect[out; allroots] :,/alphafindroutes[;all[dest]]'all[myroots] } / get all those that lead to some alphadest alphaallsources:{[alphadest] dest: all _bin alphadest out: & tcmat[;dest] < maxnum / all ways to reach dest outnum: tcmat[out;dest] :all[out[>outnum]] / from farthest to closest } / get all those that lead to some alphadest but are important alphaallsourcesimportant:{[alphadest] dest: all _bin alphadest out: & (tcmatimportant[;dest] < maxnum) & (~0 = tcmatimportant[;dest]) / all ways to reach dest outnum: tcmatimportant[out;dest] :all[out[>outnum]] / from farthest to closest } / get all those that start from some source alphaalldests:{[alphasource] source: all _bin alphasource out: & tcmat[source] < maxnum / all ways to get to a dest outnum: tcmat[source;out] / actual numbers :all[out[>outnum]] / from farthest to closest } / get all those that start from some source up to some limit alphaalldestslimit:{[alphasource; limit] source: all _bin alphasource out: & tcmat[source] < limit+1 / all ways to get to a dest outnum: tcmat[source;out] / actual numbers :all[out[>outnum]] / from farthest to closest } / get all those that start from some source alphaalldestsimportant:{[alphasource] source: all _bin alphasource out: & tcmatimportant[source] < maxnum / all ways to get to a dest outnum: tcmatimportant[source;out] / actual numbers :all[out[>outnum]] } / limit is some number. Want only limit levels before alphadest alphaallrouteslimit:{[alphadest; limit] paths: alphaallroutes[alphadest] rpaths: |:' paths out: () i: 0 while[i < #rpaths count: (#rpaths[i]) & limit out,: ,|rpaths[i;!count] i+: 1 ] out?: :out } / Is there some route from a root to this alphahasroute:{[alphadest] dest: all _bin alphadest out: & tcmat[;dest] < maxnum / all ways to reach dest :hasintersect[out; allroots] } / find roots in the transitive closure matrix findroots:{[mat] myroots: () i: 0 while[i < #mat[0] / all columns col: mat[;i] col: col _dv 0 / take away self-loop if[maxnum = &/ col myroots,: i ] i+: 1 ] :myroots } / find leaves in the transitive closure matrix findleaves:{[mat] myleaves: () i: 0 while[i < #mat[;0] / all rows row: mat[i] row: row _dv 0 / take away self-loop if[maxnum = &/ row myleaves,: i ] i+: 1 ] :myleaves } / find the name of the product findname:{[symbol] ii: & prodsubstrate.id = symbol :prodsubstrate.name[ii] } findfirstname:{[symbol] ii: & prodsubstrate.id = symbol if[0 = #ii; :()] :prodsubstrate.name[*ii] } findcommonname:{[symbol] ii: & (prodsubstrate.id = symbol) if[0 = #ii; :()] jj: & (prodsubstrate.id = symbol) & (prodsubstrate.common = `yes) if[0 = #jj; :prodsubstrate.name[*ii]] / no common one, return first one :prodsubstrate.name[*jj] / return common one } / This outputs the /the name that matches the symbol findcommonnamesymbol:{[symbol] name: findcommonname[symbol] :(symbol;name) } / When bringing in prodsubstrate, create a new column / prodsubstrate.lowername: lower'' prodsubstrate.name / find substrate ids having name as a substring findsymbol:{[name] ii: & prodsubstrate.lowername _sm\: ("*"),(lower'$name),("*") :((` $ prodsubstrate.name[ii]),' prodsubstrate.id[ii]) } / find substrate ids having name as a substring findsymbolold:{[name] ii: & prodsubstrate.name _sm\: ("*"),($name),("*") :((` $ prodsubstrate.name[ii]),' prodsubstrate.id[ii]) } / find the gene from the affy findgene:{[affyid] affygene.gene[&affygene.affy = affyid]} / findaffy finds the affyid from the gene findaffy:{[gene] affygene.affy[& affygene.gene = gene]} / findreactionsthatuse finds reactions that use molecule / as a substrate findenzymesthatuse:{[symbol] ii: & reactionsubstrate.substrateid = symbol jj: & reactionenergy.reactionid _in\: reactionsubstrate.reactionid[ii] :reactionenergy.enzyme[jj] } /findenzymesthatproduce finds enzyme that have a molecule as a product findenzymesthatproduce:{[symbol] ii: & reactionproduct.productid = symbol jj: & reactionenergy.reactionid _in\: reactionproduct.reactionid[ii] :reactionenergy.enzyme[jj] } / find enzyme(s) with gene geneenzyme( gene, enzyme) findenzyme:{[gene] ii: & geneenzyme.gene = gene :geneenzyme.enzyme[ii] } findgenefromenzyme:{[enzyme] ii: & geneenzyme.enzyme = enzyme :geneenzyme.gene[ii] } / reactionenergy( reactionid, enzyme, freeenergysymbolic) / find reactionid from enzyme findreaction:{[enzyme] ii: & reactionenergy.enzyme = enzyme :reactionenergy.reactionid[ii] } findsubstratefromreaction:{[reactionid] ii: & reactionsubstrate.reactionid = reactionid :reactionsubstrate.substrateid[ii] } findproductfromreaction:{[reactionid] ii: & reactionproduct.reactionid = reactionid :reactionproduct.productid[ii] } /finds the expression value for a gene given the affyid using expression.csv findexpressionvalue:{[affyid] ii: & expression.affyid = affyid :expression.value[ii] } /findexpressionfromgene find the expression value for a gene directly findexpressionfromgene:{[gene] ii: & affygene.gene = gene jj: & expression.affyid _in\: affygene.affy[ii] : (expression.value[jj] ,\: gene) } /findexpressionfromenzyme find the expression value from an enzyme name findexpressionfromenzyme:{[enzyme] ii: & geneenzyme.enzyme = enzyme jj: & affygene.gene _in\: geneenzyme.gene[ii] kk: & expression.affyid _in\: affygene.affy[jj] : (expression.value[kk] ,\: enzyme) } / I've added a if statement (currently deactivated by a /). This statement /will return all reactionids, myenzymes, and genes if every gene doesn't /have an affy, but still returns info on genes w/ an affy. This creates /redundant info in the output. I'd like that redundancy eliminated: that /is out put only the complete set of info (including expression value for /gene with an affy findexpressionfromreactionold:{[reactionid] hh: & reactionenergy.reactionid = reactionid out: () i: 0 while[i < #hh myenzyme: reactionenergy.enzyme[hh[i]] ii: & geneenzyme.enzyme = myenzyme jj: & affygene.gene _in\: geneenzyme.gene[ii] noaffygene: differ[geneenzyme.gene[ii]; affygene.gene[jj]] if[0 < #noaffygene out,: reactionid,/: myenzyme,/: noaffygene ] j: 0 while[j < #jj mygene: affygene.gene[jj[j]] qq: & affygene.gene = mygene kk: & expression.affyid _in\: affygene.affy[qq] out,: reactionid,/: myenzyme,/:mygene,/:(expression.value[kk] ) j+: 1 ] i+: 1 ] :out } / finds regulated things (proteins, genes, molecules, proteins from genes) based on the x.ysymbol (e.g. proteins.prosymbol) / generated by createcombofiles findregtypeweb:{[reg; type; filename] ii: () out: () out,: createheader[filename] if[(type = `"gene") | (type = `"protein_from_gene") if[reg = `I reg_type: `"Up Regulated" ii: & expression.expsymbol = `UR ] if[reg = `R reg_type: `"Down Regulated" ii: & expression.expsymbol = `DR ] if[reg = `NC reg_type: `"Not Changed" ii: & expression.expsymbol = `NC ] ] if[~0 = #ii if[type = `"gene" affyid: expression.affyid[ii] call: expression.call[ii] value: expression.value[ii] expsymbol: expression.expsymbol[ii] gene: findgene' affyid jj: #:'gene ll: & jj = 0 gene[ll]: `"none entered" protein: findenzyme' ,/gene jj: #:'gene ll: & jj = 0 gene[ll]: `"none entered" if[~0 = (#proteins.protein) mm: intersectleftindexes[proteins.protein]'protein ] jj: #:'protein ll: & jj = 0 protein[ll]: `"none entered" ll: & jj = 2 protein_2: spitand' protein[ll] protein_2: ` $ protein_2 protein[ll]: protein_2 gene: ,/gene protein: ,/protein if[0 = (#proteins.protein) out,: ,($STB1), ($SR), ($SC), "Affymetrix ID", ($EC), ($SC), "Gene Expression Call", ($EC), ($SC), "P/A Call", ($EC), ($SC), "Gene Value", ($EC), ($SC), "NCBI gene name", ($EC), ($SC), "Protein(s)", ($EC), ($ER) out,: ,(SR),/:(SC),/:(affyid),'(EC),/:(SC),/:(expsymbol),'(EC),/:(SC),/:(call),'(EC),/:(SC),/:(value),'(EC),/:(SC),/:(gene),'(EC),/:(SC),/:(protein),\:(EC) out,: ,($ET) ] if[~0 = (#proteins.protein) jj: #:'mm ll: & jj = 0 proteinvalue: protein proteinsymb: protein proteinvalue[ll]: `"no data" proteinsymb[ll]: `"no data" kk: & jj > 0 proteinvalue[kk]: proteins.provalue[mm[kk]] proteinsymb[kk]: proteins.prosymbol[mm[kk]] kk: & jj > 1 temp_value: spitand' proteins.provalue[kk] temp_symb: spitand' proteins.prosymbol[kk] temp_value: ` $ temp_value temp_symb: ` $ temp_symb proteinvalue[kk]: temp_value proteinsymb[kk]: temp_symb out,: ,($STB1), ($SR), ($SC), "Affymetrix ID", ($EC), ($SC), "Gene Expression Call", ($EC), ($SC), "P/A Call", ($EC), ($SC), "Gene Value", ($EC), ($SC), "NCBI Gene Name", ($EC), ($SC), "Protein(s)", ($EC), ($SC), "Protein Value", ($EC), ($SC), "Protein Expression Call", ($EC), ($ER) out,: ,(SR),/:(SC),/:(affyid),'(EC),/:(SC),/:(expsymbol),'(EC),/:(SC),/:(call),'(EC),/:(SC),/:(value),'(EC),/:(SC),/:(gene),'(EC),/:(SC),/:(protein),'(EC),/:(SC),/:(proteinvalue),'(EC),/:(SC),/:proteinsymb,\:(EC) out,: ,($ET) ] out,: createtrailer[] out: ` $ ($out) out: ,//out out: ,:'out :spit' out ] if[type = `"protein_from_gene" jj: intersectbothindexesdup[affygene.affy; expression.affyid[ii]] jj: ?jj[;0] kk: intersectbothindexesdup[geneenzyme.gene; affygene.gene[jj]] kk: ?kk[;0] enzyme: geneenzyme.enzyme[kk] ll: intersectleftindexesdup[geneenzyme.enzyme; enzyme] total_enzyme: geneenzyme.enzyme[ll] total_uniqs: ?total_enzyme total_counts: #:' = total_enzyme uenzyme: ?enzyme counts: #:' = enzyme mm: < uenzyme uenzyme@: mm counts@: mm nn: < total_uniqs total_counts@: nn if[0 = (#proteins.protein) out,: ,($STB1), ($SR), ($SC), "Protein", ($EC), ($SC), "Number of Genes in Genome", ($EC), ($SC), "Number of", ($reg_type), "Genes", ($EC), ($ER) out,: ,(SR),/:(SC),/:(uenzyme),'(EC),/:(SC),/:(total_counts),'(EC),/:(SC),/:(counts),\:(EC) out,: ,($ET) ] if[~0 = (#proteins.protein) ll: intersectbothindexes[proteins.protein; uenzyme] oo: intersectleftindexes[uenzyme; uenzyme] proteinsymb: uenzyme proteinsymb[oo]: `"no data" proteinvalue: uenzyme proteinvalue[oo]: `"no data" temp_value: proteins.provalue[ll[;0]] temp_symbol: proteins.prosymbol[ll[;0]] proteinvalue[ll[;1]]: temp_value proteinsymb[ll[;1]]: temp_symbol out,: ,($STB1), ($SR), ($SC), "Protein", ($EC), ($SC), "Number of Genes in Genome", ($EC), ($SC), "Number of", ($reg_type), "Genes", ($EC), ($SC), "Protein Expression", ($EC), ($SC), "Protein Value", ($EC), ($ER) out,: ,(SR),/:(SC),/:uenzyme,'(EC),/:(SC),/:(total_counts),'(EC),/:(SC),/:(counts),'(EC),/:(SC),/:(proteinsymb),'(EC),/:(SC),/:(proteinvalue),\:(EC) ] out,: createtrailer[] out: ` $ ($out) out: ,//out out: ,:'out :spit' out ] ] } findexpressionfromreaction:{[reactionid] hh: & reactionenergy.reactionid = reactionid out: () i: 0 while[i < #hh myenzyme: reactionenergy.enzyme[hh[i]] myenzyme,: () ii: & geneenzyme.enzyme _in\: myenzyme if[0 = #ii mygene: `"there is no gene in Arabidopsis||" out,: spit'reactionid,/: myenzyme,'mygene ] jj: & affygene.gene _in\: geneenzyme.gene[ii] noaffygene: differ[geneenzyme.gene[ii]; affygene.gene[jj]] if[0 < #noaffygene hold: `"|" out,: spit'reactionid,/: myenzyme,/: noaffygene,'hold ] j: 0 genes: ?affygene.gene[jj] while[j < #genes mygene: genes[j] qq: & affygene.gene = mygene kk: & expression.affyid _in\: affygene.affy[qq] out,: spit'reactionid,/:myenzyme,/:mygene,/:(expression.value[kk]),/:(expression.expsymbol[kk]) j+: 1 ] i+: 1 ] :out } / findroutesfromaffy / starting from the affyid, find the gene, enzyme, reaction / Look at substrates and find the destinations of all those substrates. findroutesfromaffy:{[af] x: findsubstratefromreaction',/findreaction'findenzyme[findgene[af]] i:0 out: () while[i < #x zz: ,/ x[i],\:/: allleavesalpha routes: alphafindroutes'[zz[;0];zz[;1]] jj: & 0 < #:'routes / out,: ,(findname'x[i]; findname'''*:'routes[jj]) out,: ,(findfirstname'x[i]; findfirstname'''*:'routes[jj]) i+: 1 ] :out } findreactionfrompair:{[pair] ii: & (directedge.substrateid = pair[0]) & (directedge.productid = pair[1]) :,/(directedge.reactionid[ii]; pair[0]; pair[1]) } / given a path of substrate, product pairs, find the reaction ids findreactionsfrompath:{[path] pairs: ((-1) _ path) ,' (1 _ path) :findreactionfrompair'pairs } findenzymefromreaction:{[reactionid] ii: &reactionenergy.reactionid = reactionid :reactionenergy.enzyme[ii] } /Finds info on affys with expression below a value findaffysbelow:{[value] hh: & expression.value < value :(expression.affyid[hh] ,' expression.value[hh]) } /Finds info on affys with expression below a value findaffysabove:{[value] hh: & expression.value > value :(expression.affyid[hh] ,' expression.value[hh]) } / second argument is 1 if you want detail and 0 otherwise findenzymesabove:{[value; detailflag] :findenzymesval[value; `above; detailflag] } / second argument is 1 if you want detail and 0 otherwise findenzymesbelow:{[value] :findenzymesval[value; `below; detailflag] } / call findaffysbelow to see whether there are affys / with values below the argument. / Then see whether there are genes and enzymes. / If so, then present those. / e.g. findenzymes[2;`above] / third argument is 1 if you want detail and 0 otherwise findenzymesval:{[value; abovebelow; detailflag] if[abovebelow = `below x: findaffysbelow[value] ] if[abovebelow = `above x: findaffysabove[value] ] y: differ[x[;0]; affygene.affy] ii: intersectleftindexes[x[;0]; y] / those for which we have no gene out: x[ii] jj: intersectleftindexes[x[;0]; affygene.affy] if[0 = #jj; :out] x@: jj / only those with a gene kk: intersectleftindexes[affygene.affy; x[;0]] goodgenes: affygene.gene[kk] / find those genes x: goodgenes ,' x / now find genes that are not in enzymes y: differ[x[;0]; geneenzyme.gene] ii: intersectleftindexesdup[x[;0]; y] / those for which there is no enzyme out,: x[ii] jj: intersectleftindexesdup[x[;0]; ?geneenzyme.gene] / if there are duplicate genes on the left side, we will find them if[0 = #jj; :out] x@: jj kk: intersectleftindexesdup[geneenzyme.gene; ?x[;0]] / genes that have an enzyme goodenzymes: geneenzyme.enzyme[kk] / the enzymes goodgenes: geneenzyme.gene[kk] xout: () i: 0 while[i < #x jj: & goodgenes = x[i;0] xout,: goodenzymes[jj],\: x[i] i+: 1 ] x: xout / now find enzymes that are not in reactionenergy.enzyme y: differ[(?x[;0]); reactionenergy.enzyme] ii: intersectleftindexesdup[x[;0]; y] / those having no enzyme / in reactionenergy.enzyme out,: x[ii] jj: intersectleftindexesdup[x[;0]; ?reactionenergy.enzyme] if[0 = #jj; :out] x@: jj kk: intersectbothindexesdup[reactionenergy.enzyme; x[;0]] / enzymes that have a reactionenergy goodrxns: reactionenergy.reactionid[kk[;0]] zz: !#x zz: differ[zz; kk[;1]] out,: x[zz] x@: kk[;1] pathout: () i: 0 while[(i < #x) & detailflag qq: findimportantinfofromreaction[goodrxns[i]] pathout,: ,x[i] pathout,: ,,goodrxns[i] pathout,: qq i+: 1 ] out,: :[detailflag; pathout; x] :spit'out } /New by PMP find symbol that exactly matches name. Seems to be working. findsymbolexact:{[name] ii: & prodsubstrate.name _sm\: name : prodsubstrate.id[ii] } /finds a common name of a gene from gene findgene_common:{[gene] ii: & geneenzyme.gene = gene :geneenzyme.gene_common[ii] } /find gene from gene_common findgenefromgene_common:{[gene_common] ii: & geneenzyme.gene_common = gene_common :geneenzyme.gene[ii] } /find gene from gene_common findgenefromgene_common:{[gene_common] ii: & geneenzyme.gene_common = gene_common :geneenzyme.gene[ii] } /finds the localization of an enzyme from gene findlocfromgene:{[gene] ii: & geneenzyme.gene = gene :geneenzyme.localization[ii] } /finds the localization of an enzyme from gene findlocfromgene:{[gene] ii: & geneenzyme.gene = gene :geneenzyme.localization[ii] } /find the localization of an enzyme from gene_common findlocfromgene_common:{[gene_common] ii: & geneenzyme.gene_common = gene_common :geneenzyme.localization[ii] } recordtime:{[string] x: _ltime _t y: (string) , (": "), ($x[1]) :y } / New from PMP 2/28/02 /finds expression symbol findexpressionsymbol:{[affyid] ii: & expression.affyid = affyid :expression.expsymbol[ii] } findcall:{[affyid] ii: & expression.affyid = affyid :expression.call[ii] } whatcanbemadelimit:{[symbol;limit] paths: whatcanbemade[symbol] out: () i: 0 while[i < #paths count: (#paths[i]) & limit out,: ,paths[i;!count] i+: 1 ] out?: :out } / NEW 3/15/02 / NEW 3/22/02 / finds routes to a molecule using tcmat important alphaallroutesimportant:{[alphadest] dest: all _bin alphadest out: & tcmatimportant[;dest] < maxnum / all ways to reach dest myroots: intersect[out; allroots] :squeezesuffix ,/alphafindroutesimportant[;all[dest]]'all[myroots] } / Is there some route from a root to this / NOT SURE STILL USEFUL alphahasrouteimportant:{[alphadest] dest: all _bin alphadest out: & tcmatimportant[;dest] < maxnum / all ways to reach dest :hasintersect[out; allroots] } / finds routes to a molecule using alphaallroutesimportant, but then cuts / the routes off at a milestone if there is one in the route alphaallroutesimportantstopold:{[alphadest] paths: alphaallroutesimportant[alphadest] rpaths: |:' paths i: 0 out: () while[i < #rpaths jj: intersectleftindexes[1 _ rpaths[i]; mile.milestones] if[0=#jj out,: ,paths[i] / if there is no milestone, give me path ] if[0<#jj jj: (&/jj) + 1 out,: ,|rpaths[i;!jj+1] ] i+: 1 ] out?: / eliminate dups :out } / finds routes to a molecule using alphaallroutesimportant, but then cuts / the routes off at a milestone if there is one in the route alphaallroutesimportantstop:{[alphadest] dest: all _bin alphadest out: & tcmatimportantstop[;dest] < maxnum / all ways to reach dest myroots: intersect[out; allroots,allmilesnumbers] / y:,/alphafindroutesimportant[;all[dest]]'all[myroots] y:,/alphafindroutesimportantstop[;all[dest]]'all[myroots] out: () i: 0 while[i < #y kk: mile.milestones ?/: y[i] ind: &kk < #mile.milestones mymiles: (y[i])[ind] if[((#?mymiles) = #mymiles) & (3 > #mymiles) out,: ,y[i] ] i+: 1 ] / :squeezesuffix[?,/out] :squeezesuffix[?out] } / finds expression info on the enzymes in the paths generated by / alphaallroutesimportantstop, but only enzymes in which the prod and / substrate were given an importance of 1 findexpinfobiosynold:{[alphadest] out: () x: alphaallroutesimportantstop[alphadest] i: 0 while[i < #x out,: ,"New Path " ii: () pairs: ((-1) _ x[i]) ,' (1 _ x[i]) j: 0 while[j < #pairs ii: & (reactionsubstrate.substrateid = pairs[j;0]) & (reactionsubstrate.importance = 1) goodsub: reactionsubstrate.reactionid[ii] jj: & (reactionproduct.productid = pairs[j;1]) & (reactionproduct.importance = 1) goodprod: reactionproduct.reactionid[jj] goodrxn: intersect[goodprod; goodsub] / for this substrate-product pair, may be several / reaction ids where the pair is important. expinfo: findexpressionfromreaction' goodrxn subname: findcommonname[pairs[j;0]] prodname: findcommonname[pairs[j;1]] out,: ,($subname), (" to "), ($prodname) out,: spit'expinfo j+: 1 ] out,: ,"New Path" i+: 1 ] :out } / finds expression info on the enzymes in the paths generated by / alphaallroutesimportantstop, but only enzymes in which the prod and / substrate were given an importance of 1 findregfromgenelist:{[list; reg; method; val1; val2; use_or_not] out: () ii: intersectleftindexes[affygene.gene; list] if[method = `affy if[use_or_not = `not if[reg = `I jj: & (expression.expsymbol = `I) | (expression.expsymbol = `MI) nn: & affygene.affy _in\: expression.affyid[jj] oo: intersect[ii; nn] out: ?affygene.gene[oo] ] if[reg = `D jj: & (expression.expsymbol = `D) | (expression.expsymbol = `MD) nn: & affygene.affy _in\: expression.affyid[jj] oo: intersect[ii; nn] out: ?affygene.gene[oo] ] if[reg = `NC jj: & (expression.expsymbol = `NC) nn: & affygene.affy _in\: expression.affyid[jj] oo: intersect[ii; nn] out: ?affygene.gene[oo] ] ] if[use_or_not = `use if[reg = `I jj: & (expression.expsymbol = `I) | (expression.expsymbol = `MI) kk: & expression.value > val1 ll: intersect[jj; kk] nn: & affygene.affy _in\: expression.affyid[ll] oo: intersect[ii; nn] out: ?affygene.gene[oo] ] if[reg = `D jj: & (expression.expsymbol = `D) | (expression.expsymbol = `MD) kk: & expression.value < val2 ll: intersect[jj; kk] nn: & affygene.affy _in\: expression.affyid[ll] oo: intersect[ii; nn] out: ?affygene.gene[oo] ] if[reg = `NC jj: & expression.expsymbol = `NC kk: & (expression.value > val2) | (expression.value = val2) nn: & (expression.value < val1) | (expression.value = val1) kk: intersect[kk; nn] ll: intersect[jj; kk] nn: & affygene.affy _in\: expression.affyid[ll] oo: intersect[ii; nn] out: ?affygene.gene[oo] ] ] ] if[method = `fold_change if[reg = `I jj: & expression.value > val1 kk: & affygene.affy _in\: expression.affyid[jj] kk: intersect[ii; kk] out: ?affygene.gene[kk] ] if[reg = `D jj: & expression.value < val2 kk: & affygene.affy _in\: expression.affyid[jj] kk: intersect[ii; kk] out: ?affygene.gene[kk] ] if[reg = `NC jj: & (expression.value < val1) | (expression.value = val1) ll: & (expression.value > val2) | (expression.value = val2) jj: intersect[jj; ll] kk: & affygene.affy _in\: expression.affyid[jj] kk: intersect[ii; kk] out: ?affygene.gene[kk] ] ] :out } findexpinfobiosyn:{[alphadest; method; val1; val2; use_or_not] firstsub: () lastprod: () out: () num_steps: () num_rxn: () tg: () indgene: () ncgene: () repgene: () x: alphaallroutesimportantstop[alphadest] i: 0 while[i < #x firstsub,:` $ findcommonname[x[i;0]] rx: |x[i] lastprod,: ` $ findcommonname[rx[0]] j: 0 out,: , "Pathway ", ($(i+1)), ": ", ($firstsub[i]), " to ", ($lastprod[i]) rxns: findreactionsfrompathimportant[x[i]] num_steps,: #rxns frxns: intersect[reactionenergy.reactionid; ,/rxns] num_rxn,: #frxns enz: findenzymefromreaction' frxns gene: ?,/findgenefromenzyme' ,/enz indgene,: ,(findregfromgenelist[gene; `I; method; val1; val2; use_or_not]) repgene,: ,(findregfromgenelist[gene; `D; method; val1; val2; use_or_not]) ncgene,: ,(findregfromgenelist[gene; `NC; method; val1; val2; use_or_not]) tg,: ,(gene) while[j < #rxns sub: findcommonname' rxns[j;0] prod: findcommonname' rxns[j;1] out,: sub " to " prod, k: 2 while[k < #rxns[j] expinfo: findexpressionfromreaction' rxns[j;k] out,: ,expinfo k+: 1 ] j+: 1 ] i+: 1 ] i: 0 out1: , ("Pathway #|# of steps in pathway|# of reactions in pathway|# of genes that encode enzymes in the pathway|# of induced genes|# of repressed gene|#not changed genes") while[i < #num_steps out1,: , ($(i+1)), ("|"), ($num_steps[i]), ("|"), ($num_rxn[i]), ("|"), ($(#tg[i])), ("|"), ($(#indgene[i])), ("|"), ($(#repgene[i])), ("|"), ($(#ncgene[i])) i+: 1 ] out1,: out :out1 } /finds expression info for enzymes that make a molecule for an entire path findreactionsfrompathimportant:{[path] out: () pairs: ((-1) _ path) ,' (1 _ path) j: 0 while[j < #pairs ii: & (reactionsubstrate.substrateid = pairs[j;0]) & (reactionsubstrate.importance = 1) goodsub: reactionsubstrate.reactionid[ii] jj: & (reactionproduct.productid = pairs[j;1]) & (reactionproduct.importance = 1) goodprod: reactionproduct.reactionid[jj] goodrxn: intersect[goodprod; goodsub] out,: ,pairs[j], goodrxn j+: 1 ] :out } /finds expression info for enzymes on where a molecule can go for an entire path findexpinfobioprod:{[alphadest; method; val1; val2; use_or_not] firstsub: () lastprod: () out: () num_steps: () num_rxn: () tg: () indgene: () ncgene: () repgene: () x: whatcanbemadeimportantstop[alphadest] i: 0 while[i < #x firstsub,:` $ findcommonname[x[i;0]] rx: |x[i] lastprod,: ` $ findcommonname[rx[0]] j: 0 out,: ,"Pathway ", ($(i+1)), ": ", ($firstsub[i]), " to ", ($lastprod[i]) rxns: findreactionsfrompathimportant[x[i]] num_steps,: #rxns frxns: intersect[reactionenergy.reactionid; ,/rxns] num_rxn,: #frxns enz: findenzymefromreaction' frxns gene: ?,/findgenefromenzyme' ,/enz indgene,: ,(findregfromgenelist[gene; `I; method; val1; val2; use_or_not]) repgene,: ,(findregfromgenelist[gene; `D; method; val1; val2; use_or_not]) ncgene,: ,(findregfromgenelist[gene; `NC; method; val1; val2; use_or_not]) tg,: ,(gene) while[j < #rxns sub: findcommonname' rxns[j;0] prod: findcommonname' rxns[j;1] out,: sub " to " prod, k: 2 while[k < #rxns[j] expinfo: findexpressionfromreaction' rxns[j;k] out,: ,expinfo k+: 1 ] j+: 1 ] i+: 1 ] i: 0 out1: , ("Pathway #|# of steps in pathway|# of reactions in pathway|# of genes that encode enzymes in the pathway|# of induced genes|# of repressed gene|#not changed genes") while[i < #num_steps out1,: , ("Pathway "), ($(i+1)), ("|"), ($num_steps[i]), ("|"), ($num_rxn[i]), ("|"), ($(#tg[i])), ("|"), ($(#indgene[i])), ("|"), ($(#repgene[i])), ("|"), ($(#ncgene[i])) i+: 1 ] out1,: out :out1 } / get the stoichiometry of a reaction viewreaction:{[rxn] out: () ii: & reactionsubstrate.reactionid = rxn jj: & reactionproduct.reactionid = rxn subs: ($reactionsubstrate.stoich[ii]),' ("X"),/: (findcommonname'reactionsubstrate.substrateid[ii]) strsub: (-3) _ ,/subs ,\: (" + ") prods: ($reactionproduct.stoich[jj]),' ("X"),/: (findcommonname'reactionproduct.productid[jj]) strprod: (-3) _ ,/prods ,\: (" + ") :strsub, (" --> "), strprod } / finds what can be made from a molecule, but only uses alphafindroutesimportant whatcanbemadeimportant:{[substrate] routes: alphafindroutesimportant[substrate]'allleavesalphaimportant ii: & 0 < #:' routes :squeezeprefix ,/routes[ii] } / same as above, but stops at first milestone whatcanbemadeimportantstop:{[alphasource] source: all _bin alphasource out: & tcmatimportantstop[source] < maxnum / all ways to leave from source mydests: intersect[out; allleavesimportant,allmilesnumbers] / y:,/alphafindroutesimportant[all[source]]'all[mydest] y:,/alphafindroutesimportantstop[all[source]]'all[mydest] out: () i: 0 while[i < #y kk: mile.milestones ?/: y[i] ind: &kk < #mile.milestones mymiles: (y[i])[ind] if[((#?mymiles) = #mymiles) & (3 > #mymiles) out,: ,y[i] ] i+: 1 ] :squeezeprefix[?out] } / Find the pathways for important reactions findreactionsthatuseimportant:{[symbol] ii: & (reactionsubstrate.substrateid = symbol) & (reactionsubstrate.importance = 1) :reactionsubstrate.reactionid[ii] } findreactionsthatuse:{[symbol] ii: & reactionsubstrate.substrateid = symbol :reactionsubstrate.reactionid[ii] } findreactionsthatproduce:{[symbol] ii: & reactionproduct.productid = symbol :reactionproduct.reactionid[ii] } /findreactionsthatproduce finds enzyme that have a molecule as a product findreactionsthatproduceimportant:{[symbol] ii: & (reactionproduct.productid = symbol) & (reactionproduct.importance = 1) :reactionproduct.reactionid[ii] } importantsubstratesfromreaction:{[rxn] ii: & (reactionsubstrate.reactionid = rxn) & (reactionsubstrate.importance = 1) :reactionsubstrate.substrateid[ii] } importantproductsfromreaction:{[rxn] ii: & (reactionproduct.reactionid = rxn) & (reactionproduct.importance = 1) :reactionproduct.productid[ii] } / for reaction rxn, find the sources of each substrate and / the end products of each product and put them out. findimportantinfofromreaction:{[rxn] subs: importantsubstratesfromreaction[rxn] i: 0 outsubs: () while[i < # subs sub_routes: alphaallroutesimportantstop[subs[i]] if[0 < # sub_routes outsubs,:(subs[i]),/:(sub_routes[;0]),'(#:'sub_routes) ] i+: 1 ] if[0 < #outsubs outsubs[;0]: findcommonname'outsubs[;0] outsubs[;1]: findcommonname'outsubs[;1] ] prods: importantproductsfromreaction[rxn] i: 0 outprods: () while[i < #prods prod_routes: whatcanbemadeimportantstop[prods[i]] if[0 < # prod_routes outprods,:(prods[i]),/:(*:'|:'prod_routes),'(#:'prod_routes) ] i+: 1 ] if[0 < #outprods outprods[;0]: findcommonname'outprods[;0] outprods[;1]: findcommonname'outprods[;1] ] x: (,,`"Substrates: "), outsubs, (,,`"Products: "), outprods :x } / finds all the enzymes that are in the same path as a given enzyme findpathenzymes:{[enz] enzymesubs: () enzymeprods: () rxnids: findreaction[enz] i: 0 while[i < #rxnids subs: findsubstratefromreaction[rxnids[i]] j: 0 while[j < #subs path: alphaallroutesimportantstop[subs[j]] k: 0 while[k < #path rxns: findreactionsfrompathimportant[path[k]] rxns: ,/rxns ii: intersectleftindexes[reactionenergy.reactionid; rxns] enzymesubs,: reactionenergy.enzyme[ii] k+: 1 ] j+: 1 ] i+: 1 ] i:0 while[i < #rxnids prods: findproductfromreaction[rxnids[i]] j: 0 while[j < #prods path: whatcanbemadeimportantstop[prods[j]] k: 0 while[k < #path rxns: findreactionsfrompathimportant[path[k]] rxns: ,/rxns ii: intersectleftindexes[reactionenergy.reactionid; rxns] enzymeprods: reactionenergy.enzyme[ii] k+: 1 ] j+: 1 ] i+: 1 ] x: (,,`"Substrate enzymes: "), enzymesubs, (,,`"Products enzymes: "),enzymeprods :x } /finds out what it is `affy; `geneid, `genename, `protein, `moleculename /requires everything to match exactly what's in the database identifyit:{[input] type: `unknown affy: intersect[expression.affyid; input] geneid: intersect[affygene.gene; input] geneid,: intersect[geneenzyme.gene; input] genename: intersect[geneenzyme.gene_common; input] protein: intersect[geneenzyme.enzyme; input] reactionid: intersect[reactionenergy.reactionid; input] moleculename: findsymbolexact' input localization: intersect[geneenzyme.localization; input] if[~0 = #affy type: `affy ] if[~0 = #geneid type: `geneid ] if[~0 = #genename type: `genename ] if[~0 = #protein type: `protein ] if[~0 = #reactionid type: `reactionid ] if[(~0 = #moleculename) & (0 = #protein) type: `moleculename ] if[~0 = #localization type: `localization ] :type } findencoding:{[input] ii: & input = geneenzyme.gene :geneenzyme.DNA[*ii] } findsubunit:{[input] ii: & input = geneenzyme.gene :geneenzyme.subunit[ii] } findfx_unit:{[input] out: ` ii: & reactionenergy.enzyme = input if[~0 = #ii out: reactionenergy.fx_unit[*ii] ] :out } linkfunction:{[input] type: identifyit' input chrom: () out: () if[~0 = #type if[type = `affy expsymbol: findexpressionsymbol' input call: findcall' input geneid: findgene' input protein: findenzyme'' findgene' input protein: ,/protein genename: *findgene_common'' *findgene' input exp: findexpressionvalue' input loc: *findlocfromgene'' *findgene' input out: , ("Affy ID: "), ($input), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ),($*expsymbol), ("; "), ("fold change: "), ($*exp) if[~0 = #geneid if[*genename = ` genename: `"none entered" ] if[*loc = ` loc: `"none entered" ] chrom: *findencoding'' *findgene' input rxn: ` vrxn: ` out,: , ("NCBI gene name: "), ($*geneid), ("; "), ("common gene name: "), ($*genename) out,: , (" protein sub-cellular localization: "), ($*loc), ("; encoded by: "), ($*chrom) protein,: () i: 0 if[0 = #protein out,: , ("no protein entered") ] while[i < #protein unit: () subunit: () ii: & (geneenzyme.gene = *geneid) & (geneenzyme.enzyme = protein[i]) subunit: geneenzyme.subunit[*ii] if[subunit = ` out,: , ("protein name: "), ($protein[i]) ] if[~subunit = ` out,: , ("protein name: "), ($protein[i]), (" "), ($geneenzyme.subunit[*ii]), (" subunit") ] unit: findfx_unit' protein[i] if[unit = ` unit: `"none entered" ] out,: , (" functional unit of protein: "), ($*unit) rxn: findreaction' protein[i] j: 0 if[0 = #rxn out,: , ("no reaction for protein") ] rxn: ,/rxn rxn,: () while[j < #rxn vrxn: viewreaction' rxn[j] out,: , ("reaction id: "), ($rxn[j]) out,: ,("reaction: "), ($vrxn) j+: 1 ] i+: 1 ] ] ] if[type = `geneid protein: findenzyme' input protein: ,/protein genename: findgene_common' input affy: findaffy' input affy: ,/affy loc: findlocfromgene' input chrom: *findencoding' input if[*genename = ` genename: `"none entered" ] if[*loc = ` loc: `"none entered" ] out: , ("NCBI gene name: "), ($input), ("; "), ("common gene name: "), ($*genename) out,: , ("sub-cellular localization of protein: "), ($*loc), ("; encoded by: "), ($*chrom) if[0 = #affy out,: , ("no Affy ID") ] i: 0 affy,: () while[i < #affy exp: findexpressionvalue' affy[i] expsymbol: findexpressionsymbol' affy[i] call: *findcall' affy out,: , ("Affy ID: "), ($affy[i]), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ), ($*expsymbol), ("; "), ("fold change: "), ($*exp) i+: 1 ] i: 0 if[0 = #protein out,: , ("no protein entered") ] while[i < #protein subunit: () unit: () ii: & (geneenzyme.gene = input) & (geneenzyme.enzyme = protein[i]) subunit: geneenzyme.subunit[*ii] if[subunit = ` out,: , ("protein name: "), ($protein[i]) ] if[~subunit = ` out,: , ("protein name: "), ($protein[i]), (" "), ($geneenzyme.subunit[*ii]), (" subunit") ] unit: findfx_unit' protein[i] if[unit = ` unit: `"none entered" ] out,: , (" functional unit of protein: "), ($*unit) rxn: findreaction' protein[i] j: 0 if[0 = #rxn out,: , ("no reaction for protein") ] rxn: ,/rxn rxn,: () while[j < #rxn vrxn: viewreaction' rxn[j] out,: , ("reaction id: "), ($rxn[j]) out,: ,("reaction: "), ($vrxn) j+: 1 ] i+: 1 ] ] if[type = `genename geneid: *findgenefromgene_common' input protein: findenzyme' geneid protein: ,/protein affy: findaffy' geneid affy: ,/affy loc: findlocfromgene' geneid chrom: *findencoding' geneid if[*loc = ` loc: `"none entered" ] out,: , ("NCBI gene name: "), ($*geneid), ("; "), ("common gene name: "), ($*genename) out,: , (" protein sub-cellular localization: "), ($*loc), ("; encoded by: "), ($*chrom) if[0 = #affy out,: , ("no Affy ID entered") ] i: 0 affy,: () while[i < #affy exp: findexpressionvalue' affy[i] expsymbol: findexpressionsymbol' affy[i] call: *findcall' affy out,: , ("Affy ID: "), ($affy[i]), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ), ($*expsymbol), ("; "), ("fold change: "), ($*exp) i+: 1 ] i: 0 if[0 = #protein out,: , ("no protein entered") ] while[i < #protein unit: () subunit: () ii: & (geneenzyme.gene = *geneid) & (geneenzyme.enzyme = protein[i]) subunit: geneenzyme.subunit[*ii] if[subunit = ` out,: , ("protein name: "), ($protein[i]) ] if[~subunit = ` out,: , ("protein name: "), ($protein[i]), (" "), ($geneenzyme.subunit[*ii]), (" subunit") ] unit: findfx_unit' protein[i] if[unit = ` unit: `"none entered" ] out,: , (" functional unit of protein: "), ($*unit) rxn: findreaction' protein[i] rxn: ,/rxn j: 0 if[0 = #rxn out,: , ("no reaction for protein") ] rxn,: () while[j < #rxn vrxn: viewreaction' rxn[j] out,: , ("reaction id: "), ($rxn[j]) out,: ,("reaction: "), ($vrxn) j+: 1 ] i+: 1 ] ] if[type = `protein out: , ("protein name: "), ($input) unit: findfx_unit' input if[unit = ` unit: `"none entered" ] out,: , (" functional unit of protein: "), ($*unit) rxn: findreaction' input rxn: ,/rxn j: 0 if[0 = #rxn out,: , ("no reaction for protein") ] rxn,: () while[j < #rxn vrxn: viewreaction' rxn[j] out,: , ("reaction id: "), ($rxn[j]) out,: ,("reaction: "), ($vrxn) j+: 1 ] gene: findgenefromenzyme' input gene: ,/gene i: 0 while[i < #gene genename: findgene_common' gene[i] affy: findaffy' gene[i] affy: ,/affy loc: findlocfromgene' gene[i] if[*genename = ` genename: `"none entered" ] if[*loc = ` loc: `"none entered" ] chrom: *findencoding'' *findgene' input out,: , ("NCBI gene name: "), ($gene[i]), ("; "), ("common gene name: "), ($*genename) out,: , ("sub-cellular localization of protein: "), ($*loc), ("; encoded by: "), ($*chrom) if[0 = #affy out,: , ("no Affy ID") ] j: 0 affy,: () while[j < #affy exp: findexpressionvalue' affy[j] expsymbol: findexpressionsymbol' affy[j] call: *findcall' affy out,: , ("Affy ID: "), ($affy[j]), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ), ($*expsymbol), ("; "), ("fold change: "), ($*exp) j+: 1 ] i+: 1 ] mols: *findsymbolexact' input if[~0 = #mols rxns: findreactionsthatuse' mols rxns,: findreactionsthatproduce' mols i: 0 if[~0 = #rxns out,: , ("Enzymes that use or produce "), ($input) while[i < #rxns enz: findenzymefromreaction' rxns[i] enz: ,/enz out,: , ((-2) _ ,/($enz) ,\: (", ")) , ("; reaction ID: "), ($rxns[i]) viewrxn: viewreaction' rxns[i] out,: , (" reaction: "), viewrxn i+: 1 ] ] ] ] if[type = `reactionid vrxn: viewreaction' input out: , ("reaction id: "), ($input) out,: ,("reaction: "), ($vrxn) protein: findenzymefromreaction' input protein: ,/protein k: 0 protein,: () while[k < #protein unit: () unit: *findfx_unit' protein[k] if[unit = ` unit: `"none entered" ] out,: ,("protein name: "), ($protein[k]), ("; functional unit: "), ($unit) gene: findgenefromenzyme' protein[k] gene: ,/gene gene,: () i: 0 while[i < #gene genename: findgene_common' gene[i] affy: findaffy' gene[i] affy: ,/affy loc: findlocfromgene' input if[*genename = ` genename: `"none entered" ] if[*loc = ` loc: `"none entered" ] out,: , ("NCBI gene name: "), ($gene[i]), ("; "), ("common gene name: "), ($*genename), ("; "), ("sub-cellular localization: "), ($*loc) if[0 = #affy out,: , ("no Affy ID") ] j: 0 affy,: () while[j < #affy exp: findexpressionvalue' affy[j] expsymbol: findexpressionsymbol' affy[j] call: *findcall' affy out,: , ("Affy ID: "), ($affy[j]), ("; "), ("absent/present call: "), ($*call), ("; "), ("expresssion call: " ), ($*expsymbol), ("; "), ("fold change: "), ($*exp) j+: 1 ] i+: 1 ] k+: 1 ] ] if[type = `moleculename moleculeid: findsymbolexact' input jj: & prodsubstrate.id = *moleculeid class: prodsubstrate.classification[*jj] if[class = `small class: `"small molecule" ] ii: & mile.milestones = *moleculeid yorn: `no if[#ii > 0 yorn: `yes ] out: ,("molecule name: "), ($input), ("; "), ("molecule id: "), ($*moleculeid), ("; "), ("mile stone: "), ($yorn), ("; "), ("classification: "), ($class) ii: & reactionsubstrate.substrateid = *moleculeid ii,: () out,: ,("reactions in which "), ($input), (" is a substrate:") i: 0 while[i < #ii rxn: reactionsubstrate.reactionid[ii[i]] importance: reactionsubstrate.importance[ii[i]] out,: ,(" reaction ID: "), ($rxn), ("; "), ("importance: "), ($importance) vrxn: viewreaction' rxn out,: ,("reaction: "), ($vrxn) i+: 1 ] ii: & reactionproduct.productid = *moleculeid ii,: () out,: ,("reactions in which "), ($input), (" is a product") i: 0 while[i < #ii rxn: reactionproduct.reactionid[ii[i]] importance: reactionproduct.importance[ii[i]] out,: ,(" reaction ID: "), ($rxn), ("; "), ("importance: "), ($importance) vrxn: viewreaction' rxn out,: ,(" reaction: "), ($vrxn) i+: 1 ] ] ] :out } showme:{[list; val1; val2] ii: () i: val1 while[val2 > i ii,: i i+: 1 ] :list[ii] } searchdatabase:{[input; type] out: () genecommon: ` gene: ` affy: ` geneid: ` protein: ` exp: ` reaction: ` vrnx: ` input: ` $ lower' ($input) if[type = `affy affyid: ` $ lower'' ($expression.affyid) ii: & affyid = input expsymbol: ` call: ` affy: expression.affyid[ii] exp: expression.value[ii] expsymbol: expression.expsymbol[ii] call: expression.call[ii] jj: & affygene.affy = input if[~0 = #jj geneid: affygene.gene[jj] kk: & geneenzyme.gene _in\: affygene.gene[jj] if[~0 =#kk protein: geneenzyme.enzyme[kk] genecommon: geneenzyme.gene_common[kk] ] ] out: affy, geneid, genecommon, protein, exp, expsymbol, call :spit[out] ] if[type = `gene_systematic ii: & affygene.gene = ` affygene.gene[ii]: `"?" lcgene: ` $ lower'' ($affygene.gene) ii: & lcgene = input if[~0 = #ii geneid: affygene.gene[ii] geneid: ?geneid affy: affygene.affy[ii] exp: findexpressionvalue' affy ] affy_exp: spit' affy,'exp ii: & geneenzyme.gene = ` geneenzyme.gene[ii]: `"?" lcgene: ` $ lower'' ($geneenzyme.gene) jj: & lcgene = input if[~0 = #jj protein: geneenzyme.enzyme[jj] genecommon: geneenzyme.gene_common[jj] geneid: geneenzyme.gene[jj] ] out: geneid, genecommon, protein, affy_exp :spit[out] ] if[type = `gene_common ii: & geneenzyme.gene_common = ` geneenzyme.gene_common[ii]: `"?" lcgene: ` $ lower'' ($geneenzyme.gene_common) ii: & lcgene _sm\: ("*"),($input),("*") ii,: () i: 0 while[i < #ii if[~0 = #ii genecommon: ,geneenzyme.gene_common[ii[i]] geneid: ,geneenzyme.gene[ii[i]] protein: ,geneenzyme.enzyme[ii[i]] ] jj: & affygene.gene = geneenzyme.gene[ii[i]] if[~0 = #jj affy: affygene.affy[jj] exp: findexpressionvalue' affy ] affy_exp: spit' affy,'exp affy_exp: ,affy_exp out,: spit' genecommon,'geneid,'protein,'affy_exp i+: 1 ] ii: < out :out[ii] ] if[type = `protein tempout: () ii: & geneenzyme.enzyme = ` geneenzyme.enzyme[ii]: `"?" lcenzyme: ` $ lower'' ($geneenzyme.enzyme) ii: lcenzyme _sm\: ("*"),($input),("*") jj: & ~0 = ii protein: geneenzyme.enzyme[jj] protein: ?protein i: 0 while[i < #protein reaction: findreaction' protein[i] reaction: ,/reaction rxn_vrxn: () if[~0 = #reaction vrxn: viewreaction' reaction ] j: 0 while[j < #reaction rxn_vrxn,: , spit[reaction[j], ,vrxn[j]] j+: 1 ] tempout,: spit' (protein[i]) ,/: ` $ rxn_vrxn i+: 1 ] ii: < tempout :tempout[ii] ] if[type = `reactionid out: () ii: & reactionenergy.reactionid _sm\: ("*"),($input),("*") reaction,: () i: 0 reaction: ?reactionenergy.reactionid[ii] while[i < #reaction protein: ,reactionenergy.enzyme[ii[i]] if[~0 = #ii vrxn: ,viewreaction' reaction[i] ] out,: spit' reaction[i],'(,vrxn),'protein i+: 1 ] ii: < out :out[ii] ] if[type = `moleculename names: findsymbol' input if[~ 0 = (#names) jj: intersectleftindexes[names[;0]; reactionenergy.enzyme] names: differ[names; names[jj]] ii: < names[;0] :spit' names[ii;0],'names[ii;1] ] if[0 = (#names) :"not found" ] ] if[type = `moleculeid name: `"not in database" nametemp: ,findcommonname' input if[~0 = #nametemp nametemp: name ] :spit[input, name] ] if[type = `all ii: & expression.affyid _sm\: ("*"),($input),("*") out: () out,: ,(expression.affyid[ii]) ii: & affygene.gene _sm\: ("*"),($input),("*") out,: affygene.gene[ii] ii: & geneenzyme.gene_common = ` geneenzyme.gene_common[ii]: `"?" lcgene: ` $ lower'' ($geneenzyme.gene_common) ii: & lcgene _sm\: ("*"),($input),("*") out,: ,geneenzyme.gene_common[ii] ii: & geneenzyme.gene = ` geneenzyme.gene[ii]: `"?" lcgene: ` $ lower'' ($geneenzyme.gene) ii: & lcgene _sm\: ("*"),($input),("*") out,: ,geneenzyme.gene[ii] ii: & geneenzyme.enzyme = ` geneenzyme.enzyme[ii]: `"?" lcenzyme: ` $ lower'' ($geneenzyme.enzyme) ii: & lcenzyme _sm\: ("*"),($input),("*") out,: geneenzyme.enzyme[ii] ii: & reactionenergy.enzyme _sm\: ("*"),($input),("*") out,: reactionenergy.enzyme[ii] ii: & prodsubstrate.name _sm\: ("*"),($input),("*") out,: ` $ prodsubstrate.name[ii] ii: & prodsubstrate.id _sm\: ("*"),($input),("*") out,: ,prodsubstrate.id[ii] ii: & reactionenergy.reactionid _sm\: ("*"),($input),("*") out,: ,reactionenergy.reactionid[ii] out: ?,/out out1: out ii: & out1 _in\: geneenzyme.enzyme out1[ii]: `protein ii: & out1 _in\: geneenzyme.gene out1[ii]: `"NCBI gene name" ii: & out1 _in\: geneenzyme.gene_common out1[ii]: `"common gene name" ii: & out1 _in\: reactionenergy.enzyme out1[ii]: `protein ii: & out1 _in\: reactionenergy.reactionid out1[ii]: `"reaction ID" ii: & out1 _in\: affygene.gene out1[ii]: `"NCBI gene name" ii: & out1 _in\: expression.affyid out1[ii]: `"Affy ID" ii: & ($out1) _in\: prodsubstrate.name out1[ii]: `molecule ii: & out1 _in\: prodsubstrate.id out1[ii]: `"molecule ID" ii: < out out: out[ii] out1: out1[ii] :spit' out,'out1 ] } / to find the enzymes in a path with their counts. / zz: findpathenzymes'reactionenergy.enzyme[!5] / qq: ,/ zz / (?qq) ,' (#:' = qq) / to change records given an index set. / ii -- is the index set / reactionenergy.reactionid[ii]: ` $ ("p") ,/: ($reactionenergy.reactionid[ii]) / Suppose you are putting in information from rows ii of / reactionenergy into prodsubstrate / prodsubstrate.name,: reactionenergy.reactionid[ii] / prodsubstrate.commonname,: (#ii) #`yes / April 16, 2002. Eliminate roots and leaves. There are a lot of loops / find everything that symbol can lead to and then get / an irredundant subset whatcanbemade:{[symbol] xx: alphaalldests[symbol] routes: alphafindroutesminimal[symbol; xx] ii: &0 < #:' routes :,/routes[ii] } whatcanbemadeimportant:{[symbol] xx: alphaalldestsimportant[symbol] routes: alphafindroutesminimalimportant[symbol; xx] ii: &0 < #:' routes :routes[ii] } / looks at alphadests in order and finds the minimal / set of paths (if a dest is in a previous path then don't / compute the path to it). alphafindroutesminimal:{[alphasource; alphadests] mydests: alphadests out: () while[0 < #mydests xx: alphafindroutes[alphasource; *mydests] mydests: differ[mydests; ?,/xx] out,: xx ] :out } /finds important routes designed so that it will handle the cases /where either dest or sources are list / Note that if both are lists of size greater than 1, this doesn't work. alphafindroutesminimalimportant:{[alphasources; alphadests] mydests: alphadests mysources: alphasources out: () if[1 < #mydests while[0 < #mydests xx: alphafindroutesimportant[mysources; *mydests] mydests: differ[mydests; ?,/xx] out,: xx ] ] if[1 < #mysources while[0 < #mysources xx: alphafindroutesimportant[*mysources; mydests] mysources: differ[mysources; ?,/xx] out,: xx ] ] if[(1 = #mysources) & (1 = #mydests) xx: alphafindroutesimportant[*mysources; *mydests] out,: xx ] :out } /finds routes where only mysources is list alphaallroutesalphafindroutesminimal:{[alphasource; alphadests] mysources: alphasource out: () mydests: alphadests while[0 < #mysources xx: alphafindroutes[*mysources; *mydests] mysources: differ[mysources; ?,/xx] out,: xx ] :out } alphaallroutesimportant:{[alphadest] xx: alphaallsourcesimportant[alphadest] routes: alphafindroutesminimalimportant[xx; alphadest] ii: &0 < #:' routes :squeezesuffix ?routes[ii] } alphaallroutes:{[alphadest] xx: alphaallsources[symbol] routes: alphaallroutesalphafindroutesminimal[xx; symbol] routes: ?routes ii: &0 < #:' routes :squeezesuffix routes[ii] } / Peter, ??? This should be faster if you use the function I just sent. alphaallrouteslimit:{[alphadest; limit] paths: alphaallroutes[alphadest] rpaths: |:' paths out: () i: 0 while[i < #rpaths count: (#rpaths[i]) & limit out,: ,|rpaths[i;!count] i+: 1 ] out?: :out } / find the difference between unimportant and important / for each molecule finddif:{[] mysources: alphaallsources'all mysourcesimportant: alphaallsourcesimportant'all out: all,' differ'[mysources;mysourcesimportant] :out } / Peter wants this for a sanity check checktcmat:{[] out: () y: #all x: y * y ii: & ,/tcmat < maxnum if[(0 = #ii) | (x = #ii) out,: ,"tcmat error" ] ii: & ,/tcmatimportant < maxnum if[(0 = #ii) | (x = #ii) out,: ,"tcmatimportant error" ] :out } / LOCATION-DEPENDENT PATH EXPLORE / #geneenzyme|gene|enzyme|gene_common|localization|subunit|DNA / #reactionenergy|reactionid|enzyme|freeenergysymbolic|fx_unit|num_protein findlocation:{[reactionid] ii: & reactionenergy.reactionid = reactionid myenzymes: reactionenergy.enzyme[ii] jj: myenzymes ?/: geneenzyme.enzyme kk: & jj < #myenzymes :?geneenzyme.localization[kk] } / make a directedge that is localized / `productid `substrateid `reactionid / So, in the result, substrateid will have a substrateid plus a location / productid will have a productid plus a location / reaction id will be as before / except that it will be the transportid for transportation constructlocal:{[directedge] reactions: directedge.reactionid localizations: findlocation'reactions x.substrateid: () x.productid: () x.reactionid: () i: 0 while[i < #directedge.substrateid myloc: localizations[i] x.substrateid,: ` $ ($directedge.substrateid[i]),/: ($myloc) x.productid,: ` $ ($directedge.productid[i]),/: ($myloc) x.reactionid,: (#myloc) # directedge.reactionid[i] i+: 1 ] kk: transporterbase.transportid ?/: transportmolecule.transportid x.substrateid,:` $ ($transportmolecule.molecule),'($transporterbase.base[kk]) x.productid,:` $ ($transportmolecule.molecule),'($transporterbase.target[kk]) x.reactionid,: transportmolecule.transportid :x } / DATA / reactionenergy.reactionid: `"1.1.1.23",() / reactionenergy.enzyme: ,`"aspartate aminotransferase" / reactionenergy.freeenergy: ,0 / reactionenergy.freeenergysymbolic: ,`L \l peter.k /recordtime["first input"] if[mode _in `default `prepare `preparemin inputfromfile["reactionproduct.csv"] reactionproduct.stoich: 0.0 $ $ reactionproduct.stoich reactionproduct.importance: 0.0 $ $ reactionproduct.importance "reactionproduct" 1: reactionproduct inputfromfile["reactionsubstrate.csv"] reactionsubstrate.stoich: 0.0 $ $ reactionsubstrate.stoich reactionsubstrate.importance: 0.0 $ $ reactionsubstrate.importance "reactionsubstrate" 1: reactionsubstrate inputfromfile["directedge.csv"] jj: & (~ directedge.substrateid = `) & (~ directedge.productid = `) directedge.reactionid@: jj directedge.substrateid@: jj directedge.productid@: jj "directedge" 1: directedge inputfromfile["milestones.csv"] / milestones will be used as starting and ending points of a path. "mile" 1: mile inputfromfile["reactionenergy.csv"] "reactionenergy" 1: reactionenergy inputfromfile["geneenzyme.csv"] "geneenzyme" 1: geneenzyme inputfromfile["transporterbase.csv"] "transporterbase" 1: transporterbase inputfromfile["transportmolecule.csv"] "transportmolecule" 1: transportmolecule ] all: ?directedge.substrateid, directedge.productid all@: < all maxnum: 2 * #all / EXECUTION if[mode _in `default `prepare recordtime["build matrix"] mat: build[all; directedge] recordtime["do transitive closure"] tcmat: tc[mat] recordtime["find roots and leaves"] allroots: findroots[tcmat] allleaves: findleaves[tcmat] allrootsalpha: all[allroots] allleavesalpha: all[allleaves] / localization stuff directedgelocal: constructlocal[directedge] / end of localization stuff / all remains the original directedge elements. inindexes: () i: 0 while[i < #directedge.substrateid jj: & (reactionsubstrate.substrateid = directedge.substrateid[i]) & (reactionsubstrate.reactionid = directedge.reactionid[i]) if[0 = #jj; ` 0: ,"No matching substrate" ` 0: ,($ directedge.substrateid[i]) ` 0: ,($ directedge.reactionid[i]) / !-1 ] / peter messed up if[1 < #jj ` 0: ,"Too many matching substrates" ` 0: ,($ directedge.substrateid[i]) ` 0: ,($ directedge.reactionid[i]) / !-1 ] / peter messed up jj: * jj badflag: reactionsubstrate.importance[jj] > 1 if[~ badflag jj: & (reactionproduct.productid = directedge.productid[i]) & (reactionproduct.reactionid = directedge.reactionid[i]) if[0 = #jj; ` 0: ,"No matching products" ` 0: ,($ directedge.productid[i]) ` 0: ,($ directedge.reactionid[i]) / !-2 ] / peter messed up if[1 < #jj ` 0: ,"Too many matching products" ` 0: ,($ directedge.productid[i]) ` 0: ,($ directedge.reactionid[i]) / !-2 ] / peter messed up jj: * jj badflag: reactionproduct.importance[jj] > 1 ] if[~ badflag; inindexes,: i] i+: 1 ] directedgeimportant.substrateid: directedge.substrateid[inindexes] directedgeimportant.productid: directedge.productid[inindexes] directedgeimportant.reactionid: directedge.reactionid[inindexes] recordtime["build important matrix"] matimportant: build[all; directedgeimportant] recordtime["do transitive closure on important matrix"] tcmatimportant: tc[matimportant] allmiles: mile.milestones allmilesnumbers: all _bin/: allmiles tcmatimportantstop: tcstop[matimportant; allmilesnumbers] ] if[mode _in `default `prepare `preparemin / localization alllocal: ?directedgelocal.substrateid,directedgelocal.productid alllocal@: < alllocal maxnum: 2 * #alllocal matlocal: build[alllocal; directedgelocal] recordtime["do transitive closure of local"] tcmatlocal: tc[matlocal] recordtime["find roots and leaves of local"] allrootslocal: findroots[tcmatlocal] allleaveslocal: findleaves[tcmatlocal] allrootsalphalocal: alllocal[allrootslocal] allleavesalphalocal: alllocal[allleaveslocal] maxnum: 2 * #all / for other purposes "directedgelocal" 1: directedgelocal / end of localization inputfromfile["expression.csv"] expression.value: 0.0 $ $ expression.value "expression" 1: expression inputfromfile["affygene.csv"] "affygene" 1: affygene inputfromfile["oldaffygene.csv"] "oldaffygene" 1: oldaffygene inputfromfile["prodsubstrate.csv"] prodsubstrate.name: $ prodsubstrate.name prodsubstrate.lowername: lower'' prodsubstrate.name "prodsubstrate" 1: prodsubstrate ] if[mode _in `query `preparemin tcmat: 1: "tcmat" tcmatimportant: 1: "tcmatimportant" tcmatimportantstop: 1: "tcmatimportantstop" allmilesnumbers: 1: "allmilesnumbers" allroots: 1: "allroots" allleaves: 1: "allleaves" allrootsalpha: 1: "allrootsalpha" allleavesalpha: 1: "allleavesalpha" . ("\ \\l "), filename ] allleavesimportant: findleaves[tcmatimportant] allleavesalphaimportant: all[allleavesimportant] if[mode _in `prepare `default "mat" 1: mat "tcmat" 1: tcmat "allroots" 1: allroots "allleaves" 1: allleaves "allrootsalpha" 1: allrootsalpha "allleavesalpha" 1: allleavesalpha "tcmatimportant" 1: tcmatimportant "tcmatimportantstop" 1: tcmatimportantstop "allmilesnumbers" 1: allmilesnumbers . "\\\\" ] checktcmat[] ` 0: ,"Set-up is done. Go ahead and play." / \l peter / "tmpout" 0: findenzymesval[2;`above;1] / "tmpout2" 0: findenzymesval[2;`above;0] / thosewithroutes: all[& alphahasroute'all] / thosewithimportantroutes: all[& alphahasrouteimportant'all] / differ[thosewithroutes; thosewithimportantroutes]