/ recordtime["starting peter.k"]
/ findcommonnamesymbol[`"00051"]

/load the expression data file; presumably this populates the expression.* globals used below - confirm against inputfromfile
inputfromfile["expression.csv"]
/re-parse expression.value as floats: monadic $ formats the entries as text, then 0.0 $ converts that text to floats
expression.value: 0.0 $ $ expression.value


/finds the expression entries that have a given regulation.  reg is one of `I, `R, `NC; type is one of `affy, 
/`gene_systematic, `protein; method is `affymetrix or `fold_change.  When method is `fold_change, 
/exp_value1 is the threshold used to find induced genes and exp_value2 is the threshold used to find repressed genes
findregtype:{[reg; type; method; exp_value1; exp_value2]
     / Selects rows of the global expression table by regulation and formats them with spit.
     /   reg    - `I (induced), `R (repressed) or `NC (not changed)
     /   type   - `affy, `gene_systematic or `protein; chooses the layout of the returned rows
     /   method - `affymetrix selects on expression.expsymbol, `fold_change selects on expression.value
     /   exp_value1 - fold_change threshold above which a gene counts as induced
     /   exp_value2 - fold_change threshold below which a gene counts as repressed
     / Returns the list produced by spit for the requested type, or the empty list when type is not recognised.
     ii: ()
     out: ()
     if[reg = `I
          if[method = `affymetrix
            ii: & (expression.expsymbol = `I) | (expression.expsymbol = `MI)
          ]
          if[method = `fold_change
            / induced genes are those above exp_value1; the old code compared against exp_value2,
            / which disagreed with the `NC branch below
            ii: & expression.value  > exp_value1
          ]
     ]
     if[reg = `R
          if[method = `affymetrix
            ii: & (expression.expsymbol = `D) | (expression.expsymbol = `MD)
          ]
          if[method = `fold_change
            ii: & expression.value  < exp_value2
          ]
     ]
     if[reg = `NC
          if[method = `affymetrix
            ii: & expression.expsymbol = `NC
          ]
          if[method = `fold_change
            / not changed = neither above the induced threshold nor below the repressed threshold
            ii: & ~ expression.value > exp_value1
            jj: & ~ expression.value < exp_value2
            ii: intersect[ii; jj]
          ]
     ]
     / one row per selected probe: affy id, expression symbol, call, value
     if[type = `affy
           out: spit' expression.affyid[ii],'expression.expsymbol[ii],'expression.call[ii],'expression.value[ii]
           :out
          ]
     / one row per gene mapped (through affygene) to a selected probe
     if[type = `gene_systematic
          jj: intersectbothindexesdup[affygene.affy; expression.affyid[ii]]
          oo: intersectleftindexes[expression.affyid; affygene.affy[jj[;0]]]
          out: spit' affygene.gene[jj[;0]],'expression.affyid[oo],'expression.expsymbol[oo],'expression.call[oo],'expression.value[oo]
          :out
          ]
     / one row per enzyme: enzyme, number of selected genes for it, number of geneenzyme entries for it
     if[type = `protein
          jj: intersectbothindexesdup[affygene.affy; expression.affyid[ii]]
          jj: ?jj[;0]
          kk: intersectbothindexesdup[geneenzyme.gene; affygene.gene[jj]]
          kk: ?kk[;0]
          enzyme: geneenzyme.enzyme[kk]
          ll: intersectleftindexesdup[geneenzyme.enzyme; enzyme]
          total_enzyme: geneenzyme.enzyme[ll]
          total_uniqs: ?total_enzyme
          total_counts: #:' = total_enzyme
          uniqs: ?enzyme
          counts: #:' = enzyme
          / both count vectors are put into sorted-enzyme order so they line up with uniqs
          / NOTE(review): this assumes total_uniqs holds the same enzymes as uniqs - confirm against intersectleftindexesdup
          mm: < uniqs
          uniqs@: mm
          counts@: mm
          nn: < total_uniqs
          total_counts@: nn
          out: spit' uniqs,'counts,'total_counts
          :out
       ]
     / unrecognised type: return the empty list instead of the nil result of the last if
     :out
}


showme:{[list; val1; val2]
  / Returns the items of list at positions val1, val1+1, ... val2-1 (val2 itself is excluded).
  / When val2 is not greater than val1 the index stays the empty list, so nothing is selected.
  idx: ()
  if[val2 > val1
    idx: val1 + !val2 - val1
  ]
  :list[idx]
}
 
cartprod:{[listoflists]
    / Cartesian product of the lists in listoflists: every combination that takes one item from
    / each list, in order.  With a single list the list itself is returned unchanged.
    acc: listoflists[0]
    steps: (#listoflists) - 1
    k: 0
    do[steps
        k+: 1
        / join every combination built so far with every item of the next list, then flatten one level
        acc: ,/acc ,/:\: (listoflists[k],())
    ]
    :acc
}

/this function is designed to take all the enzymes that fit a desired regulation (I, R, NC), find their substrates
/and products, then build all the pathways from them.  It then returns only pathways that contain at least one enzyme
/that fits the original desired regulation.  It also returns the info on the regulation of all the enzymes in the 
/pathway and the molecules that start and end the pathway, and outputs the pathways ordered by the % of steps that fit 
/the original desired regulation 
findregpathways:{[reg; type; method; exp_value1; exp_value2]
   / Builds the pathways around the enzymes that match the requested regulation and reports, per pathway,
   / the start and end molecules and the regulation counts of every enzyme in it.
   /   reg        - desired regulation, passed to findregtype (`I, `R or `NC)
   /   type       - accepted for interface compatibility; forced to `protein below
   /   method     - passed to findregtype (`affymetrix or `fold_change)
   /   exp_value1 - passed to findregtype (fold_change threshold)
   /   exp_value2 - passed to findregtype (fold_change threshold)
   / Returns the joined output lines, pathways with the highest fraction of matching enzymes first;
   / returns the empty list when no pathway qualifies.
   out: ()
   / initialised up front so the final sort also works when no routes are found
   per_reg: ()
   / the rows returned by findregtype are parsed below as "|"-delimited text whose first field is an enzyme,
   / which is the layout built for type `protein, so the type stays pinned.  The former hard-coded overrides of
   / reg, method, exp_value1 and exp_value2 were removed so that the arguments are honoured.
   type: `protein
   / rows for the enzymes that match the desired regulation
   reglist: findregtype[reg; type; method; exp_value1; exp_value2]
   regenzymes: ()
   indenzymes: ()
   repenzymes: ()
   ncenzymes: ()
   i: 0
   / gets just the enzymes (the text before the first "|") out of reglist
   while[i < #reglist
      jj: & reglist[i] = "|"
      regenzymes,: ,reglist[i;!jj[0]]
      i+: 1
   ]
   xx: intersectleftindexes[$reactionenergy.enzyme; regenzymes]
   regrxn: reactionenergy.reactionid[xx]
   / the same rows for each of the three regulation classes; used later to annotate every enzyme in a path
   indlist: findregtype[`I; type; method; exp_value1; exp_value2]
   replist: findregtype[`R; type; method; exp_value1; exp_value2]
   nclist: findregtype[`NC; type; method; exp_value1; exp_value2]
   i: 0
   / gets just the enzymes out of indlist
   while[i < #indlist
      jj: & indlist[i] = "|"
      indenzymes,: ,indlist[i;!jj[0]]
      i+: 1
   ]
   i: 0
   / gets just the enzymes out of replist
   while[i < #replist
      jj: & replist[i] = "|"
      repenzymes,: ,replist[i;!jj[0]]
      i+: 1
   ]
   i: 0
   / gets just the enzymes out of nclist
   while[i < #nclist
      jj: & nclist[i] = "|"
      ncenzymes,: ,nclist[i;!jj[0]]
      i+: 1
   ]
   / finds the molecules that are substrates and products of all the reactions of the enzymes with the
   / desired regulation
   mollist: findsubstratefromreaction' regrxn
   mollist,: findproductfromreaction' regrxn
   mollist: ?,//mollist
   routes: ()
   / this loop takes a molecule and identifies the routes it is in.  To avoid getting only parts of paths it asks
   / how to make the molecule, takes the first thing from each answer and asks what can be made from that, so the
   / final route goes from milestone to milestone.  Every molecule seen in a route is removed from mollist.
   while[~0 = #mollist
     listroute: ()
     temproutes: alphaallroutesimportantstop[*mollist]
     if[0 < #,/temproutes
        j: 0
        while[j < #temproutes
          vv: temproutes[j;0]
          routes,: whatcanbemadeimportantstop[vv]
          listroute,: ,//routes
          j+: 1
        ]
      ]
     / always drop the molecule just processed so the loop terminates
     listroute,: *mollist
     mollist: differ[mollist; listroute]
    ]
    routes: ?routes
    / if no routes were generated there is no reason to go further
    if[~0 = #routes
       keeppaths: ()
       m: 0
       / this loop generates what is called a path in this function: the molecule that starts the route, the
       / last molecule in the route, and then the reactions needed for the conversion.  All paths end up in keeppaths.
       while[m < #routes
                / consecutive (substrate; product) pairs along the route
                pairs: ((-1) _ routes[m]) ,' (1 _ routes[m])
                n: 0
                goodrxns: ()
                paths: ()
                while[n < #pairs
                     / reactions that have the pair's first molecule as an importance-1 substrate and its second
                     / molecule as an importance-1 product
                     ii: & (reactionsubstrate.substrateid = pairs[n;0]) & (reactionsubstrate.importance = 1)
                     goodsub:  reactionsubstrate.reactionid[ii]
                     jj: & (reactionproduct.productid = pairs[n;1]) & (reactionproduct.importance = 1)
                     goodprod: reactionproduct.reactionid[jj]
                     goodrxns,: ,intersect[goodprod; goodsub]
                     n+: 1
               ]
               rm: #routes[m]
               / every combination of one reaction per step
               goodpaths: cartprod[goodrxns]
               keeppaths,: ,(routes[m;0],'routes[m;(rm-1)] ,/: ?goodpaths)
                m+: 1
       ]
       keeppaths: ?,/keeppaths
       enzpath: ()
       sub_prod: ()
       l: 0
       / this loop assembles the final output.  All the reactions are converted to enzymes, the regulation of the
       / enzymes is looked up in indlist, replist and nclist, and the number of genes for each enzyme is found.
       while[l < #keeppaths
           / NOTE(review): keeppaths[l] starts with the two end-point molecules, so findenzymefromreaction is also
           / applied to them - confirm that it yields nothing for non-reaction ids
           tempenz: findenzymefromreaction' keeppaths[l]
           num_reg: intersect[$,/tempenz; regenzymes]
           / if none of the enzymes in the path match the desired regulation the path is discarded
           if[~0 = #num_reg
              / fraction of the steps in the path that match the desired regulation; the output is sorted by it
              per_reg,: (#num_reg)%(#,/tempenz)
              / places where the enzymes in the path match the induced, repressed and not changed enzymes
              ip: intersectbothindexes[$,/tempenz; indenzymes]
              rp: intersectbothindexes[$,/tempenz; repenzymes]
              ncp: intersectbothindexes[$,/tempenz; ncenzymes]
              / build a zero-filled list with one slot per enzyme in the path
              num_pos: intersectleftindexes[,/tempenz; ,/tempenz]
              num_pos[num_pos]: 0
              / per-enzyme counters: induced, repressed, not changed, and genes for that enzyme in the genome
              ind_num:num_pos
              rep_num:num_pos
              nc_num:num_pos
              genome_num: num_pos
              p: 0
              / each matching row is reversed so that its last field (taken as the genome count) comes before the
              / first "|" and the field before it (the count for this regulation) lies between the first two "|"
              while[p < #ip
                rlist: |indlist[ip[p;1]]
                ii: & rlist = "|"
                genome_num[ip[p;0]]: ,(|rlist[!ii[0]])
                ind_num[ip[p;0]]: ,(|showme[rlist;(ii[0]+1);ii[1]])
                p+: 1
              ]
              p: 0
              while[p < #rp
                rlist: |replist[rp[p;1]]
                ii: & rlist = "|"
                genome_num[rp[p;0]]: ,(|rlist[!ii[0]])
                rep_num[rp[p;0]]: ,(|showme[rlist;(ii[0]+1);ii[1]])
                p+: 1
              ]
              p: 0
              while[p < #ncp
                rlist: |nclist[ncp[p;1]]
                ii: & rlist = "|"
                genome_num[ncp[p;0]]: ,(|rlist[!ii[0]])
                nc_num[ncp[p;0]]: ,(|showme[rlist;(ii[0]+1);ii[1]])
                p+: 1
              ]
              / if after going through indlist, replist and nclist a gene count for an enzyme is still zero,
              / count its entries in geneenzyme.enzyme instead
              genome_num: 0 $ $ genome_num
              gzero: & (,/genome_num) = 0
              r: 0
              while[r < #gzero
                  / the zero positions are in gzero (the old code indexed an undefined name g0 here)
                  ng: & geneenzyme.enzyme = (,/tempenz)[gzero[r]]
                  genome_num[gzero[r]]: #ng
                  r+: 1
              ]
              / names of the molecules that start and end the path
              temp_mol: findcommonname' keeppaths[l]
              sub: ` $ ,(temp_mol[0])
              prod: ` $ ,(temp_mol[1])
              / one output group per path: a marker line, the start/end molecules, then one line per enzyme
              / with its induced, repressed, not changed and genome counts
              outtemp: ,"New Path"
              outtemp,: spit' sub,'prod
              outtemp,: spit' (,/tempenz),'ind_num,'rep_num,'nc_num,'genome_num
              out,: ,(outtemp)
          ]
          l+: 1
      ]
   ]
   / sort the paths so those with the highest fraction of enzymes with the desired regulation come first
   ii: > per_reg
   out@: ii
   :,/out
}

/run the pathway search and write the returned lines as text to the file "tmpout"
"tmpout" 0: findregpathways[`I; `protein; `affymetrix; 2; 3]