/ cutoff determines bootstrap value below which we ignore branches / The idea here is that we will take a case where there is / a master tree and a lot of individual trees and try to see where / there is the most likelihood of interbreeding. / For each individual gene, we compare the master with the / individual. If they are incompatible, we try to determine / which graph would make the most sense. / S1: M1 G1 / S2: M2 G2 / S3: M1 G2 / This seems to suggest M1 --> G / and G2 --> M so there is a cycle. / We could leave out any of these. / Suppose we left out S2. / M G / M1 G / M1 G1 / M1 G2 / This would tell us that M G is an ancestor as is M1 G2. / M1 G2 is interesting because it's very close to an existent species. / Maybe we could register which master nodes are taken. spit:{[list] ,/ ($list) ,\: (" ")} spitcomma:{[list] (-1) _ ,/ ($list) ,\: (",")} spitvert:{[list] (-1) _ ,/ ($list) ,\: ("|")} spithyphen:{[list] (-1) _ ,/ ($list) ,\: ("-")} spitstrip:{[list] x: stripbrackets'list :spit[x] } stripbrackets:{[el] x: $el ii: & ~ x _in\: "[] " :` $ x[ii] } processold:{[line] if[2 > #line; :()] out: () i: 0 while[i < #line char: line[i] flag: char _in ";0123456789" if[~flag out,: :[char = ","; " "; char] ] i+: 1 ] :out } / cut out any line below bootstrap cutoff but otherwise parse is the / same. / Also get gene list for yeast genes processold2:{[line] if[2 > #line; :()] if[("Y") = line[0] namelist,: ,line :() ] out: () doingnumber: 0 nums: () curnumstring: () i: 0 while[i < #line char: line[i] numflag: char _in "0123456789" flag: (char = (";")) | numflag if[~flag out,: :[char = ","; " "; char] ] if[numflag if[(~doingnumber); curnumstring: char] / starting number if[doingnumber; curnumstring,: char] / continuing number ] if[doingnumber & (~numflag) x: 0 $ curnumstring nums,: x ] doingnumber: :[numflag; 1; 0] i+: 1 ] goodlines,: cutoff < &/nums resultlines,: ,out :() } / get rid of final paren pair / e.g. from "((((z bkjklj) jk)" / to "(((z bkjklj) jk" huntparens:{[line] rline: |line i: rline ? ")" oline: rline[!i] rline: (i+1) _ rline i: rline ? "(" oline,: rline[!i] rline: (i+1) _ rline oline,: rline :|oline } / cut out any line below bootstrap cutoff but otherwise parse is the / same. / Also get gene list for yeast genes process:{[line] if[2 > #line; :()] if[("Y") = line[0] namelist,: ,line :() ] out: () doingnumber: 0 nums: () curnumstring: () origlines,: ,line i: 0 while[i < #line char: line[i] numflag: char _in "0123456789" flag: (char = (";")) | numflag if[~flag out,: :[char = ","; " "; char] ] if[numflag if[(~doingnumber); curnumstring: char] / starting number if[doingnumber; curnumstring,: char] / continuing number ] if[doingnumber & (~numflag) x: 0 $ curnumstring nums,: x if[x < cutoff; out: huntparens[out]] ] doingnumber: :[numflag; 1; 0] i+: 1 ] / goodlines,: cutoff < &/nums / don't do this resultlines,: ,out :() } add:{[num; line] :line, (" [gene"),($num),("]") } / look at the phylogenies and get the ones that are most frequent findfreq:{[phylogenies; namelist] part: = phylogenies mynamelist: () firstnamelist: () allnamelist: () k: 0 while[k < #part mynamelist,: ` $ (-1) _ spit[namelist[part[k]]] firstnamelist,: ` $ ("["), ((namelist[part[k]])[0]), ("]") allnamelist,: ` $ ("["), (spitcomma (namelist[part[k]])), ("]") k+: 1 ] firsts: (-1) + *:'part uniqs: ?phylogenies counts: #:' part ii: & 0 < counts / every tree when 0 "tmpgroups" 1: (stripbrackets'firstnamelist),'(stripbrackets'allnamelist) x:(` $ uniqs[ii]),'(counts[ii]),'(firstnamelist[ii]),'(mynamelist[ii]) ii: > x[;1] :x[ii] } / PROCESSING cutoff: 80 / any bootstrap value less than this, then ignore that split if[1 > #_i ` 0: ,"k driver graphlife2 (or some such input file)" . "\\\\" ] a: 0: _i[0] resultlines: () origlines: () goodlines: () namelist: () x: process'a namelist: (,"master"), namelist if[0 / we no longer kill lines goodlines[0]: 1 / master must be one of them ii: & goodlines / get the cutoff resultlines@: ii goodlines@: ii namelist@: ii ] "tmporig" 0: origlines,'namelist most: findfreq[resultlines; namelist] most rest: 1 _ resultlines // genenum: !#rest // resultlines2: add[;]'[genenum;rest] // out: ,"rm result*" // i: 0 // while[i < #resultlines2 // a: (all; resultlines2[i]) // (("tmp"),($i)) 0: a // out,: ,("k graphlife tmp"),$i // i+: 1 // ] // "runall" 0: out "minigraph" 0: spit'((most[;0]),'(most[;2])) xx: most[& most[;1] > 1] yy: spitstrip[xx[;2]] / if[~ `"[master]" _in xx[;2] / x: ("k graphlife minigraph master "), yy / ] / if[`"[master]" _in xx[;2] x: ("k graphlife minigraph "), yy / ] "minirun" 0: x \\