/ Combinatorial design
/ Given a set of attributes with their values, generate all
/ possible experiments such that every combination of every pair
/ of attribute values is present in at least one experiment.
/ That goes into the table tmpexpercsv.
/ Here is a typical input format. The last line concerning pivots is optional.
/ foo 1 2 3
/ bar 1 2 3 4
/ zoo 1 2
/ dodo a b
/ zin c
/ fump c f
/ pivot: zoo 


x: _gtime _t
y: ((x[1]) ! 1003) _draw 300


/ One way to do this is that table expandexper varies each variable through
/ all its values and gets the combinations of the others from table exper.
/ It does this for attribute A by considering each value of A vi
/ and creating all rows in exper with A replaced by vi
/ and does this for each A value.
/ At the end, duplicates are removed.
/ This can be used as follows. If we want to test a particular variable.
/ This tells us how to test it. e.g. to test illumination look at experexpand.
/ That one tells under lots of other variations what is going on.
/ But that isn't optimal. Some rows are still there just for the sake
/ of the removed attribute. 

/ But the philosophy is correct.
/ A better way if you want to expand around some 
/ set of attributes A, B etc. is to reduce these to single values
/ in the attvalues list. 
/ Then take each possible set of pivot attributes with all their values
/ and repeat the exper table for each of those.
/ No duplicate removal is necessary.


/ March, 2002: adding randomness improves the design greatly.
/ Set up:
/ 1. Must be an even number of attributes in attnames. 
/ If not, add an attribute Dummy with one value.
/ 2. experexpand must have same attributes as exper.
/ 3. attnames and attnamesexpand must be lists (so enlist if necessary).
/ 4. Input specifies attnamesexpand just as a bunch of arguments.
` 0: ,"Usage: k genericcomb file [number of iterations]"
` 0: ,"File format: {attname values}+ [pivot: attnames]"


/ BASIC ROUTINES


/ find difference between list[0] and list[1]
listdiff:{[list]
  :differ[list[0]; list[1]]
}

/ returns one if x is a subset of y
subset:{[x; y]
  i: y ?/: x
  : ~ (#y) _in i
}

/ returns one if x is a subset of y
subset:{[x;y] (#y) > |/ y ?/: x}        

differ:{[x;y]
  x,: ()
  y,: ()
  i: y ?/: x
  j: & i = #y
  :?x[j]
}

/ A faster difference, yielding indexes in x that differ from y
differindexes:{[x;y]
  i: y ?/: x
  j: & i = #y
  :j
}


/finds intersection of two lists
/ fastest of all
intersect: {[x;y]
  x,: ()
  y,: ()
  i: x ?/: y
  :x[(?i) _dv #x]
}

/finds intersection of two lists
/ fastest of all
intersect: {[x;y]
  x,: ()
  y,: ()
  if[(#x) < (#y)
    i: x ?/: y
    j: & i < #x
    :x[?i[j]]
  ]
  i: y ?/: x
  j: & i < #y
  :y[?i[j]]
}

/finds intersection of two lists
/ fastest of all
hasintersect: {[x;y]
  x,: ()
  y,: ()
  i: x ?/: y
  : (&/i) < #x
}

/ x is a proper subset of y
propersubset:{[x;y]
  x,: ()
  y,: ()
  if[~ (#x) < (#y); :0] / must be smaller
  :(#x) = (#intersect[x;y])
}

/ x is a proper subset of y
propersubset:{[x;y]
  x,: ()
  y,: ()
  if[~ (#x) < (#y); :0] / must be smaller
  :subset[x;y]
}

/finds indexes in x and y that intersect
/ If x and y are both sets, then the results will be of the same length
/ fastest of all
intersectindexes: {[x;y]
  i: x ?/: y / where each y hits
  j: & i < #x / those ys that hit
  :(i[j];j)
}

/finds indexes in x that intersect with y
intersectleftindexes: {[x;y]
  i: x ?/: y / where each y hits
  j: & i < #x / those ys that hit
  :i[j]
}

/finds intersection of two lists
/ and returns index pairs of matches. Assumes no duplicates
/ in either list
intersectbothindexes: {[x;y]
  x,: ()
  y,: ()
  i: x ?/: y
  pairs: (i ,' (!#y))
  k: & pairs[;0] < #x
  :pairs[k]
}

/ finds intersection of two lists that may have duplicates
bagintersectbothindexes:{[x;y]
  alreadyused: out: ()
  i: 0
  while[i < #x
	my: x[i]
	jj: & y ~\: my
	jj: differ[jj; alreadyused]
	if[0 < #jj
		out,: ,(i;*jj)
		alreadyused,: *jj
	]
	i+: 1
  ]
  :out
}

/ intersect many lists
multiintersect:{[lists]
 size: #lists
 if[2 > size; :lists]
 first: lists[0],()
 jj: ,/ ?:' first (?/:)/: lists[1+ !(size-1)] / find indexes in first
 x: @[(1+#first) # 0; jj; + ; 1]
 x: (-1) _ x / delete missing entry
 kk: & x = size - 1
 :first[kk]
}

/ this is a set intersection so we remove duplicates
multiintersect:{[lists]
 size: #lists
 if[2 > size; :lists]
 first: lists[0],()
 jj:  first ?/: (,/ ?:' lists[1+ !(size-1)]) / find indexes in first
 x: @[(1+#first) # 0; jj; + ; 1]
 x: (-1) _ x / delete missing entry
 kk: & x = size - 1
 :first[kk]
}

avg:{(+/ x) % # x}

var:{avg[_sqr x] - _sqr avg[x]}
std:{_sqrt var[x]}
cov:{avg[x * y] - avg[x] * avg[y]}
corr:{ (cov[x;y])%((std[x]) * (std[y]))}
/ delay based search
corrdelay:{[delay;x;y] 
  x: (-delay) _ x
  y: delay _ y
  (cov[x;y])%((std[x]) * (std[y]))}


/ END BASICS


/ DUMP TABLE dumptable


/ formstring takes a list and makes a string
formstring:{[list]
  list,: ()
  : (-1) _ ,/ ($list) ,\: (" ")
}

formstringvertbar:{[list]
  list,: ()
  : (-1) _ ,/ ($list) ,\: ("|")
}

formstringcomma:{[list]
  list,: ()
  : (-1) _ ,/ ($list) ,\: ("|")
}


/ Output a table (a variable) to a text file outfile (string)
/ e.g.   output[`guide; guide; "foobar"]
dumptable:{[tablename; table; outfile]
  out: ()
  first: *!table
  numofelements: . ("#"), ($tablename), ("."), ($first)
  i: 0
  while[i < numofelements
	list: table[;i]
	x: formstrincomma'list
	out,: , ($x[0]), (":	"), (-1) _ ,/(1 _ x) ,\: (" ")
	i+: 1
  ]
  outfile 0: out
}

dumpmatcsv:{[mat; outfile]
	if[`disregard _in mat[0]
	  mat: (-1) _' mat
	]
	othernums: 1 _ !#mat[;0]
	mat[0]: (`num), mat[0]
  out: ()
  / first: *!table
  numofelements: #mat[;0]
  i: 1
  while[i < numofelements
	list: mat[i]
	list: list[0 1], `WP, `S, `T, list[2 3], list[ 3 4 5]
	x: formstring'list
	out,: , (-1) _ ,/x ,\: ("|")
	i+: 1
  ]
  outfile 0: out
}

/ APPLICATION SPECIFIC

/ given attnames, attvalues, genpairs. Fill the exper table.
genexper:{[attpairs]
  while[0 < #attpairs
	pair: finddisjoint[attpairs]
	attpairs: pair[1]
	addexper[pair[0]]
  ]
}


/ given a set of pairs, find a disjoint set
/ we will process each such disjoint set
finddisjoint:{[x]
  x: x[(#x) _draw -#x] / may want this in any case
  out: ()
  remaining: ()
  out,: ,*x
  current: ,/out
  x: 1 _ x
  while[0 < #x
	cands: *x
	flag: hasintersect[cands; current]
	if[flag; remaining,: ,*x]
	if[~flag
		out,: ,*x
		current,: *x
	]
	x: 1 _ x
  ]
  :(out; remaining)
}

permute:{[list]
  if[0 = #list; :list]
  :list[(#list) _draw -#list]
}

/ Given the pairs, generate the rows for the experiment by seeing
/ what is left to be done and then doing it.
/ Keep track of what can be collapsed.
addexper:{[pairs]
 origpairs: pairs
 x: differ[!#allatts; ,/pairs]
 pairs,: (_ (#x) % 2; 2) # x
 valsleftall: stilltodo'pairs / gives list of lists of values left
 maxcount: |/ #:'valsleftall
 if[0 < maxcount
  j: 0
  while[j < #valsleftall
	pair: pairs[j]
	valsleft: valsleftall[j]
	if[maxcount > #valsleft
		rest: (maxcount - #valsleft) # ,(`X;`X)
		valsleft,: rest
	]
	if[0 < #valsleft
	 vals:: valsleft[;0]
	 outmat[pair[0]],: vals
	 vals:: valsleft[;1]
	 outmat[pair[1]],: vals
	]
	j+: 1
  ]
 ]
}


/ collect attributes being compared
collect:{[indexpairs]
  out: ()
  i: 0
  while[i < #indexpairs
	pair: indexpairs[i]
	out,: ("["),($attnames[pair[0]]), (", "), ($attnames[pair[1]]), ("]")
	i+: 1
  ]
  :` $ out
}
  

/ Given a pair, see what is left to be done and what more I need to do
/ The pair are indexes into the attribute names.
/ Find out all that has to be done. Take away what's already done.
/ Try to fit in some places that haven't been done and report back
/ what's left.
stilltodo:{[pair]
  valuestodo: ,/attvalues[pair[0]] ,/:\: attvalues[pair[1]]
  valsdone: outmat[pair[0]] ,' outmat[pair[1]]
  valsleft: differ[valuestodo; ?valsdone]
  if[~ firsttimeflag; valsleft: permute[valsleft]]
  if[0 < #valsleft
	/ try to compress based on pairs of values that are free
	jj: & valsdone ~\: (`X;`X)
	/ change routine
	minnum: (#jj) & (#valsleft)
	if[0 < minnum
	 jj@: !minnum
	 valsdone[jj;0]: valsleft[!minnum; 0]
	 outmat[pair[0]]:: valsdone[;0]
	 valsdone[jj;1]: valsleft[!minnum; 1]
	 outmat[pair[1]]:: valsdone[;1]
	 valsleft: minnum _ valsleft
	]
 	/ end of change routine
	/ look for cases where just one has X and can use
      if[0 < #valsleft
	www: bagintersectbothindexes[valsdone;  `X ,/: valsleft[;1]]
	/ change routine
	minnum: (#www)
	if[0 < minnum
	 jj: www[;0]
	 xjj: www[;1]
	 valsdone[jj;0]: valsleft[xjj; 0]
	 vals:: valsdone[;0]
	 / vals:: valsdone[;1]
	 outmat[pair[0]]:: vals
	 valsleft: valsleft[differ[!#valsleft ; xjj]]
	]
      ]
      if[0 < #valsleft
	www: bagintersectbothindexes[valsdone;  valsleft[;0] ,\: `X]
	/ change routine
	minnum: (#www)
	if[0 < minnum
	 jj: www[;0]
	 xjj: www[;1]
	 valsdone[jj;1]: valsleft[xjj; 1]
	 vals:: valsdone[;1]
	 outmat[pair[1]]:: vals
	 valsleft: valsleft[differ[!#valsleft ; xjj]]
	]
      ]
  ]
  :valsleft
}

/ After processing exper, replace the X values with actual values.
/ IF WE WANT A BETTER DISTRIBUTION, then WE CAN GET IT HERE
/ DONE, March 2002
replaceanyvalue:{[]
  i: 0
  while[i < #attnames
	name: attnames[i]
	/ newvalue: attvalues[i] / replace anyvalue with that
	xx: attvalues[i]
	newvalue: * (xx[1 _draw #xx])
		/ JUST CHOOSE ONE AT RANDOM
  	string: ("exper."),($name)
	vals:: . string
	jj: & vals = `X
	vals[jj]:: newvalue
  	string: ("exper."),($name), (":: vals")
	. string
	i+: 1
  ]
}

/ experexpand has, for each attribute, all possible pairwise
/ combinations of the other attributes.
/ But it does this by looking at the exper table where the values
/ were complete even for the pivot.
/ UNUSED
genexperexpandold:{[]
  myatts: attnames
  j: 0
  while[j < #myatts
     myattvalues: attvalues[j]
     k: 0
     while[k < #myattvalues
  	string: ("experexpand."),($myatts[j])
	string,: (",: countexper # `\"")
	string,: ($myattvalues[k])
	string,: ("\"")
	. string
	i: 0
	while[i < #myatts
	  if[~ j = i
  	   string: ("experexpand."),($myatts[i]), (",: ")
  	   string,: ("exper."),($myatts[i])
	   . string
	  ]
	  i+: 1
	] / end of i (attributes other than j)
	k+: 1
     ] / end of k (values of current attribute)
     j+: 1
  ]
  / construct table for partitioning
  string: ("= ")
  j: 0
  while[j < #myatts
  	string,: ("experexpand."),($myatts[j]), (",' ")
	j+: 1
  ]
  string: (-3 _ string)
  part: . string
  indexes:: *:'part
  / shrink to uniques
  j: 0
  while[j < #myatts
  	string: ("experexpand."),($myatts[j]), ("@: indexes ")
	. string
	j+: 1
  ]
}


/ experexpand has, for each attribute, all possible pairwise
/ combinations of the other attributes.
/ This uses attnamesexpand and attvaluesexpand
/ If empty then stop.
/ countexper is the number of rows in exper
genexperexpand:{[]
 if[0 = #attnamesexpand; :()]
 myatts: attnamesexpand
 otheratts: differ[attnames; myatts]
 if[1 = #myatts
  j: 0
  while[j < #myatts
     myattvalues: attvaluesexpand[j]
     k: 0
     while[k < #myattvalues
  	string: ("experexpand."),($myatts[j])
	string,: (",: countexper # `\"")
	string,: ($myattvalues[k])
	string,: ("\"")
	. string
	i: 0
	while[i < #otheratts
  	   string: ("experexpand."),($otheratts[i]), (",: ")
  	   string,: ("exper."),($otheratts[i])
	   . string
	  i+: 1
	] / end of i 
	k+: 1
     ] / end of k (values of current attribute)
     j+: 1
  ]
 ] / single attribute pivot
 / multi-attribute pivot
 if[1 < #myatts
   crossvals: crosssets[attvaluesexpand] / does a cross-product of those
   j: 0
   while[j < #myatts
     myattvalues: crossvals[;j]
     k: 0
     / handle each expanded value
     while[k < #myattvalues
  	string: ("experexpand."),($myatts[j])
	string,: (",: countexper # `\"")
	string,: ($myattvalues[k])
	string,: ("\"")
	. string
	k+: 1
     ]
     j+: 1
   ] / end of k values of one attribute
   / As of now, all the expanded attributes are in experexpand.
   / Next put in the unexpanded attributes.
   k: 0
   while[k < #myattvalues
	i: 0
	while[i < #otheratts
  	   string: ("experexpand."),($otheratts[i]), (",: ")
  	   string,: ("exper."),($otheratts[i])
	   . string
	  i+: 1
	] / end of i 
	k+: 1
    ] / end of k (number of cross-products)
 ] / multi-attribute pivot
}

/ find cross product of a list of value lists
/ It may have one element or many
crosssets:{[listofvallists]
  listofvallists,: ()
  out: listofvallists[0]
  if[1 = #listofvallists; :,:' listofvallists[0]]
  i: 1
  while[i < #listofvallists
        out: ,/ out ,/:\: listofvallists[i]
        i+: 1
  ]
  :out
}

/ INPUT GENERIC DATA


/ parses a field based on spaces
getfieldssymb :{[symbline]
  line: $symbline
  i: line = " "
  j1: &i
  j2: &~i
  line @:j2
  size: #j1
  :` $ (0,(j1 - !size)) _ line
}

/ get rid of blanks at either end of the string
delendblanks:{[string]
  if[0 = #string; :""]
  if[string ~ ,"" ; :""]
  i: & ~ string = " "
  if[(#string) = (#i); :string]
  if[0 = (#i); :""]
  string: (- ((#string)  - (1 + *|i))) _ string
  :(*i) _ string
}
  
spit:{[line] (-2) _ ,/ ($line) ,\: (", ")}


/ Handles one line of input at a time 
/ in format 
/ attname value(s)
/ or
/ pivot: attname(s)
processline:{[line]
 emptyorblank:{[line] (0 = #delendblanks[line])}
 emptyflag: emptyorblank[line]
 if[~emptyflag
  myline: getfieldssymb[delendblanks[line]]
  pivotflag: ($myline[0]) _sm "*pivot*"
  if[~pivotflag
	attnames,: myline[0]
	x: 1 _ myline
	x: x[(#x) _draw -#x]
	attvalues,: ,x
  ]
  if[pivotflag
	pivot,: 1 _ myline
	if[~ subset[pivot; attnames]
		x: (" The pivots ("), (spit[pivot]),(") are not a subset of ")
		x,: (" the attributes ("), (spit[attnames]), (").")
		` 0: ,x
		. "\\\\"
	]
  ]
 ]
}

even:{[num] (num % 2) = _ (num % 2)}


/ origmat has format att and values
/ the pivot attributes have willbepivot and X throughout.
/ These must be changed to the values in the cross-product.
/ attindexes are those indexes of attributes to be given the pivotvals.
insertpivots:{[origmat; pivotvals; attindexes]
  firsts: *:' origmat
  out: ,:' firsts
  otherindexes: differ[(!#origmat); attindexes] / non-pivots
  startmat: 1 _' origmat / withoutlabels
  mycross: crosssets[pivotvals] 
  i: 0
  while[i < #mycross
	thiscross: mycross[i]
	x: startmat
	j: 0
	while[j < #attindexes
		k: attindexes[j]
		out[k],: ((#x[k]) ) # thiscross[j]
		j+: 1
	]
	j: 0
 	while[j < #otherindexes
		k: otherindexes[j]
		out[k],: x[k]
		j+: 1
	]
	i+: 1
  ]
  :out
}
  

/ DATA

attnames: ()
attvalues: () / will be list of lists
pivot: ()
file: _i[0] / should be a file with attributes and possibly a pivot

x: processline' 0: file

/ Number of attnames must be even. We add a dummy called disregard if needed. 
if[~ even[#attnames]
  attnames,: `Light
  attvalues,: ,,`Y
]

origattvalues: attvalues
if[0 < #pivot
  ii: intersectleftindexes[attnames; pivot]
  pivotvals: (attvalues[ii]) ,()
  j: 0
  while[j < #ii
  	attvalues[ii[j]]: ,`willbepivot
	j+: 1
  ]
]

allatts: attnames


/ EXECUTION
if[0 < #pivot
  ii: intersectleftindexes[allatts; pivot]
  j: 0
  while[j < #ii
	allatts[ii[j]]: ` $ ("*"), ($allatts[ii[j]]), ("*")
	j+: 1
  ]
]
outmat:  ,:'(allatts)  / then we will fill in line as needed
	/ at the end we will do dumpmatcsv to get the csv file
bestmat: outmat
bestcountexper: 999999999
vals: () / used as a global
indexes: () / used as a global

genpairs: ,/(!#attnames) ,/:\: (!#attnames)


genpairs@: & genpairs[;0] < genpairs[;1]

firsttimeflag: 1
genexper[genpairs]
if[0 < |/#:'stilltodo'genpairs; !-1] / some genpairs are missing
countexper: #outmat[0]
if[countexper < bestcountexper
  bestcountexper: countexper / the number to beat
  outmatbest: outmat
]
firsttimeflag: 0 / no randomness above
numiter: 100
if[1 < #_i; numiter: 0 $ _i[1]]
do[numiter
  outmat:  ,:'(allatts)  / then we will fill in line as needed
  genexper[genpairs]
  countexper: #outmat[0]
  if[countexper < bestcountexper
        bestcountexper: countexper
  	outmatbest: outmat
  ]
]

outmat: outmatbest
if[0 < |/#:'stilltodo'genpairs; !-1] / some genpairs are missing
outmat: +outmat
i: 0
while[i < #outmat
	jj: & outmat[i] = `X
	k: 0
	while[k < #jj / eliminate X
		outmat[i;jj[k]]: outmat[i-1;jj[k]]
		k+: 1
	]
	i+: 1
]

replace:{[symbol; line]
  i: & line = `willbepivot 
  line[i]: symbol
  :line
}

if[`willbepivot _in outmat[1] 
   x1: replace[`"N"]'outmat
   x2: replace[`"Y"]'1 _ outmat
   outmat: x1, x2
]
outmat@: < outmat
dumpmatcsv[(outmat); "tmpexpercsv"]
"outmatall" 5: ,outmat
. "\\sleep 1"

\\