/ Code author: Dennis Shasha, 2006
/ Joint work with Tsong-Li Wang and Kaizhong Zhang
/ schedmatch.k
/ Usage k schedmatch [-v] [SCHEDNAME]
/
/ where SCHEDNAME is replaced by one name in the file schedin
/ and -v gives many possible solutions.
/ If file schedin is not present, the program declares an error.
/ 
/ This file computes the dependency distance between two schedules.
/ Suppose you have two schedules S1 and S2.
/ Each is represented by a set of dependency edges
/ with redundant transitive edges removed.
/ (Note however that if there are edges from task A to task B and
/ from B to C,
/ there may still be an edge from A to C if that edge presents
/ a separate constraint from what is implied by the path through B.)
/ The software assumes that each schedule by itself is acyclic
/ (i.e. if there is a dependency from A to B either directly
/ or transitively, then there is no dependency from B to A).
/ 
/ The problem is to reconcile the two schedules.
/ Our definition of reconcilation is to find
/ as few edges as possible to remove from either S1 or S2
/ to render the two schedules consistent.
/ Informally, consistency means that if one schedule insists on
/ ordering task A before B then the other schedule does not
/ insist on ordering B before A.
/ Formally, consistency means that there is some total order
/ of the tasks that both can agree on.
/ An equivalent formal definition is that two schedules
/ are consistent if combining them introduces no cycles.
/
/ The basics of the algorithm are to combine the schedules and then
/ to remove as few permitted edges as possible.
/ If there are only two schedules, then any edge  shared by both
/ cannot be removed.
/ In the case of more than two schedules,
/ an edge is permitted unless all schedules have it.


/ 
/ Representation: 

/ bob|A|B
/ bob|B|C
/ bob|C|D
/ bob|B|E
/ bob|E|D
/ bob|A|D
/ bob|F|D
/ alice|A|B
/ alice|B|C
/ alice|D|C
/ alice|B|E
/ alice|D|E
/ alice|G|D


/ BASICS

spit:{[list] (-1) _ ,/ ($list) ,\: (" ") }
spitbar:{[list] (-1) _ ,/ ($list) ,\: ("|") }
spitunder:{[list] (-1) _ ,/ ($list) ,\: ("_") }


/finds intersection of two lists
/ fastest of all
intersect: {[x;y]
  x,: ()
  y,: ()
  if[(#x) < (#y)
    i: x ?/: y
    j: & i < #x
    :x[?i[j]]
  ]
  i: y ?/: x
  j: & i < #y
  :y[?i[j]]
}

/finds intersection of two lists
/ fastest of all
hasintersect: {[x;y]
  x,: ()
  y,: ()
  i: x ?/: y
  : (&/i) < #x
}

/ x is a proper subset of y
propersubset:{[x;y]
  x,: ()
  y,: ()
  if[~ (#x) < (#y); :0] / must be smaller
  :(#x) = (#intersect[x;y])
}

subset:{[x;y] (#x) = (#intersect[x;y])}

subset:{[x;y] 
  j: y ?/: x
   / if any js are as big as #y, then there is a member of x not in y
  :0 = |/ j = #y
}

/ proportion of x that y covers
approxsubset:{[x;y]
  j: y ?/: x
  / any equal to #y are in x but not in y
  :(# & j < (#y)) % (#x)
}

/ finds indexes in x and y that intersect
/ If x and y are both sets, then the results will be of the same length
/ fastest of all
intersectindexes: {[x;y]
  i: x ?/: y / where each y hits
  j: & i < #x / those ys that hit
  :(i[j];j)
}

/finds indexes in x that intersect with y
intersectleftindexes: {[x;y]
  i: x ?/: y / where each y hits
  j: & i < #x / those ys that hit
  :i[j]
}

/finds intersection of two lists
/ and returns index pairs of matches. Assumes no duplicates
/ in either list
intersectbothindexes: {[x;y]
  x,: ()
  y,: ()
  i: x ?/: y
  pairs: (i ,' (!#y))
  k: & pairs[;0] < #x
  :pairs[k]
}


/ this is a set intersection so we remove duplicates
multiintersect:{[lists]
 size: #lists
 if[2 > size; :lists]
 first: lists[0],()
 jj:  first ?/: (,/ ?:' lists[1+ !(size-1)]) / find indexes in first
 x: @[(1+#first) # 0; jj; + ; 1]
 x: (-1) _ x / delete missing entry
 kk: & x = size - 1
 :first[kk]
}

/ A faster difference
differ:{[x;y]
  x,: ()
  y,: ()
  i: y ?/: x
  j: & i = #y
  :?x[j]
}

/ Count the difference
differcount:{[x;y]
  i: y ?/: x
  :# & i = #y
}

avg:{(+/ x) % # x}

var:{avg[_sqr x] - _sqr avg[x]}
std:{_sqrt var[x]}
cov:{avg[x * y] - avg[x] * avg[y]}
corr:{ (cov[x;y])%((std[x]) * (std[y]))}
/ delay based search

/ END OF BASICS


/ a list is numeric if each member is either empty, a period
/ or a digit
isnumeric:{[list]
  s: ,/list
  nums: "1234567890"
  x: nums ?/: s
  y: (#nums) > |/x / if greater then everything in s met its match
  if[1 = y; :2] / 2 means integer
  nums: "1234567890."
  x: nums ?/: s
  :(#nums) > |/x / if greater then everything in s met its match
	/ 1 means float and 0 means not a number
}

/ parses a field based on vertical bars
getfields:{[line]
  i: line = "|"
  j1: &i
  j2: &~i
  line @:j2
  size: #j1
  :(0,(j1 - !size)) _ line
}

/ get rid of blanks at either end of the string
delendblanks:{[string]
  if[0 = #string; :""]
  if[string ~ ,"" ; :""]
  i: & ~ string = " "
  if[(#string) = (#i); :string]
  if[0 = (#i); :""]
  string: (- ((#string)  - (1 + *|i))) _ string
  :(*i) _ string
}
  

/ Handles one line of input at a time according to table schema above.
/ A little more space efficient than the previous version.
processline:{[line]
 emptyorblank:{[line] (0 = #delendblanks[line])}
 emptyflag: emptyorblank[line]
 if[~emptyflag
  justempty:: 0
  if[line[0] = "#"
    newline: delendblanks'getfields[1 _ line]
    if[(1 < #newline) 
	globaltable:: *newline
	alltables,: ` $ globaltable
	globalatt:: 1 _ newline
	globalvals:: () / initialize
    ] / newline
  ] / table declaration
  if[~ line[0] = "#"
	newline: delendblanks'getfields[line]
	if[(1 < #newline) | (0 < #newline[0]) 
		globalvals,: ,newline
	]
  ] / end of test on data line
 ] / end of test on line is non-empty
 if[emptyflag & (0 = justempty)
	justempty:: 1
	/ if[ ((#globalatt) > 1) | (~ (#globalatt) = (^globalvals)[0])
		/ globalvals:: + globalvals / take transpose
	/ ]
	if[~ (#globalatt) = (#globalvals[0]); !-1]
	numericflag:()
	hh: 0
	while[hh < #globalatt
		numericflag,: isnumeric[globalvals[;hh]] 
		hh+: 1
	]
	inindex:: 0
	while[inindex < #globalatt
	  / table.att[2] :: ` $ vals[3]
		string1: globaltable, (".")
		string1,: globalatt[inindex]
		teststring: ("0 = #"), string1
		/ For big sizes, uncomment following two statements
		/ User will get an error message, but it will work.
		/ x: @[.: ; teststring; :]
		/ currentlyempty: *x / error means empty
		/ For big sizes, comment following
		currentlyempty: 1
		/ if empty, then assign, else append
		string2: :[currentlyempty; (":: "); (",: ")] 
		string3: :[0 = numericflag[inindex]
			"` $ "
			:[1 = numericflag[inindex]; "0.0 $ "; "0 $ "]]
		string3,: "globalvals[;inindex]"
		. string1, string2, string3
		if[1 = numericflag[inindex]
			globalindexes:: & 0N = . string1
			string3: string1, ("[globalindexes] :: `")
			. string3
		]
		inindex+: 1
	]
 ]
}


inputfromfile:{[filename]
  a: 0: filename
  firstline: "#schedtab|sched|source|target"
  a: (,firstline),a
  globalvals:: ()
  oldglobalvals:: () / should not be necessary
  justempty:: 1 / as if we've just seen an empty line. should process table
  a,: ,,""
  processline'a
}

/ ARGUMENT PROCESSING

verbosemode: 0
permitted: `either / can remove edges from either file.
okflag: 1
processargs:{[args]
  i: 0
  while[(i < #args) & okflag
	okflag:: 0
	addextra: 0
	if[args[i] ~ "-v"
		verbosemode:: 1
		okflag:: 1
	]
	if[~ args[i;0] ~ "-"
		permitted:: ` $ args[i]
		okflag:: permitted _in schedtab.sched
	]
	i+: 1 + addextra
	if[0 = okflag
		` 0: ,"format is k schedmatch [-v] [SCHEDNAME]"
		` 0: ,"Remember that schedin is the name of the input file"
		` 0: ,"SCHEDNAME should be a name in schedin"
		` 0: ,"Also each schedule should be acyclic."
	]
  ]
}

/ APPLICATION SPECIFIC

/ This algorithm takes as input two graphs
/ A and B.
/ It takes a union of the nodes and for each edge e,
/ it marks it as either one schedule or both.
/ First schedule if it comes from graph A; 
/ Second schedule if it comes from graph B;
/ both if in both graphs.
/ It also gives a variable permitted which says which schedule's edges 
/ you can remove.
/ The algorithm is to construct a topological sort and remove
/ edges as needed, choosing roots with a minimum number of incoming
/ edges as the next candidate.
/ There may be a choice in which case one chooses randomly.


delblanks:{[x] 
 ii: & ~ x = " "
 :x[ii] 
}


/ This function combines two graphs.
/ It takes the union of the nodes and marks the edges as coming
/ from a single schedule or both.
/ We assume that edges are represented with symbols not numbers
/ and that these symbols are good for both graphs.
/ Not really ready for three or more schedules
combine:{[NA; EA; NB; EB; allscheds]
 N: ? NA, NB
 x: intersect[EA; EB]
 marks: (# x) # `both
 y: differ[EA; EB]
 marks,: (# y) # allscheds[0]
 z: differ[EB; EA]
 marks,: (# z) # allscheds[1]
 E: x,y,z
 :(N; E; marks) }

/ This function combines several graphs.
/ It takes the union of the nodes and marks the edges as coming
/ all schedules ("both").
/ or only "some"
/ The only requirement will be that edges marked "both" can't be deleted.
multicombine:{[allnodes; alledges; allscheds]
 N: ?,/ allnodes
 E: multiintersect[alledges]
 marks: (#E) # `both
 x: ,/alledges
 x: differ[x;E]
 E,: x
 marks,: (#x) # `some
 :(N; E; marks) }

link:{[pairs; label] pairs,\: label}

multicombine:{[allnodes; alledges; allscheds]
 N: ?,/ allnodes
 E: multiintersect[alledges]
 marks: (#E) # `both
 alledges: differ[;E]'alledges
 edgelabel: ,/ link'[alledges;allscheds]
 part: = edgelabel[;0] ,' edgelabel[;1]
 marks,: ` $ spitunder'edgelabel[part;2]
 E,: ? edgelabel[;0] ,' edgelabel[;1]
 :(N; E; marks) }


deleteedge:{[E; marks; e] 
  i: E ? e
  newmarks: (marks[!i]),((i+1) _ marks)
  newE: (E[!i]),((i+1) _ E)
  :(newE; newmarks)}
   
  
/ delete a root n that has no incoming edges (must be checked beforehand)
/ and then delete all edges that have n as a source.
/ Delete their marks as well.
deleteroot:{[N; E; marks; n]
  N: differ[N; ,n]
  if[0 = #E; :(N; E; marks)] / nothing to delete
  ii: & E[;0] = n
  left: differ[!# E; ii]
  E@: left
  marks@: left
  :(N; E; marks)
  } 

/ find candidate root such that causing it
/ to have no edges entails deleting a permitted type of edge
/ or no edge at all.
/ If a permitted type of edge, then add that edge to the deletededges.
findcand:{[N; E; marks; permitted; allscheds]
  todelete: ()
  res: ()
  trips: evalcand[E;marks;permitted; allscheds]'N
	/ returns number of deletions necessary and which edges are involved.
	/ large number means not possible.
  ii: & (*:' trips) = 0
  / trips: ((0;());(1; ,`e`g);(0;()))
  / ii: ,0
  if[0 < # ii
	j: *1 _draw #ii / choosing randomly
	i: ii[j]
  	res: (N[i];trips[i;1]; trips[i;2])
  ]; 
  if[0 = # ii
     minincount: &/ trips[;0]
     if[minincount > # E; !-1] / bad news
     ii: & trips[;0] < minincount+2 / a little suboptimal but good
     j: *1 _draw # ii / choosing randomly
     i: ii[j] 
     res: (N[i]; trips[i;1]; trips[i;2])
  ]
  :res
  }

/ determine whether a node n is a good candidate for being the next root.
/ if so, return the edges that would need to be deleted
/ If we are doing an intersect of several schedules, then permitted 
/ is automatically set to "either".
evalcand:{[E; marks; permitted; allscheds; n]
  if[0 = # E; :(0; (); ())]
  todelete: ()
  ii: & E[;1] = n
  if[0 = # ii; :(0; todelete; todelete)]
  mymarks: marks[ii]
  myedges: E[ii]
  / either for permitted means you can delete edges from any schedule
  / but no edge labeled both
  countboth: # & mymarks = `both
  if[0 < countboth; :(1 + # E; todelete; todelete)]; / can't be done
	/ return a large edge count because we are not permitted to 
	/ delete edges marked both
  if[permitted = `either; :(# ii; myedges; mymarks)]
  uniqs: allscheds
  countA: # & mymarks = uniqs[0]
  if[(0 < countA) & (permitted = uniqs[1]); :(1 + # E; todelete; todelete)]
	/ if there are any uniqs[0] edges and you have permission
	/ to delete uniqs[1] edges only, then you may not do it
  countB: # & mymarks = uniqs[1]
  if[(0 < countB) & (permitted = uniqs[0]); :(1 + # E; todelete; todelete)]
  :(# ii; myedges; mymarks)
  }

/ find the optimal removal of edges
findopt:{[N; E; marks; permitted; allscheds]
 orignodes: N
 origedges: E
 origmarks: marks
 deletededges: ()
 nodeorder: ()
 out: ()
 while[0 < # N
   trip: findcand[N; E; marks; permitted; allscheds]
   out,: trip[0]
   nodeorder,: trip[0]
   deletededges,: trip[1],'trip[2]
   trip: deleteroot[N; E; marks; trip[0]]
   N: trip[0]
   E: trip[1]
   marks: trip[2]
 ]
 deletededges@: <deletededges
 if[0 < #deletededges
   x:(("Consistent ordering: "), delblanks spit nodeorder 
	("Deleted edges: "),(-1) _ ,/ (spit'deletededges) ,\: ("|"))
 ]
 if[0 = #deletededges
   x:(("Consistent ordering: "), delblanks spit nodeorder 
	("Deleted edges: none"))
 ]
 y: x
 all: (origedges[;0]),'(origedges[;1]),'origmarks
 z: differ[all; deletededges]
 ii: & z[;2] = `both
 z[ii;2]: ` $ spitunder allscheds
 z: (z[;2]),'(z[;0]),'(z[;1])
 y,: spitbar'z
 y,: ,"                  "
 y,: ,"                  "
 outall,: ,y
 :x
 }


/ EXECUTION

filename: "schedin"
inputfromfile[filename]

processargs[_i]

nodes:()
edges:()
part: = schedtab.sched
allscheds: ?schedtab.sched
if[2 < #allscheds
  permitted: `either / can remove edges that are not shared by everyone
]
i: 0
while[i < #allscheds
	nodes,: ,?(schedtab.source[part[i]]),(schedtab.target[part[i]])
 	edges,: ,(schedtab.source[part[i]],'schedtab.target[part[i]])
	i+: 1
]

NA: nodes[0]
EA: edges[0]
NB: nodes[1]
EB: edges[1]
if[2 = #allscheds
  trip: combine[NA; EA; NB; EB; allscheds]
]
if[2 < #allscheds
  trip: multicombine[nodes;edges;allscheds]
]
N: trip[0]
E: trip[1]
marks: trip[2]

outall: () / collect data for schedout
out: ()
do[1000; out,: ,findopt[N; E; marks; permitted; allscheds] ]
countvertbar:{[line] (# & line = "|") + (# & line = "_")}
edgecount: countvertbar'out[;1]
mincount: &/edgecount
ii: & edgecount = mincount
out@: ii
outall@: ii

if[verbosemode = 0
 ` 0: ,"Non-verbose mode gives just one solution. Add -v to commandline for verbose."
 ` 0: ,"Combined graph can be found in schedout"
 ` 0: *out
 "schedout" 0: *outall
]
if[verbosemode
 ` 0: ,"Verbose output"
 ` 0: ,"Combined graphs can be found in schedout"
 part: = out[;1]
 first: out[*:'part]
 "schedout" 0: ,/outall[*:'part]
 ` 0: ,/first
]

\\