/ This is a little file that does relational algebra.
/ selection returns a single list of rowids.
/ join returns two lists of rowids (here called indexes) where
/ the ith element of each list constitutes a match.
/ projection takes a list of indexes, a table, and attributes
/ and projects those indexes on the table.
/ The basic idea is that using rowids (indexes) permits
/ us to compose selects and joins. Projections are
/ the final operation.
/
/ For example, suppose we want to achieve
/ create T(F,R,A,N) as 
/ select distinct R1.U1, R1.U2, R1.U3, R2.Y1
/ from R1, R2
/ where R1.A = 5
/ and R1.B < R2.D
/ and R1.C = R2.E 
/ a program could work this way.
/ ind1: select["R1.A = 5"]
/ twolist: join[R1; `B `C; ind1; "<="; R2; `D `E; !#R2.D] 
    / uses index from first select for  the join
    / !#R2.D are all indexes for R2
/ T[`F `R `A]: project[R1; `U1 `U2 `U3; twolist[0]]
/ T[`N]: project[R2; `Y1; twolist[1]]
    / twolist[0] are the matching rowids in R1
    / twolist[1] are the matching rowids in R2
/ T[`F `R `A `N]: dupremove[T]

/
/ We have striven for the highest performance possible.
/ We could improve performance further by using key information.
/ especially on selections.
/ selection: & "(T.A = 5) | (T.B < 5)"
/ projection: "T[A B C; indexes]"
/ join[tab1; atts1; index1; relops; tab2; atts2; index2]
/ z: join[S1; `B `A;  0 2 3; "=>"; S2; `B `A; 0 1 2]
/ There are different ways to do a join.
/ Let's consider a join on a single variable.

/ finds the non-duplicated union of two lists
union:{[x;y]; ?x,y}

/finds intersection of two lists with duplicate removal
intersect:{[x;y]; ?x[& x _in\: y]}

/ finds intersection of two lists with duplicate removal
/ Much faster.
intersect: {[x;y]
 i:  x ?/: y
 j: & i < #x
 :?y[j]
}

/finds set difference of two lists x - y
differ:{[x;y]; ?x[&~ x _in\: y]}

/ A faster difference
differ:{[x;y]
  i: y ?/: x
  j: & i = #y
  :?x[j]
}

/ Single attribute equi-join where T1.att1 is a key
/ Give indexes of T1 and T2
fkjoin:{[T1; att1; index1; T2; att2; index2]
  size1: #index1
  i: T1[att1;index1] ?/: T2[att2;index2] 
	/ cardinality is as big as T2.att2[index2]
	/ the jth element says where in T1.att1[index1]
	/ there is a match for the jth element of T2.att2[index2].
	/ if i[j] = size1 then there is no match.
  goods: & i < size1
  :((index1[i[goods]]) ; index2[goods])
}


/ This means: find index pairs in tab1 and tab2
/ such that tab1.attsin1 matches via equality tab2.attsin2.
/ Here we  assume the join is on one attribute.
/ Result is a pair of lists whose ith members match.
eqjoinone:{[tab1; attsin1; index1;  tab2; attsin2; index2]
	size: # ? tab1[(*attsin1); index1]
	if[size = #index1
	   :fkjoin[tab1;(*attsin1);index1; tab2;(*attsin2);index2]
	]
	size: # ? tab2[(*attsin2); index2]
	if[size = #index2
	   :| fkjoin[tab2;(*attsin2);index2; tab1;(*attsin1);index1]
	]
	v1: ? tab1[(*attsin1); index1]
	part1: = tab1[(*attsin1); index1]
	indexval1: !#v1
	i: v1 ?/: tab2[(*attsin2); index2]
	goods: & i < #v1
	x: ((index1[part1[i[goods]]]); index2[goods])
		/ x has lists of indexes of table 1 with
		/ corresponding indexes of table 2
	counts: #:' x[0]
	x[0]: ,/ x[0]
	x[1]: ,/ counts #' x[1] / flatten out
	:x
}

/ This means: find index pairs in tab1 and tab2
/ such that tab1.attsin1 match via equality tab2.attsin2.
/ So, we can use a find each right.
/ Result is a pair of lists whose ith elements match.
eqjoin:{[tab1; attsin1; index1;  tab2; attsin2; index2]
	v1: ? +tab1[attsin1; index1]
	part1: = +tab1[attsin1; index1]
	indexval1: !#v1
	i: v1 ?/: +tab2[(attsin2); index2]
	goods: & i < #v1
	x: ((index1[part1[i[goods]]]); index2[goods])
		/ x has lists of indexes of table 1 with
		/ corresponding indexes of table 2
	counts: #:' x[0]
	x[0]: ,/ x[0]
	x[1]: ,/ counts #' x[1] / flatten out
	:x
}

/ cross-product takes two index sets and gives the cross-product.
/ This is for the completely general case
crossproduct:{[index1; index2]
  :+,/ index1 ,/:\: index2
}

/ Given a set of pairs of indexes, find out if remaining
/ inequality relops are satisfied.
joinselect:{[tab1; atts1; relops; pairs; tab2; atts2]
  table1:: tab1
  table2:: tab2
  pairs1:: pairs[0]
  pairs2:: pairs[1]
  s1: ($relops) ,'  ("table2.") ,/: ($atts2) ,\: ("[pairs2]) & ")
  s: ("& "), ,/ (" (") ,/: ("table1.") ,/: ($atts1) ,' ("[pairs1]") ,/: s1
  s: (-2) _ s / get rid of final &
  i: . s
  :(pairs1[i]; pairs2[i])
}

/ This does a general conjunctive join, using equality 
/ and keys wherever possible.
join:{[tab1; attsin1; index1; relops; tab2; attsin2; index2]
  if[~ (#attsin1) = (#attsin2); :()]
   relops,: ()
   attsin1,: ()
   attsin2,: ()
   e: & relops = ("=")
   if[(#e) = (#relops) / all equals
	if[1 = #relops
	  :eqjoinone[tab1; attsin1; index1;  tab2; attsin2; index2]
	]
	:eqjoin[tab1; attsin1; index1;  tab2; attsin2; index2]
   ]
   if[0 < #e
	x: :[1 = #e
	  eqjoinone[tab1;attsin1[e]; index1;tab2;attsin2[e];index2]
	  eqjoin[tab1;attsin1[e]; index1;tab2;attsin2[e];index2]]
	ne: & ~ relops = ("=")
	:joinselect[tab1;attsin1[ne];relops[ne];x;tab2;attsin2[ne]]
   ]
   x: crossproduct[index1; index2]
   :joinselect[tab1;attsin1;relops;x;tab2;attsin2]
}

/ project without duplicate removal
/ If you have indexes from many tables,
/ then you can do a ,' to bring them together.
project:{[tab;  atts; indexes]
  :tab[atts; indexes]
}

/ remove duplicates from a table
dupremove:{[tab]
  :  + ? + tab[!tab]
}


/ find indexes indexes of tab, atts, based on conditions
/ We do nothing special.
select:{[selectstring]
 :. ("& "), selectstring
}

size: 10000

/ This is a good timing test.
/ Thanks to it, we know that eqjoinone is more efficient
/ than eqjoin because it makes much less use of disk
/ so this optimization is well worth it.
build:{[size]
T1.A: size _draw -10*size

T2.B: (2*size) _draw size

index1: !#T1.A
index2: !#T2.B

now: _t
x: eqjoin[T1; ,`A; index1; T2; ,`B; index2]
 out: ("Result size is "), ($#x[0])
 out,: (". This took "), ($ (_t) - now), (" seconds.")
:out
}

S1.A: 10 20 30 10 20 30 30
/ S1.A: 10 20 30 40 50 60 70
S1.C: S1.A
S1.B: `a `b `c `d `e `f `g
S2.A: 10 10 10 10 30 30 30 30 10 30
S2.C: S2.A
S2.B: `m `n `o `p `q `r `s `t `u `v

x: eqjoin[S1; ,`A; !#S1.A; S2; ,`A; !#S2.A]
x1: join[S1; ,`A; !#S1.A; ,"="; S2; ,`A; !#S2.A]
project[S1; `A `B; x[0]]
project[S2; `A `B; x[1]]


y: eqjoin[S1; `A `C; 0 2 3; S2; `A `C; 1 2 3 4]
y1: join[S1; `A `C; 0 2 3; "=="; S2; `A `C; 1 2 3 4]
project[S1; `A `C `B; y[0]]
project[S2; `A `C `B; y[1]]

T1.A: 100 200 300 400
T1.B: `aa `bb `cc `dd
T2.A: 200 300 200 100
T2.B: `aa `bb `cc `dd
z: join[T1; `B `A;  0 2 3; "=>"; T2; `B `A; 0 1 2]
project[T1; `A `B; z[0]]
project[T2; `A `B; z[1]]

R1.A: 1 2 5 1 2 5
R1.B: 2 2 2 2 2 2
R1.C: `a `b `c `d `e `f
R1.U1: 10 20 30 40 50 60
R1.U2: 10 * R1.U1
R1.U3: 10 * R1.U2
R2.D: !12
R2.E: R1.C, R1.C
R2.Y1: -R1.U1, R1.U1
ind1: select["R1.A = 5"]
twolist: join[R1; `B `C; ind1; "<="; R2; `D `E; !#R2.D] 
    / uses index from first select for  the join
    / !#R2.D are all indexes for R2
T[`F `R `A]: project[R1; `U1 `U2 `U3; twolist[0]]
T[`N]: project[R2; `Y1; twolist[1]]
    / twolist[0] are the matching rowids in R1
    / twolist[1] are the matching rowids in R2
T[`F `R `A `N]: dupremove[T]
/ the end
`show $ `R1
`show $ `R2
`show $ `T