#!/usr/bin/env perl

# Directory holding the external programs this script runs
# ($lagandir/chaos and $lagandir/anchors).
$lagandir = $ENV{LAGAN_DIR};

# Status
#   -- extension problems

if (@ARGV < 2) {
    # Each -recurse level needs four numbers: the level parser below only
    # recognises "(wordlen,degeneracy,cutoff,extcutoff)" tuples.
    print ("usage:\n rechaos seqfile1 seqfile2 [-chaos \"chaos flags\"] [-recurse \"(wl1,nd1,co1,ext1),(wl2,nd2,co2,ext2),...\"] [-out \"filename\"] [-lazy] [-maskedonly] [-debug] [-translate] [-fastreject]\n");
    exit(1);
}

# Without LAGAN_DIR the commands below would be looked up as "/chaos" and
# "/anchors"; report the real problem instead of failing later without a
# message.  The exit status stays 1, as for every other fatal path here.
if (!defined ($lagandir) || $lagandir eq "") {
    print STDERR "rechaos: environment variable LAGAN_DIR is not set\n";
    exit(1);
}

# Default recursion schedule: one "(wordlen,degeneracy,cutoff,extcutoff)tail"
# entry per level, consumed left to right.  A "t" in the tail marks a level
# that is only run when -translate is given.
$recurfl = "(12,0,30,0)x,(13,1,30,0)x,(3,0,30,0)xt,(8,1,30,0)x,(7,1,30,0)x";
#$recurfl = "(12,0,10,200)x,(12,0,10,150)x,(3,0,10,150)xt,(8,0,10,150)x,(12,0,25,0),(13,1,30,0),(3,0,30,0)t,(8,1,30,0),(7,1,25,0)";
$minbox = 0;            # boxes with a smaller area are dropped
$seq1 = $ARGV[0];
$seq2 = $ARGV[1];
$tofile = 0;            # set by -out
$masker = 1;            # cleared by -nomask; not read anywhere else in this file
$lazycheck = 0;         # set by -lazy
$fastreject = 0;        # set by -fastreject
# Fast reject is only considered for levels $frminlevel..$frmaxlevel and only
# when both sequences are longer than the per-level thresholds below.
$frminlevel = 0;
$frmaxlevel = 3;
@frseq1 = (50000, 20000, 7000, 5000);
@frseq2 = (50000, 20000, 7000, 5000);

$dounmasked = 1;        # cleared by -maskedonly
$filename = undef;      # output file name, set by -out
$debug = 0;
$anchparams = "";       # extra arguments passed to the anchors program (-gap)
$translate = 0;

# Return the numerically larger of two values (the second one on a tie).
sub max {
    my ($first, $second) = @_;
    return ($first > $second) ? $first : $second;
}

# Return the numerically smaller of two values (the second one on a tie).
sub min {
    my ($first, $second) = @_;
    return ($first < $second) ? $first : $second;
}

# Parse the optional arguments that follow the two sequence file names and
# store the result in the script's global settings.
#
# Options are recognised by substring match (not exact comparison) and are
# tested in the order written below, exactly as before.  Options that take a
# value (-chaos, -recurse, -out: one value; -gap: two values) now die with a
# message when the command line ends before the value, instead of silently
# continuing with an undefined value.
#
# Dies on an unrecognised option or a missing option value.
sub _parse_options {
    my @args = @_;
    my $pos = 0;

    # Consume and return the argument following the current one.
    my $value_for = sub {
        my ($opt) = @_;
        die ("Option $opt requires a value\n") if ($pos + 1 > $#args);
        return $args[++$pos];
    };

    while ($pos < @args) {
        my $opt = $args[$pos];

        if ($opt =~ /-chaos/) {
            # May be given several times; the flag strings are concatenated.
            $chaosfl = $chaosfl." ".$value_for->($opt);
        }
        elsif ($opt =~ /-ext/) {
            $chaosfl = $chaosfl." -ext";
        }
        elsif ($opt =~ /-recurse/) {
            $recurfl = $value_for->($opt);
        }
        elsif ($opt =~ /-lazy/) {
            $lazycheck = 1;
        }
        elsif ($opt =~ /-nomask/) {
            $masker = 0;
        }
        elsif ($opt =~ /-out/) {
            $tofile = 1;
            $filename = $value_for->($opt);
        }
        elsif ($opt =~ /-maskedonly/) {
            $dounmasked = 0;
        }
        elsif ($opt =~ /-fastreject/) {
            $fastreject = 1;
        }
        elsif ($opt =~ /-debug/) {
            $debug = 1;
        }
        elsif ($opt =~ /-translate/) {
            $translate = 1;
        }
        elsif ($opt =~ /-gap/) {
            # -gap takes two values; both are handed on to the anchors program.
            $anchparams = $anchparams." -gap ".$value_for->($opt);
            $anchparams = $anchparams." ".$value_for->($opt);
        }
        else {
            die ("Unrecognized option $opt\n");
        }
        $pos++;
    }
    return;
}

# The first two arguments are the sequence files; everything after is options.
_parse_options (@ARGV[2 .. $#ARGV]);

# -lazy: refuse to redo the work when the output file is already present.
die ("Output file already exists, lazy mode exit!\n") if ($lazycheck && -f $filename);

# When a "<sequence>.masked" file exists, use it in place of the sequence and
# remember the original name in $extra1 / $extra2; the $extracaseN flags tell
# the recursion loop that an extra pass on the original file is still owed.
$extracase1 = $extracase2 = 0;

if (-e "$seq1.masked") {
    $extra1 = $seq1;
    $seq1 .= ".masked";
    $extracase1 = 1;
}

if (-e "$seq2.masked") {
    $extra2 = $seq2;
    $seq2 .= ".masked";
    $extracase2 = 1;
}

# -maskedonly: never schedule the extra pass on the original files.
unless ($dounmasked) {
    ($extracase1, $extracase2) = (0, 0);
}

# Return the number of characters in a sequence file, not counting the first
# line (which is skipped unread) or line terminators removed by chomp.
# The result is undefined when the file has no lines after the first one.
# Dies when the file cannot be opened; previously a failed open was ignored
# and the length silently stayed undefined.
sub _sequence_length {
    my ($path) = @_;

    open (my $fh, '<', $path) or die ("Cannot open sequence file $path: $!\n");

    my $header = <$fh>;     # first line is not part of the sequence
    my $length;
    while (my $line = <$fh>) {
        chomp $line;
        $length += length ($line);
    }
    close ($fh);

    return $length;
}

$seq1len = _sequence_length ($seq1);
$seq2len = _sequence_length ($seq2);

# The search starts with a single box covering both sequences completely.
$b1[0] = $b2[0] = 1;
$e1[0] = $seq1len;
$e2[0] = $seq2len;

# ---------------------------------------------------------------------------
# Recursive anchoring.
#
# Each pass takes the next "(wordlen,degeneracy,cutoff,extcutoff)tail" entry
# from $recurfl, runs chaos restricted to the current boxes (@b1/@e1 in
# sequence 1, @b2/@e2 in sequence 2, passed through the -pairs file), appends
# the anchors accepted on the previous pass ($app_str) to the chaos output,
# runs the anchors program on the result, and turns the gaps between the
# returned anchors into the boxes for the next pass.  Once $recurfl is used
# up, one more pass is made on the original sequence files if ".masked"
# versions were substituted for them ($extracase1 / $extracase2).
#
# Exit status: 1 when an external command fails, 3 when fast reject applies
# and the pass produced no anchors.
# ---------------------------------------------------------------------------
$cumanchs = 0;
$app_str = "";

# Scratch files, all prefixed with this process id.
my $pairs_file  = "$$.anchs.pairs";
my $hits_file   = "$$.anchtemp";
my $sorted_file = "$$.anchs.sorted";

$i = 0;
while (1) {
    $goodanchs = 0;
    $totalanchs = 0;

    # Peel the next level off the front of $recurfl; the text after the
    # matched entry becomes the new $recurfl.
    if ($recurfl =~ /\((\d+)\,(\d+)\,(\d+)\,(\d+)\)(\w*)(.*)/) {
        ($wordlen, $degeneracy, $cutoff, $extcutoff, $tail, $recurfl) = ($1, $2, $3, $4, $5, $6);

        $extraparams = "";
        $extraparams = $extraparams." -t" if ((index ($tail, "t") != -1) && ($translate));
#        $extraparams = $extraparams." -ext $extcutoff" if (index ($tail, "x") != -1);
    }
    elsif ($extracase1 || $extracase2) {
        # No levels left: repeat the last level's parameters once on the
        # original (unmasked) files.
        if ($extracase1) { $seq1 = $extra1; $extracase1 = 0; }
        if ($extracase2) { $seq2 = $extra2; $extracase2 = 0; }
    }
    else {
        last;
    }

    # Levels tagged "t" are skipped unless -translate was given.  A skipped
    # level does not advance $i.
    next if ((index ($tail, "t") != -1) && (!$translate));

    print STDERR "Using $seq1 $seq2 ($wordlen, $degeneracy, $cutoff, $extcutoff) $tail\n";

    # One line per box that chaos should search.
    open (my $pairs_fh, '>', $pairs_file) or die ("Cannot write $pairs_file: $!\n");
    for my $box (0 .. $#b1) {
        print $pairs_fh "-s1 $b1[$box] $e1[$box] -s2 $b2[$box] $e2[$box]\n";
    }
    close ($pairs_fh) or die ("Cannot write $pairs_file: $!\n");

    `$lagandir/chaos $seq1 $seq2 -wl $wordlen -nd $degeneracy -co $cutoff $extraparams $chaosfl -pairs $pairs_file > $hits_file`;
    if ($?) { exit(1); }

    # Add the anchors kept from the previous pass (and any fast-reject
    # pseudo hits) so that the anchors program sees them again.
    open (my $hits_fh, '>>', $hits_file) or die ("Cannot append to $hits_file: $!\n");
    print $hits_fh $app_str;
    close ($hits_fh) or die ("Cannot append to $hits_file: $!\n");

    # Only the debug summary reports the number of candidate hits, so the
    # file is only counted in debug mode.  This restores the line count that
    # the commented-out "wc" call used to supply.
    if ($debug) {
        open (my $count_fh, '<', $hits_file) or die ("Cannot read $hits_file: $!\n");
        while (my $hit = <$count_fh>) {
            $totalanchs++;
        }
        close ($count_fh);
    }

    # "-k 2" replaces the obsolete "+1" key syntax, which current sort
    # implementations take for a file name.  Both mean: key starts at the
    # second field.  NOTE: $? is the status of the last pipeline stage (sort).
    `$lagandir/anchors $hits_file $anchparams | sort -n -k 2 > $sorted_file`;
    if ($?) { exit(1); }

    open (my $sorted_fh, '<', $sorted_file) or die ("Cannot read $sorted_file: $!\n");
    @anchors = <$sorted_fh>;
    close ($sorted_fh);

    # Number of anchors returned on this pass.  (This used to add a stale
    # regex capture left over from the level parser.)
    $goodanchs = $goodanchs + scalar (@anchors);

    @b1new = (0);
    @b2new = (0);
    @e1new = (0);
    @e2new = (0);
    @scores = (0);
    $app_str = "";

    $b1new[0] = $b2new[0] = 1;
    $area = 0;
    $maxarea = 0;
    $lastpass = ($#anchors == -1);

    $seq1ending = $seq1len;
    $seq2ending = $seq2len;

#   print STDERR "s1 $seq1len s2 $seq2len ($frseq1[$i] $frseq2[$i])\n";
    # Fast reject: on the first levels, for sequences longer than the level's
    # thresholds, give up when nothing anchored and otherwise clip the search
    # area to within the thresholds of the first and last anchor.  The
    # clipped-off corners are recorded as pseudo hits in $app_str.
    if ($fastreject && $i >= $frminlevel && $i <= $frmaxlevel && $seq1len > $frseq1[$i] && $seq2len > $frseq2[$i]) {
        if ($lastpass) { exit(3); }

        # Region before the first anchor.
        $anchors[0] =~ /\((\d+) (\d+)\)=\((\d+) (\d+)\)/;
        if ($1 * $3 > $frseq1[$i] * $frseq2[$i]) {
            $b1new[0] = $1 - $frseq1[$i];
            $b2new[0] = $3 - $frseq2[$i];
            if ($b1new[0] < 2) { $b1new[0] = 2; }
            if ($b2new[0] < 2) { $b2new[0] = 2; }
            $bb1 = $b1new[0] - 1;
            $bb2 = $b2new[0] - 1;
            $app_str = $app_str."seq1 1 $bb1; seq2 1 $bb2; score = 25.42 (+)\n";
        }

        # Region after the last anchor.  The threshold is the per-level
        # product, as for the leading region above (this used to compare
        # against two undefined scalars, i.e. against 0).
        $anchors[$#anchors] =~ /\((\d+) (\d+)\)=\((\d+) (\d+)\)/;
        if (($seq1len - $2) * ($seq2len - $4) > $frseq1[$i] * $frseq2[$i]) {
            $seq1ending = $2 + $frseq1[$i];
            $seq2ending = $4 + $frseq2[$i];
            if ($seq1ending > ($seq1len - 1)) { $seq1ending = $seq1len - 1; }
            if ($seq2ending > ($seq2len - 1)) { $seq2ending = $seq2len - 1; }
            $ee1 = $seq1ending + 1;
            $ee2 = $seq2ending + 1;
            $app_str = $app_str."seq1 $ee1 $seq1len; seq2 $ee2 $seq2len; score = 25.42 (+)\n";
        }
    }

    # Walk the anchors in sorted order.  Box $k starts at $b1new[$k] /
    # $b2new[$k] and ends just before anchor $m; once the anchors are used up
    # ($lastpass) one final box is closed at $seq1ending / $seq2ending.
    for ($k = 0, $m = 0; $lastpass || $m < @anchors; $m++, $k++) {

        if ($lastpass) {
            $e1new[$k] = $seq1ending;
            $e2new[$k] = $seq2ending;
        }
        else {
            $anchors[$m] =~ /\((\d+) (\d+)\)=\((\d+) (\d+)\)/;
            $e1new[$k] = $1 - 1;
            $e2new[$k] = $3 - 1;
        }

        $boxarea = ($e1new[$k] - $b1new[$k] + 1) * ($e2new[$k] - $b2new[$k] + 1);

        $area = $area + $boxarea;
        $maxarea = $boxarea if ($boxarea > $maxarea);
        if (!$lastpass) {
            if ($boxarea < $minbox || $e1new[$k] <= $b1new[$k] || $e2new[$k] <= $b2new[$k]) {
                # Box too small or empty: ignore this anchor and reuse slot
                # $k, so the box keeps its start and is closed by a later
                # anchor (or by the sequence end).
                $k--;
                $lastpass = ($m >= $#anchors);
                next;
            }
        }
        if (!$lastpass) {
            # Anchor accepted: the next box starts right after it.
            $anchors[$m] =~ /\((\d+) (\d+)\)=\((\d+) (\d+)\) (\d+)/;
            $b1new[$k + 1] = $2 + 1;
            $b2new[$k + 1] = $4 + 1;
            $scores[$k + 1] = $5;
        }
        else {
            last;
        }

        $lastpass = ($m >= $#anchors);
    }

    # The span between the end of box $k and the start of box $k+1 is the
    # accepted anchor itself; record it so that it is appended to the chaos
    # output of the next pass.
    for ($k = 0; $k < @e1new - 1; $k++) {
        $ee1 = $e1new[$k] + 1;
        $bb1 = $b1new[$k + 1] - 1;
        $ee2 = $e2new[$k] + 1;
        $bb2 = $b2new[$k + 1] - 1;
        $app_str = $app_str."seq1 $ee1 $bb1; seq2 $ee2 $bb2; score = $scores[$k+1] (+)\n";
    }

    @b1 = @b1new;
    @b2 = @b2new;
    @e1 = @e1new;
    @e2 = @e2new;
    if ($debug) {
        print STDERR "Level $i Summary:\n";
        print STDERR "   Using $seq1 $seq2 ($wordlen, $degeneracy, $cutoff)\n";
        if ($totalanchs == 0) {
            $percentage = 0;
        }
        else {
            $percentage = $goodanchs / $totalanchs * 100.0;
        }
        print STDERR "   $goodanchs good out of $totalanchs total anchors ($percentage%)\n";
        # Areas are reported in units of 10^6 cells.
        $area = $area / 1000000;
        $maxarea = $maxarea / 1000000;
        print STDERR "   Total area left = $area (max = $maxarea)\n";
    }
    $cumanchs = $cumanchs + $goodanchs;
    $i++;
}

# Final result: the anchors of the last pass, sorted in descending numeric
# order on the second field.  "-k 2" replaces the obsolete "+1" key syntax,
# which current sort implementations take for a file name.
$res = `sort -nr -k 2 $$.anchs.sorted`;
if ($?) { exit(1); }

# Remove every scratch file created by this process.
unlink (glob ("$$.*"));

if ($tofile) {
    # Fail loudly if the result cannot be stored; an unchecked open used to
    # drop the output silently.
    open (my $out_fh, '>', $filename) or die ("Cannot write $filename: $!\n");
    print $out_fh $res;
    close ($out_fh) or die ("Cannot write $filename: $!\n");
}
else {
    print $res;
}

print STDERR "$cumanchs cumulative anchors\n";

