#!/usr/bin/perl -w
use strict;
use warnings;
use Getopt::Std;
my %opts = ();

$opts{f}='infile';
$opts{b}='100000';
$opts{q}='.';
getopt('fbq', \%opts);
my $infile = $opts{f};
my $binsize=$opts{b};
my $querytype=$opts{q};
open (REP, "$infile") or die "No such file! $!\n";
my $outfile = 'out';
if (length $querytype >1){
	$outfile = $querytype;
}
open (OUT, ">$outfile".'.txt') or die "No such file! $!\n";
my $maxlength = 0;
my $maxpos = 0;
my $totmatch=0;
my $bin = $binsize;
my %bins = ();
for (my $i=0; $i<6; ++$i){
	<>;
}
while (<>){
        my $line = $_;
        chomp $line;
        my @elements = split(/ +/,$line);
        my $refpos = $elements[0];
        my $qlength = $elements[12];
	my $matchlength = $elements[6];
	$totmatch += $matchlength;
	if ($matchlength > $maxlength && ($matchlength/$qlength>0.5)){
		$maxlength = $matchlength;
		$maxpos = $refpos;
	}
        if ($refpos > $bin){
                $bins{$bin}="$refpos\t$totmatch\t$maxlength\t";
                $bin = $bin + $binsize;
                $maxlength=0;
		$totmatch=0;
		$refpos=0;
        }
}
my $repcount=0;
$bin=$binsize;
while (<REP>){
        my $line = $_;
        chomp $line;
        my @elements = split(/\t/,$line);
        my $refpos = $elements[1];
        my $type = $elements[3];
        if ($type =~ m/$querytype/){
                $repcount += 1;
        }
        if ($refpos > $bin){
		
                $bins{$bin}= $bins{$bin}."$repcount\n";
                $bin = $bin + $binsize;
                $repcount=0;
        }
}
close(REP);
foreach my $key (sort keys %bins){
	print OUT "$key\t$bins{$key}";
}
close(OUT);
# this is a very ugly solution but does not require installation of R-packages or
# Perl modules; for proper API see the Perl module Statistics::R
open (RIN,'>Rin.R') or die "can not open file for writing $!\n";
print RIN 'rep<-read.table("'.$outfile .'.txt", header=F)'."\n";
print RIN 'cor.test(rep$V4[rep$V3>'.$binsize/2 .'],(rep$V5[rep$V3>'.$binsize/2 .']/('. $binsize.'/rep$V3[rep$V3>'. $binsize/2 .'])))'."\n";
print RIN 'png("'.$outfile.'png",width=500, height=500)'."\n";
print RIN 'scatter.smooth(rep$V4[rep$V3>'.$binsize/2 .'],(rep$V5[rep$V3>'.$binsize/2 .']/('. $binsize.'/rep$V3[rep$V3>'. $binsize/2 .'])),col = "dark red", xlab="maximum local scaffold size",ylab="local corrected density of repeats")'."\n";
print RIN 'dev.off()'."\n";
`R CMD BATCH Rin.R`;
exit;

