#!/usr/bin/perl -w
#################################
# calcN50.pl
# H.J. Megens
# v. 13-12-2012
# usage: cat asm.contig | perl calcN50.pl -m 200
# asm.contig: a fasta file with contigs or scaffolds
# -m: minimum size of contigs to consider
#################################
use strict;
use warnings;
use Getopt::Std;
my %opts = ();

getopt('m', \%opts);
my $mincontiglength = $opts{m};

my $seq = '';
my $id = '';
my $length=0;
my @unsorted=();
while (<>){
        my $line = $_;
        chomp $line;
        if ($line =~ m/^>/){
                if ($id){

			$length = length $seq;
			push(@unsorted, $length);
                }

                $id = $line;
                $seq = '';
        }
        else {
                $seq = $seq . $line;
        }
}
$length = length $seq;
push(@unsorted, $length);
#print "$length\n";
my $totlength=0;
my @sorted = sort { $b <=> $a } @unsorted;
print "largest contig: ".$sorted[0]."\n";
my $totlength2=0;
for $length (@sorted){
        if ($length >$mincontiglength){
                $totlength2+=$length;
        }
}

print "Total length of contigs larger than $mincontiglength bp: $totlength2 bp\n";
$totlength=$totlength2;
$totlength2=0;
my $n10=0;
my $n50=0;
my $n90=0;
my $n75=0;
for $length (@sorted){
        $totlength2+=$length;
	if (($totlength2/$totlength > 0.1) && $n10==0){
		$n10=$length;
	}
	if (($totlength2/$totlength > 0.5) && $n50==0){
                $n50=$length;
        }
	if (($totlength2/$totlength > 0.9) && $n90==0){
                $n90=$length;
        }
	if (($totlength2/$totlength > 0.75) && $n75==0){
                $n75=$length;
        }

}
print "N10=$n10\nN50=$n50\nN75=$n75\nN90=$n90\n";
exit;

