#!/usr/bin/perl
#################################
#Author: Jack Min
#Program: countAStype.pl
# 	Parses a tab-separated AS event file (AS_event.gtf) and writes the
# 	number of events of each AS type to the output file.
#
#############################

# Require exactly two arguments: the input event file and the output count
# file. On misuse the usage text goes to STDERR and the script exits with a
# non-zero status, so that a calling script or pipeline can detect the failure.
if (@ARGV != 2)
{
	print STDERR "Usage: countAStype.pl AS.gtf AS_type_count \n";
	print STDERR "input: gtf\n";
	print STDERR "output: AS types count\n";
	exit 1;
}

# Positional arguments: path of the event file to read and path of the count
# table to write.
$inFile = $ARGV[0];
$outFile = $ARGV[1];

# Read the whole input into @content, one element per line (newline kept).
# The three-argument form of open stops characters such as '>' or '|' in the
# file name from being interpreted as an open mode, and $! tells the user why
# the open failed.
open (my $input, '<', $inFile) or die ("Cannot open file $inFile !! ($!)\n");
@content = <$input>;
close ($input);

# Drop events that repeat an already-seen pair of IDs (columns 2 and 3, in
# either order); the first line carrying each pair is kept in @newArray.
print "In progress ...\n";
@newArray = _unique_events(@content);

# Return the lines of the argument list whose unordered (column 2, column 3)
# pair has not appeared on an earlier line.
#
# Arguments: the input lines, tab-separated, with or without a trailing
#            newline. The caller's list is not modified.
# Returns:   the kept lines in input order, each terminated by exactly one
#            newline.
# Output:    for every dropped line, prints the number of duplicates found
#            before it (0, 1, 2, ...) on its own line.
#
# A hash keyed on the ID pair replaces a scan over all kept lines, so the
# work grows linearly with the number of input lines.
sub _unique_events
{
	my @lines = @_;
	my %seen;
	my @unique;
	my $duplicates = 0;

	foreach my $line (@lines)
	{
		chomp($line);
		my @fields = split(/\t/, $line);

		# A missing column compares equal to the empty string.
		my $id1 = defined $fields[1] ? $fields[1] : '';
		my $id2 = defined $fields[2] ? $fields[2] : '';

		# Order the two IDs so that (a, b) and (b, a) give the same key. The
		# fields come from a split on tab, so a tab cannot occur inside them
		# and is a safe separator.
		($id1, $id2) = ($id2, $id1) if ($id2 lt $id1);
		my $key = $id1."\t".$id2;

		if ($seen{$key}++)
		{
			print STDOUT $duplicates++, "\n";
			next;
		}
		push(@unique, $line."\n");
	}
	return @unique;
}


#open (OUTPUT, ">$outFile") || die ("Cannot open for writing file $outFile !\n");
#print OUTPUT (@newArray);
#close OUTPUT;

#open (INPUT, $outFile) || die ("Cannot open file $outFile !!\n");
#@content2 = <INPUT>;
#close (INPUT);
# Tally the de-duplicated events by type. Every counter is set, so a type
# with no events ends up as 0 instead of staying undefined.
my %countOf = _count_event_types(@newArray);
($es, $ad, $aa, $ir, $ce) = @countOf{qw(es ad aa ir ce)};

# Count events per type from the structure code in column 7.
#
# Arguments: the event lines, tab-separated, with or without a trailing
#            newline. The caller's list is not modified.
# Returns:   a hash with the keys es (exon skipping), ad (alternative donor
#            site), aa (alternative acceptor site), ir (intron retention) and
#            ce (complex events); all five keys are always present.
#
# Any code that is not one of the four listed below is counted as a complex
# event. That includes lines with fewer than seven columns, blank lines among
# them.
sub _count_event_types
{
	my @lines = @_;

	my %typeOf = (
		"1-2^,0" => 'es',
		"1^,2^"  => 'ad',
		"1-,2-"  => 'aa',
		"1^2-,0" => 'ir',
	);
	my %count = (es => 0, ad => 0, aa => 0, ir => 0, ce => 0);

	foreach my $line (@lines)
	{
		chomp($line);
		my @fields = split(/\t/, $line);
		my $code = defined $fields[6] ? $fields[6] : '';

		# Strip all whitespace, not just the first character, so that padding
		# on both sides of the code (or a stray carriage return) cannot turn
		# a known code into a "complex" one.
		$code =~ s/\s+//g;

		my $type = exists $typeOf{$code} ? $typeOf{$code} : 'ce';
		$count{$type}++;
	}
	return %count;
}

#$sumF = "new_summary.events";
# Write the per-type counts to $outFile as a two-column, tab-separated table
# (label, count), one row per event type.
# The open is checked so that an unwritable path stops the script instead of
# letting it report success without producing a file.
open(my $out, '>', $outFile) or die ("Cannot open for writing file $outFile ! ($!)\n");

my @rows = (
	["exon skipping",              $es],
	["alternative donor sites",    $ad],
	["alternative acceptor sites", $aa],
	["intron retention",           $ir],
	["others (complex events)",    $ce],
);
foreach my $row (@rows)
{
	my ($label, $count) = @$row;
	# A counter that was never incremented is undefined; write 0 rather than
	# an empty field.
	$count = 0 unless defined $count;
	print {$out} $label."\t".$count."\n";
}

# Buffered write errors (e.g. a full disk) only surface at close.
close ($out) or die ("Cannot finish writing file $outFile ! ($!)\n");

print "The work is done!\n";

exit;


