#!/usr/local/bin/perl -w #main { use FileHandle; STDERR->autoflush(1); use English; use File::Basename; use lib dirname($PROGRAM_NAME); use READ_RDB; ($n, $alignmentsfile, $hitsfile) = @ARGV; print STDERR "Looking for $n hits in $hitsfile, choosing alignments from $alignmentsfile\n"; %FSSPRepHash = (); %SCOPDomainHash = (); %rank_of_hit = (); $num_hits = 0; #get the ids of the top five hits and put in rank_of_hit open(HITSFILE, $hitsfile) || die "Error: top_reported_alignments can't open $hitsfile for reading.\n"; read_rdb_header(*HITSFILE{IO}); $idcolnum = $col_num{"Sequence_ID"}; # $lencolnum = $col_num{"Length"}; # $BEcolnum = $col_num{"Best_Evalue"}; #unused in html output # $Ecolnum = $col_num{"Evalue"}; # $FSSPcolnum = $col_num{"FSSP-rep"}; # $SCOPcolnum = $col_num{"SCOP_domain"}; # $suidcolnum = $col_num{"SCOP_suid"}; while($line = ) { chomp $line; @cols = split('\t', $line); #skip rows with no id in the line (additional domain rows) next if (!($cols[$idcolnum] =~ /\w+/)); $rank_of_hit{$cols[$idcolnum]} = ++$num_hits; } close(HITSFILE); print STDERR "read $num_hits hits\n"; open(ALIGNMENTSFILE, "<$alignmentsfile") || die "Error: top_reported_alignments can't open $alignmentsfile for reading.\n"; read_rdb_header(*ALIGNMENTSFILE{IO}); $aligncolnum = $col_num{"Alignment"}; # $Ecolnum = $col_num{"Evalue"}; $FSSPcolnum = $col_num{"FSSP-rep"}; $SCOPcolnum = $col_num{"SCOP-domain"}; while($line = ) { chomp $line; @cols = split('\t', $line); # We will print the top $n different hits. # Conditions for printing a row: # the hit is from different SCOP superfamily or # has a different FSSP representative from what # we've already seen AND # is a local, adpstyle 5 alignment to 100-30-str HMM my $align = $cols[$aligncolnum]; my $FSSP = $cols[$FSSPcolnum]; my $SCOP = $cols[$SCOPcolnum]; $align =~ /\w+-(\w+)-.*/ && ($chainID = $1); if(defined($rank_of_hit{$chainID})) { # print STDERR "fssprep:$FSSP SCOPsuperfam:$SCOP name: $align\n"; if ( !defined($FSSPRepHash{$FSSP}) && !defined($SCOPDomainHash{$SCOP}) && ($align =~ /\S+-STR-local-adpstyle5.pw.a2m.gz/)) { #put in hash of rows to be printed, where key is the # rank of the hit print STDERR "adding $chainID"; $rowsToPrint{$rank_of_hit{$chainID}} = $line; # mark the fssprep or superfam in hash if its not blank if($FSSP =~ /\S+/) { $FSSPRepHash{$FSSP} = 1; } if($SCOP =~ /\S+/) { $SCOPDomainHash{$SCOP} = 1; } } } } close(ALIGNMENTSFILE); @sorted = sort {$a <=> $b} keys %rowsToPrint; print STDERR "There are 0..$#sorted keys in rowsToPrint\n"; print "Alignment\tEvalue\tFSSP-rep\tSCOP_domain\n"; print "50S\t10N\t5S\t10S\n"; for (my $i = 0; $i<$n && $i<=$#sorted; $i++) { print "$rowsToPrint{$sorted[$i]}\n"; } }