#!/usr/local/bin/perl -w
#
# casp_summary_report_html
#
# Usage:
#   casp_summary_report_html foo-predicted_alignments.rdb \
#       foo-combined-scores.rdb workdir target [baseurl [warning]] \
#       > casp_summary_report.html
#
# The two rdb inputs look like this (tab separated):
#
#   foo-predicted_alignments.rdb
#     Alignment                          Evalue     FSSP-rep
#     70S                                10N        5S
#     1qjvA-T0100-fssp-global.pw.a2m.gz  1.87e-103  1qjvA
#     T0100-1qjvB-local.pw.a2m.gz        2.47e-76   1qjvA
#
#   foo.combined-scores.rdb
#     Sequence_ID  Length  Evalue     FSSP-rep  SCOP_domain  SCOP_suid
#     5S           5N      12N        8S        12S          12N
#     1qjvA        342     4.10e-72   1qjvA     b.80.1.5     28035
#     1qjvB        342     4.10e-72   1qjvA     b.80.1.5     28036
#
# The script writes an HTML page with one table row per top alignment:
#   - the a2m formatted alignment
#   - the al formatted alignment (blank if it was never built)
#   - the alignment E-value
#   - the FSSP representative of the template
#   - every SCOP domain found for the template
#
# Strategy: build a lookup of SCOP domains per template from the
# combined-scores file, then walk the predicted alignments and print each
# one together with whatever the lookup knows about its template.
#
# NOTE(review): the copy of this script that was reviewed had lost all of
# its HTML markup and its <FILEHANDLE> read operators.  The markup below
# is a reconstruction; the text content of every message is unchanged.
# The original header comments mention links for the FSSP rep and the
# SCOP domains, but their target URLs were not recoverable, so those two
# columns are emitted as plain text -- TODO restore the links.

use strict;
use warnings;

use FileHandle;
use English qw(-no_match_vars);
use File::Basename;
use lib dirname($PROGRAM_NAME);
use READ_RDB;

STDERR->autoflush(1);

# Column name => column index.  Read after every read_rdb_header() call;
# presumably filled in by READ_RDB (not visible here).  Declaring it with
# "our" works whether READ_RDB exports it or writes to %main::col_num.
our %col_num;

# Four arguments are mandatory (same threshold as before); baseurl and
# warning may be omitted.
print_usage_exit() if @ARGV < 4;

my ($predict_aligns_rdb, $combined_scores_rdb,
    $WORKDIR, $TARGET, $BASEURL, $warning) = @ARGV;

{
    my $suid_for = load_scop_domains($combined_scores_rdb);

    # Open the second input before emitting anything, so that a missing
    # file does not leave a half-written page on STDOUT.
    open(my $aligns_fh, '<', $predict_aligns_rdb)
        or die "Can't open $predict_aligns_rdb for reading: $!\n";
    # Pass the IO slot, exactly as the original *HANDLE{IO} call did.
    read_rdb_header(*{$aligns_fh}{IO});
    my $align_col = $col_num{'Alignment'};
    my $evalue_col = $col_num{'Evalue'};
    my $fssp_col   = $col_num{'FSSP-rep'};
    die "No Alignment column in $predict_aligns_rdb\n"
        if !defined $align_col;

    begin_html_table();

    while (my $line = <$aligns_fh>) {
        chomp $line;
        my @cols = split /\t/, $line;
        my $alignment = $cols[$align_col];
        next if !defined $alignment || $alignment eq '';

        my $templateid = template_id_from_alignment($alignment, $TARGET);
        if (!defined $templateid) {
            # Previously a failed match silently reused the template id
            # captured for the preceding row.
            warn "Can't find a template id in '$alignment'; row skipped\n";
            next;
        }

        # Derive the .al file name; report a blank in the table when the
        # name cannot be derived or the file was never built.
        my $alfilename = $alignment;
        if ($alfilename !~ s/\.a2m\.gz\z/.al/
            || !-e "$WORKDIR/$templateid/$alfilename") {
            $alfilename = '';
        }

        # "domain:suid" strings for every domain of this template that
        # has a suid, in a stable order.
        my $domains = $suid_for->{$templateid} || {};
        my @scopdomains = map  { "$_:$domains->{$_}" }
                          grep { defined $domains->{$_} }
                          sort keys %{$domains};

        print_data_row($templateid, $alignment, $alfilename,
                       field(\@cols, $evalue_col),
                       field(\@cols, $fssp_col),
                       \@scopdomains);
    }
    close($aligns_fh);

    end_html_table();
}

# Return $cols->[$index], or undef when the column is absent from the file.
sub field {
    my ($cols, $index) = @_;
    return defined $index ? $cols->[$index] : undef;
}

# Read the combined-scores rdb file and return a hash reference of the form
# { template id => { SCOP domain => SCOP suid } }.  Rows without a SCOP
# domain are stored under the empty string, as before.
sub load_scop_domains {
    my ($rdb_path) = @_;

    open(my $fh, '<', $rdb_path)
        or die "Can't open $rdb_path for reading: $!\n";
    read_rdb_header(*{$fh}{IO});
    my $id_col   = $col_num{'Sequence_ID'};
    my $scop_col = $col_num{'SCOP_domain'};
    my $suid_col = $col_num{'SCOP_suid'};
    die "No Sequence_ID column in $rdb_path\n" if !defined $id_col;

    my %suid_for;
    while (my $line = <$fh>) {
        chomp $line;
        my @cols = split /\t/, $line;
        my $id = $cols[$id_col];
        next if !defined $id || $id eq '';

        my $scop = field(\@cols, $scop_col);
        $scop = '' if !defined $scop;
        $suid_for{$id}{$scop} = field(\@cols, $suid_col);
    }
    close($fh);

    return \%suid_for;
}

# Extract the template id from an alignment file name.  Names that start
# with the target ("T0100-1qjvB-local...") carry the template in the second
# dash-separated field; all others ("1qjvA-T0100-fssp...") in the first.
# Returns undef when the name has neither shape.
sub template_id_from_alignment {
    my ($alignment, $target) = @_;

    if ($alignment =~ /^\Q$target\E/) {
        return $alignment =~ /^\w+-(\w+)-/ ? $1 : undef;
    }
    return $alignment =~ /^(\w+)-/ ? $1 : undef;
}

# Escape the characters that are special in HTML text and attribute values.
# undef is rendered as the empty string.
sub html_escape {
    my ($text) = @_;
    return '' if !defined $text;

    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Contents of a table cell for an alignment file: empty when there is no
# file, plain text when no base URL was given, otherwise a link.
# NOTE(review): the URL layout (baseurl/template/file) mirrors the
# workdir/template/file layout used for the existence check above; the
# original href was not recoverable -- confirm against the deployed site.
sub file_cell {
    my ($template, $file) = @_;
    return '' if !defined $file || $file eq '';

    my $label = html_escape($file);
    return $label if !defined $BASEURL || $BASEURL eq '';

    my $href = html_escape("$BASEURL/$template/$file");
    return qq{<a href="$href">$label</a>};
}

# Print the table row(s) for one alignment.
#   $template     template id
#   $a2mfile      a2m alignment file name
#   $alfile       al alignment file name ('' when not built)
#   $Eval         alignment E-value
#   $fssprep      FSSP representative (may be undef)
#   $scopinforef  reference to an array of "domain:suid" strings
# The first SCOP domain shares the row with the other columns; every
# further domain gets a row of its own.  The caller's array is left intact.
sub print_data_row {
    my ($template, $a2mfile, $alfile, $Eval, $fssprep, $scopinforef) = @_;

    my @domain_names;
    for my $scopdoms (@{ $scopinforef || [] }) {
        # scopdoms string has domainname:suid form
        my ($name) = defined $scopdoms ? split(/:/, $scopdoms) : ();
        push @domain_names, defined $name ? $name : '';
    }
    my $first_domain = @domain_names ? shift @domain_names : '';

    print "<tr>\n";
    print '<td>', file_cell($template, $a2mfile), "</td>\n";
    print '<td>', file_cell($template, $alfile), "</td>\n";
    print '<td>', html_escape($Eval), "</td>\n";
    print '<td>', html_escape($fssprep), "</td>\n";
    print '<td>', html_escape($first_domain), "</td>\n";
    print "</tr>\n";

    # print remaining scop domains
    for my $name (@domain_names) {
        print "<tr>\n";
        print qq{<td colspan="4"></td>\n};
        print '<td>', html_escape($name), "</td>\n";
        print "</tr>\n";
    }
    return;
}

# Print the page header, the optional explanatory/warning text (only when
# the warning argument is true) and the opening of the results table.
sub begin_html_table {
    my $title = html_escape("Best Alignments for $TARGET");

    print "<html>\n<head>\n";
    print "<title>$title</title>\n";
    print "</head>\n<body>\n";
    print "<center>\n<h1>$title</h1>\n</center>\n";

    if ($warning) {
        print "<p>\n";
        print "To select the top models, SAM_T02 generates pairwise\n";
        print "alignments of the target sequence and the best-scoring\n";
        print "templates. The alignments with the best E-values are\n";
        print "presented here. If there are several alignments to\n";
        print "templates that share a SCOP superfamily or FSSP representative,\n";
        print "SAM_T02 only reports the best one.\n";
        print "</p>\n<p>\n";
        print "Warning: E-values of the top models may be overly optimistic.\n";
        print "</p>\n";
    }

    # start the table and print header row
    print qq{<table border="1">\n};
    print "<tr>\n";
    print "<th>A2M alignment</th>\n";
    print "<th>AL alignment</th>\n";
    print "<th>E-value</th>\n";
    print "<th>FSSP rep</th>\n";
    print "<th>SCOP Domain(s)</th>\n";
    print "</tr>\n";
    return;
}

# Close the results table and finish the page with the citation.
sub end_html_table {
    # finish the table
    print "</table>\n";

    # finish off the page with a citation
    print "<hr>\n";
    print "<p>\n";
    print "Please cite: Karplus, K. and Karchin, R. and Barrett, C. and Tu, S. and Cline, M. and Diekhans, M. and Grate, L. and Casper, J. and Hughey, R. ``What is the value added by human intervention in protein structure prediction?''\n";
    print "Proteins: Structure Function and Genetics 45(S5):86-91,2001\n";
    print "</p>\n";
    print "</body>\n</html>\n";
    return;
}

# Print the usage message and exit with a failure status.  The message
# goes to STDERR because STDOUT is normally redirected into the report.
sub print_usage_exit {
    print STDERR "Usage: casp_summary_report_html foo-predicted_alignments.rdb foo-combined-scores.rdb workdir target [baseurl [warning]] > casp_summary_report.html\n";
    exit(-1);
}