#!/usr/bin/perl -w
#
# Chris Dragon, 9-9-03: Added comments.  I'm not an expert on these systems,
# so my comments are only my best guess as to what various things are doing.
# If you see a comment that makes no sense to you, it could very well be wrong.
# I'll prefix my comments with CD:.
#
# CD: Generates an HTML summary report containing the best alignments for a
# sequence.
#
# Usage:
#   casp_summary_report_html \
#       --align target.top_reported_alignments.rdb \
#       --target target \
#       [--make_al] \
#       > casp_summary_report.html
#
# CD:
# --align   specifies the RDB file containing the best alignments chosen for
#           display.
# --target  contains the sequence ID (ie. 1edn) of the target sequence.
# --make_al If true, we'll generate .al files for each .a2m file in the RDB
#           file specified by --align.
#
# Update history:
# Chris Dragon, 9-10-03: Copies .al files to MODEL[model num].al files.
#
# NOTE(review): the copy of this script that was reviewed had lost everything
# between '<' and '>' (HTML tags inside print strings, the <FILEHANDLE> reads,
# and the tail of the column-indexing loop).  Those pieces were rebuilt from
# the surrounding code and comments.  The exact original markup, and the
# FSSP/SCOP hyperlink targets in particular, should be confirmed against
# version control.

use strict;
use warnings;

use English qw(-no_match_vars);
use File::Basename;
use File::stat;
use FileHandle;
use Getopt::Long;

STDERR->autoflush(1);

# ---------------------------------------------------------------------------
# Script-wide state shared by the subroutines below.
# ---------------------------------------------------------------------------
my $modnum             = 0;    # model number for .al format files
my $conversion_prog    = dirname($PROGRAM_NAME) . "/a2m-to-caspal.perl";
my $make_al_alignments = 0;    # set by --make_al
my $predict_aligns_rdb;        # set by --align
my $TARGET;                    # set by --target
my $author;                    # defaulted in make_al_file; nothing in this file sets it earlier
my $method;                    # nothing in this file sets it; "-method" is written only if defined

main();

# Parse the command line, read the alignments RDB file and write the HTML
# report to STDOUT.  When --make_al is given, also (re)build the .al file
# belonging to each .a2m alignment.
sub main {
    GetOptions(
        "align=s"  => \$predict_aligns_rdb,
        "target=s" => \$TARGET,
        "make_al"  => \$make_al_alignments,
    ) or print_usage_exit();

    if ($make_al_alignments) {
        print STDERR "# Will make .al format for alignments\n";
    }
    if (!defined($predict_aligns_rdb) || !defined($TARGET)) {
        print_usage_exit();
    }

    # foo-predicted_alignments.rdb contains these fields
    #   Alignment  Evalue  Align_Evalue  FSSP  SCOP  (and SUID)
    #   50S        10N     10N           5S    50S
    # We want an html page with a table which tells us for each top alignment:
    #   - link to a2m formatted alignment
    #   - alignment E-value
    #   - fssp rep of template (plus link)
    #   - all SCOP domains found in the template (plus link)
    open(my $aligns_fh, '<', $predict_aligns_rdb)
        or die "Can't open $predict_aligns_rdb for reading: $!\n";

    my @colname;    # names of columns in rdb file
    my %col_num;    # mapping name->number
    read_rdb_header($aligns_fh, \@colname, \%col_num);

    # Without the Alignment column there is nothing to report.  The other
    # columns are treated as optional and rendered as empty cells.
    defined $col_num{"Alignment"}
        or die "$predict_aligns_rdb has no 'Alignment' column\n";

    begin_html_table();

    while (my $line = <$aligns_fh>) {
        chomp $line;
        next if $line !~ /\S/;    # ignore blank lines

        my @cols = split(/\t/, $line);

        my $a2m_name     = column_value(\@cols, $col_num{"Alignment"});
        my $evalue       = column_value(\@cols, $col_num{"Evalue"});
        my $align_evalue = column_value(\@cols, $col_num{"Align_Evalue"});
        my $fssp         = column_value(\@cols, $col_num{"FSSP"});
        my $scop         = column_value(\@cols, $col_num{"SCOP"});
        my $suid         = column_value(\@cols, $col_num{"SUID"});

        if (!defined $a2m_name || $a2m_name eq '') {
            print STDERR "# skipping row without an alignment name: $line\n";
            next;
        }

        my $alfilename = "";
        if ($make_al_alignments) {
            $alfilename = al_name_for($a2m_name);

            # get template name from alignment name (its leading directory);
            # capture in list context so a failed match yields undef rather
            # than a stale $1 from an earlier row.
            my ($templateid) = $a2m_name =~ m{^([^/]+)/};

            find_al_file($alfilename, $a2m_name, $templateid, $evalue);
        }

        print_data_row($a2m_name, $alfilename, $evalue, $align_evalue,
                       $fssp, $scop, $suid);
    }
    close($aligns_fh);

    end_html_table();
    return;
}

# Return field $index of the row @$cols_ref, or undef when the column is
# absent from the header ($index undefined) or missing from this row.
# Always returns exactly one value, so it is safe in list context.
sub column_value {
    my ($cols_ref, $index) = @_;
    return defined $index ? $cols_ref->[$index] : undef;
}

# Derive the .al file name from an .a2m file name.  Only a trailing ".a2m"
# is replaced; a name without that suffix gets ".al" appended so the result
# can never be the a2m file itself.
sub al_name_for {
    my ($a2m_name) = @_;
    my $al_name = $a2m_name;
    $al_name =~ s/\.a2m$/.al/ or $al_name .= ".al";
    return $al_name;
}

# Escape text for inclusion in HTML element content or a quoted attribute.
# An undefined value becomes the empty string.
sub html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Print one table row for an alignment.
#   $a2mfile      a2m alignment file (shown as a link to the file)
#   $alfile       .al alignment file (shown only when --make_al is on)
#   $best_evalue  template E-value
#   $align_evalue alignment cost
#   $fssprep      FSSP representative of the template (may be undef)
#   $scop         comma-separated SCOP domains (may be undef)
#   $suid         comma-separated SCOP ids parallel to $scop (may be undef)
sub print_data_row {
    my ($a2mfile, $alfile, $best_evalue, $align_evalue,
        $fssprep, $scop, $suid) = @_;

    my $a2m = html_escape($a2mfile);

    print "<tr>\n";
    print "<td><a href=\"$a2m\">$a2m</a></td>\n";
    if ($make_al_alignments) {
        my $al = html_escape($alfile);
        print "<td><a href=\"$al\">$al</a></td>\n";
    }
    print "<td>", html_escape($best_evalue), "</td>",
          "<td>", html_escape($align_evalue), "</td>\n";

    print "<td>";
    if (defined $fssprep) {
        # KNOWN BUG: FSSP NO LONGER EXISTS---need to update to newer scheme
        # but I was unable to download the Dali Domain Dictionary on 9 May 2005
        print html_escape($fssprep);
    }
    print "</td>\n";

    print "<td>";
    if (defined($scop)) {
        # NOTE(review): the header comment says SCOP entries carry a link and
        # $suid presumably supplied its target, but the URL is not present in
        # the reviewed text, so the domains are emitted as plain text.
        # TODO: restore the link from version control.
        my @scopdoms = split(/,/, $scop);
        print join("<br>\n", map { html_escape($_) } @scopdoms);
    }
    print "</td>\n";
    print "</tr>\n";
    return;
}

# Print the page header, the explanatory text and the table header row.
sub begin_html_table {
    my $title = html_escape("Best Alignments for $TARGET");

    # NOTE(review): $align_width is computed in the original but its use was
    # lost; it is assumed to be the percentage width of the alignment
    # column(s) -- confirm.
    my $align_width = ($make_al_alignments ? 30 : 60);

    print "<html>\n<head>\n";
    print "<title>$title</title>\n";
    print "</head>\n<body>\n";
    print "<h1>$title</h1>\n";
    print "<p>\n";
    print "To select the top models, SAM generates pairwise\n";
    print "alignments of the target sequence and the best-scoring\n";
    print "templates. The alignments with the best E-values are\n";
    print "presented here.\n";
    print "</p>\n<p>\n";
    print "Warning: E-values smaller than about 0.001 may be overly optimistic.\n";
    print "</p>\n";

    # start the table and print header row
    print "<table border=\"1\">\n";
    print "<tr>\n";
    print "<th width=\"$align_width%\">A2M alignment</th>\n";
    if ($make_al_alignments) {
        print "<th width=\"$align_width%\">AL alignment</th>\n";
    }
    print "<th>Template E-value</th>\n";
    print "<th>Alignment cost</th>\n";
    print "<th>FSSP rep</th>\n";
    print "<th>SCOP Domain(s)</th>\n";
    print "</tr>\n";
    return;
}

# Close the table and finish the page with the citation.
sub end_html_table {
    # finish the table
    print "</table>\n";

    # finish off the page with a citation
    print "<hr>\n";
    print "<p>\n";
    print "Please cite: Karplus, K. and Karchin, R. and Barrett, C. and Tu, S. and Cline, M. and Diekhans, M. and Grate, L. and Casper, J. and Hughey, R. ``What is the value added by human intervention in protein structure prediction?''\n";
    print "Proteins: Structure Function and Genetics 45(S5):86-91,2001\n";
    print "</p>\n";
    print "</body>\n</html>\n";
    return;
}

# Print the usage message and exit with a failure status.  The message goes
# to STDERR because STDOUT is normally redirected into the report file.
sub print_usage_exit {
    print STDERR <<'END_USAGE';
Usage: casp_summary_report_html \
           --align target.predicted_alignments.rdb \
           --target target \
           [--make_al] \
           > casp_summary_report.html
END_USAGE
    exit(-1);
}

# Make sure $alfilename exists and is not older than $a2m_name, calling
# make_al_file when it is missing or stale.  $templateid and $score are
# handed through to make_al_file.
sub find_al_file {
    my ($alfilename, $a2m_name, $templateid, $score) = @_;

    print STDERR "# looking for $alfilename\n";

    if (! -e $alfilename) {
        # the .al file doesn't exist, so make it
        make_al_file($alfilename, $a2m_name, $templateid, $score);
        return;
    }

    # the file exists, but is it up to date?  Compare modification times.
    my $alstat  = stat($alfilename);
    my $a2mstat = stat($a2m_name);
    if (!$alstat || !$a2mstat) {
        print STDERR "# can't stat $alfilename and/or $a2m_name: $!\n";
        return;
    }
    if ($alstat->mtime < $a2mstat->mtime) {
        # oops, .al file is older, remake it
        make_al_file($alfilename, $a2m_name, $templateid, $score);
    }
    return;
}

# Write the argument file for the a2m-to-caspal converter, run the converter
# to produce $alfilename from $a2m_name, then remove the argument file.
#   $templateid  template name written as "-template"
#   $score       value written as "-score" (the caller passes the row's
#                Evalue column, as the original code did)
sub make_al_file {
    my ($alfilename, $a2m_name, $templateid, $score) = @_;
    my $SAM_YEAR = '05';

    print STDERR "Will make $alfilename\n";

    # a2m-to-caspal.perl takes an argument file that we must produce here
    #   -infasta in1.a2m
    #   -outcaspal out1.casp.al
    #   -target 1EKT:B TRANSCRIPTION STATE REGULATORY PROTEIN ABRB
    #   -template 1ev13 Chain 3, Echovirus 1
    #   -author UCSC SAM-T${SAM_YEAR} server
    #   -method method.txt
    defined $author or $author = "UCSC SAM-T${SAM_YEAR} server";
    $templateid = '' unless defined $templateid;
    $score      = '' unless defined $score;

    # NOTE(review): the model number advances only when a file is actually
    # (re)made, so it depends on which .al files were already up to date.
    # Behaviour kept as in the original -- confirm that this is intended.
    $modnum++;

    my $argfile = $alfilename;
    $argfile =~ s/[.]al$/.args/;
    # Never let the args file coincide with the .al file: it is unlinked below.
    $argfile .= ".args" if $argfile eq $alfilename;

    open(my $args_fh, '>', $argfile)
        or die "Can't open $argfile for writing: $!\n";
    print {$args_fh} "-infasta $a2m_name\n";
    print {$args_fh} "-outcaspal $alfilename\n";
    print {$args_fh} "-target $TARGET\n";
    print {$args_fh} "-template $templateid\n";
    print {$args_fh} "-author $author\n";
    if (defined($method)) {
        print {$args_fh} "-method $method\n";
    }
    print {$args_fh} "-modelnum $modnum\n";
    print {$args_fh} "-score $score\n";
    close($args_fh) or die "Can't finish writing $argfile: $!\n";

    # List form: no shell is involved, so odd characters in paths are safe.
    if (system($conversion_prog, $argfile) != 0) {
        print STDERR "# $conversion_prog $argfile failed (status $?)\n";
    }
    unlink $argfile;
    return;
}

# Read the header of an RDB file from $fh: skip leading empty lines and
# '#' comments, store the column names in @$col_names_ref with a reversed
# index (name -> position) in %$col_num_ref, then consume the column-format
# line that follows the names.  Dies if no column-name line is found.
sub read_rdb_header {
    my ($fh, $col_names_ref, $col_num_ref) = @_;

    # skip the comments at the beginning
    my $header;
    while (my $line = <$fh>) {
        next if $line !~ /./;        # empty line
        next if $line =~ /^\s*#/;    # comment line
        $header = $line;
        last;
    }
    defined $header or die "RDB file has no column-name line\n";

    # read the column names
    chomp $header;
    @$col_names_ref = split(/\t/, $header);

    # index the column names
    for my $i (0 .. $#{$col_names_ref}) {
        $col_num_ref->{ $col_names_ref->[$i] } = $i;
    }

    # skip the column-format line (e.g. "50S 10N 10N 5S 50S")
    my $format_line = <$fh>;
    return;
}

# CHANGE LOG:
# 27 Sept 2003 Kevin Karplus
#   Modified find_al_file and moved code to make_al_file
#   so that al files that were older than a2m files would be remade.
#
# Sun May 8 10:07:24 PDT 2005 Kevin Karplus
#   Eliminated reading best-scores, since all info now in the alignments rdb file