#!/usr/bin/perl
# RDBCombine
# Requires: RDBParse.pm
#================================================================
# Jonathan Casper and Jenny Draper
# jcasper@soe.ucsc.edu, learithe@soe.ucsc.edu
#
# INPUT:  Several RDB filenames
# OUTPUT: The combined EHL-structure prediction of those files,
#   along with a CASP file containing the same information.
# PRECONDITIONS: This script assumes that compare files exist
#   for each of the RDB alphabets used, and that their locations
#   are stored in RDBParse.pm.
#
# RDBCombine takes several RDB file names on the command line
# and combines their secondary structure predictions into a
# single EHL prediction.  The translation (from the source
# alphabets to EHL) is done using compare tables in
#   /projects/compbio/lib/2nd-compare/
# This location is stored in RDBParse.pm, and additional
# compare file locations may be added there.
#
# The weighting of each RDB file for the EHL predictions
# is proportional to their mutual information with the
# EHL alphabet, as taken from those same compare files.
#
# The resulting RDB file is written to standard output.
# Additionally, this script also creates a CASP file that
# contains the most probable structure (E, H, or L) for each
# residue.  The title of this file is TARGETID.ss, where
# TARGETID is the id in the name of the first RDB file.
#
# Last modified 6/30/02

use strict;
use warnings;

use FileHandle;
STDERR->autoflush(1);
use English;
use File::Basename;
use lib dirname($PROGRAM_NAME);    # RDBParse.pm is looked up next to this script
use RDBParse;

# Command-line state, filled in by process_command_line().  These stay
# package globals (as they were before strictures were added) so that any
# code reaching them as $main::author / @main::rdb_files keeps working.
our @rdb_files;    # RDB files to combine, in command-line order
our $author;       # value of the optional "-a" flag
                   # NOTE(review): parsed but never read in this file.

# NOTE(review): the file header mentions a CASP (.ss) output file; nothing in
# the code visible here writes one -- confirm whether that part was removed
# or lives elsewhere.

process_command_line();

# RDBParseFiles() (from RDBParse.pm) is used here as returning the list
#   ( number of positions, comment text of the input files,
#     hashref alphabet => weight, arrayref of per-position records ).
my ($RDB_length, $RDBcomments, $weights_ref, $results_ref)
    = RDBParseFiles(@rdb_files);
$RDB_length = 0 unless defined $RDB_length;

my %alph_weights = %{ $weights_ref || {} };
my @RDB_results  = @{ $results_ref || [] };

# Sorted so that the summation order and the alphabet that supplies the
# Pos/AA columns do not depend on Perl's unspecified hash ordering.
my @alphabets = sort keys %alph_weights;

my $total_weight = 0;
$total_weight += $alph_weights{$_} for @alphabets;
die "RDBCombine: total alphabet weight is not positive; cannot normalize\n"
    unless $total_weight > 0;

# Each alphabet's share of the total weight; invariant across positions.
my %norm_weight
    = map { $_ => $alph_weights{$_} / $total_weight } @alphabets;

# ---- Comment header of the combined RDB file ----
my $comments
    = "# This file is the result of combining several RDB files, specifically\n";
foreach my $rdbfile (@rdb_files) {
    # Get the alphabet from a name of the form  *.alph(-abc).rdb
    # Only trust $1 when the match succeeded; otherwise it would still
    # hold the capture from an earlier file.
    my $weight = 'unknown';
    if ($rdbfile =~ /\.(\w*)-?\w*\.rdb$/) {
        my $alphabet = $1;
        $weight = $alph_weights{$alphabet}
            if defined $alph_weights{$alphabet};
    }
    $comments .= "# $rdbfile (weight $weight)\n";
}
$comments .= "# These files were combined by translating their predictions into EHL\n";
$comments .= "# predictions with tables generated by compare-real, and then combining\n";
$comments .= "# those predictions with weights proportional to their mutual information\n";
$comments .= "# with the EHL alphabet. The comments from the individual files follow.\n#\n";
$comments .= $RDBcomments if defined $RDBcomments;

print $comments;
print "Pos\tAA\tE\tH\tC\n";
print "10N\t1S\t5N\t5N\t5N\n";

# ---- One output row per position (records are indexed from 1) ----
# Each per-alphabet record is used as [ pos, AA, P(E), P(H), P(L) ].
for my $i (1 .. $RDB_length) {
    my $record   = $RDB_results[$i];
    my @EHL_prob = (0, 0, 0);

    foreach my $alphabet (@alphabets) {
        my $fields = $record->{$alphabet};
        $EHL_prob[$_] += $norm_weight{$alphabet} * $fields->[ $_ + 2 ]
            for 0 .. 2;
    }

    # Position and residue are taken from the first alphabet's record.
    my $first = $record->{ $alphabets[0] };
    printf("%d\t%s\t%.4f\t%.4f\t%.4f\n",
           $first->[0], $first->[1], @EHL_prob);
}

# process_command_line
#   Splits @ARGV into the list of RDB files (@rdb_files) and the optional
#   "-a VALUE" pair ($author).  Prints the usage message and exits unless
#   at least two RDB files were given, or if "-a" has no value after it.
sub process_command_line {
    @rdb_files = ();

    my @args = @ARGV;
    while (@args) {
        my $arg = shift @args;

        # Exact comparison: a pattern match on "-a" would also swallow file
        # names that merely contain "-a" (e.g. "x.t2k-alpha.rdb").
        if ($arg eq '-a') {
            print_usage_exit() unless @args;
            $author = shift @args;
            next;
        }
        push @rdb_files, $arg;
    }

    print_usage_exit() if @rdb_files < 2;
    return;
}

# print_usage_exit
#   Prints the usage message and terminates with exit(-1).  The message goes
#   to STDERR because STDOUT is normally redirected into the combined .rdb
#   file and must not be polluted with usage text.
sub print_usage_exit {
    print STDERR "Usage: RDBCombine T0139.t2k.alpha.rdb T0139.t2k.dssp-ebghstl.rdb \\\n";
    print STDERR "      T0139.t2k.str.rdb . . . > T0139.t2k.combined.rdb\n";
    exit(-1);
}