#!/usr/bin/perl
# RDBCombine
# Requires: RDBParse.pm
#================================================================
# Jonathan Casper and Jenny Draper
# jcasper@soe.ucsc.edu, learithe@soe.ucsc.edu
#
# INPUT: Several RDB filenames
# OUTPUT: The combined EHL-structure prediction of those files,
#    along with a CASP file containing the same information.
# PRECONDITIONS: This script assumes that compare files exist
#    for each of the RDB alphabets used, and that their locations
#    are stored in RDBParse.pm.
#
# RDBCombine takes several RDB file names on the command line
# and combines their secondary structure predictions into a
# single EHL prediction.  The translation (from the source
# alphabets to EHL) is done using compare tables in
# /projects/compbio/lib/2nd-compare/
# This location is stored in RDBParse.pm, and additional
# compare file locations may be added there.
#
# The weighting of each RDB file for the EHL predictions
# is proportional to their mutual information with the
# EHL alphabet, as taken from those same compare files.
#
# The resulting RDB file is written to standard output.
# Additionally, this script also creates a CASP file that
# contains the most probable structure (E, H, or L) for each
# residue.  The title of this file is TARGETID.ss, where
# TARGETID is the id in the name of the first RDB file.
#
# Last modified 6/30/02

use FileHandle;
STDERR->autoflush(1);
use English;
use File::Basename;
use lib dirname($PROGRAM_NAME);
use RDBParse;

#----------------------------------------------------------------
# Main script body
#
# 1. Collect the RDB file names from the command line.
# 2. Hand them to RDBParseFiles (from RDBParse.pm).  As used below,
#    its return list is:
#      [0] number of residue rows
#      [1] comment text gathered from the input files
#      [2] hash ref: alphabet name -> weight
#      [3] array ref of per-residue hashes, alphabet name -> array ref
#          laid out as (position, amino acid, P(E), P(H), P(L))
# 3. Print a comment header, the RDB column header, and one row per
#    residue holding the weight-averaged E/H/L probabilities.
#
# NOTE(review): the file header mentions a CASP ".ss" file, but
# nothing in this part of the script writes one -- confirm whether
# that happens inside RDBParse.pm or was never implemented.
#----------------------------------------------------------------
process_command_line();

@RDB_out      = RDBParseFiles(@rdb_files);
$RDB_length   = $RDB_out[0];
$RDBcomments  = $RDB_out[1];
%alph_weights = %{ $RDB_out[2] };
@RDB_results  = @{ $RDB_out[3] };

# Sorted so that the floating-point summation order, and therefore the
# printed output, is the same on every run (hash key order is not stable).
@alphabets = sort keys %alph_weights;

$total_weight = 0;
foreach my $alph (@alphabets)
{
  $total_weight += $alph_weights{$alph};
}

# Each alphabet's share of the total weight is loop-invariant, so it is
# computed once here instead of three times per residue.
my %weight_share;
if ($total_weight)
{
  foreach my $alph (@alphabets)
  {
    $weight_share{$alph} = $alph_weights{$alph} / $total_weight;
  }
}
elsif (@alphabets && $RDB_length >= 1)
{
  # Without this check the script would abort with "Illegal division by
  # zero" after having already written a partial file to STDOUT.
  die "RDBCombine: the alphabet weights sum to zero; cannot combine predictions\n";
}

$comments = "# This file is the result of combining several RDB files, specifically\n";
foreach my $rdbfile (@rdb_files)
{
  # get alphabet from .alph(-abc).rdb
  # The match result must be tested: a failed match does not give a
  # fresh $1, so using it unconditionally could report the weight of a
  # different file.
  my $weight;
  if ($rdbfile =~ /\.(\w*)-?\w*\.rdb$/)
  {
    $weight = $alph_weights{$1};
  }
  $weight = 'unknown' unless defined $weight;
  $comments .= "# $rdbfile  (weight $weight)\n";
}
$comments .= "# These files were combined by translating their predictions into EHL\n";
$comments .= "# predictions with tables generated by compare-real, and then combining\n";
$comments .= "# those predictions with weights proportional to their mutual information\n";
$comments .= "# with the EHL alphabet.  The comments from the individual files follow.\n#\n";

$comments .= $RDBcomments;

print "$comments";
print "Pos\tAA\tE\tH\tC\n";
print "10N\t1S\t5N\t5N\t5N\n";

# Rows are read starting at index 1 -- presumably RDBParse stores the
# first residue there; confirm against RDBParse.pm.
foreach my $row (1 .. $RDB_length)
{
  @EHL_prob = (0, 0, 0);

  foreach my $alph (@alphabets)
  {
    # Columns 2, 3 and 4 of each parsed row are the E, H and L
    # probabilities for this alphabet.
    $EHL_prob[0] += $weight_share{$alph} * ${ $RDB_results[$row]{$alph} }[2];
    $EHL_prob[1] += $weight_share{$alph} * ${ $RDB_results[$row]{$alph} }[3];
    $EHL_prob[2] += $weight_share{$alph} * ${ $RDB_results[$row]{$alph} }[4];
  }

  # Position and residue are taken from the first alphabet's row.
  $pos = ${ $RDB_results[$row]{$alphabets[0]} }[0];
  $AA  = ${ $RDB_results[$row]{$alphabets[0]} }[1];

  printf ("%d\t%s\t%.4f\t%.4f\t%.4f\n",
          $pos, $AA, $EHL_prob[0], $EHL_prob[1], $EHL_prob[2]);
}


# process_command_line
#
# Splits @ARGV into the list of RDB files to combine and the optional
# author option.
#
# Reads:   @ARGV
# Sets:    @rdb_files - every argument that is not part of an option
#          $author    - the argument following an option starting "-a"
#                       (undef if the option is the last argument)
# Exits (via print_usage_exit) when fewer than two RDB files are given.
sub process_command_line ()
{
  # With "use English" loaded $PROGRAM_NAME is an alias for $0, so this
  # is a self-assignment; kept so the variable is set either way.
  $PROGRAM_NAME = $0;

  # defaults
  @rdb_files = ();

  # Get list of rdb files to combine
  my $next = 0;
  while ($next <= $#ARGV)
  {
    # A lexical is used rather than $_ so the caller's $_ is untouched.
    my $arg = $ARGV[$next++];

    # Anchored at the start: an unanchored /-a/ also matched file names
    # that merely contain "-a" (e.g. "x.str-a2.rdb"), silently dropping
    # that file and the argument after it.
    if ($arg =~ /^-a/)
    {
      $author = $ARGV[$next++];
      next;
    }
    push @rdb_files, $arg;
  }

  print_usage_exit() if (@rdb_files < 2);
}


# print_usage_exit
#
# Prints an example invocation and terminates the script with exit(-1).
# The text goes to STDERR: STDOUT carries the combined RDB output and is
# normally redirected to a file (as the usage example itself shows), so
# a message printed there would be hidden from the user and would end up
# inside the output file.
sub print_usage_exit ()
{
  print STDERR
    "Usage: RDBCombine T0139.t2k.alpha.rdb T0139.t2k.dssp-ebghstl.rdb \\\n",
    "   T0139.t2k.str.rdb . . . > T0139.t2k.combined.rdb\n";
  exit(-1);
}