#RDBParse
package RDBParse;

use strict;
use warnings;

require Exporter;
use FileHandle;
STDERR->autoflush(1);
use English;
use File::Basename;
use lib dirname($PROGRAM_NAME);
use ParseCompare;

our @ISA    = qw(Exporter);
our @EXPORT = qw( RDBParseFiles );

# Alphabet names extracted from the RDB file names, in processing order.
# Nothing in this file reads it; it stays a package variable (and keeps
# accumulating across calls) in case external code inspects
# @RDBParse::alphabets -- TODO confirm whether anything still does.
our @alphabets;

# RDBParseFiles(@rdb_files)
#
# Reads each tab-separated RDB file named in @rdb_files and converts every
# data row into renormalised E/H/L probabilities, using the joint
# probability table that ParseCompareFile() returns for the file's alphabet.
#
# Expected file layout (as consumed by this code):
#   * zero or more leading lines starting with '#', collected as comments;
#   * one header line of tab-separated column names; columns 2.. are the
#     alphabet letters used as keys into the joint-probability table;
#   * one line that is read and discarded (length/format information);
#   * data rows: columns 0 and 1 are passed through untouched, columns 2..
#     are per-letter probabilities.  '#' lines between rows are also
#     collected as comments.
#
# The alphabet is taken from the file name, which must end in
# ".<alphabet>.rdb" or ".<alphabet>-<suffix>.rdb".
#
# Returns the list
#   ($line_num, $comments, \%alph_weights, \@output)
# where
#   $line_num      number of data rows in the LAST file processed
#                  (0 when no files were given);
#   $comments      all collected comment lines, with a banner per file;
#   %alph_weights  alphabet => element [1] of ParseCompareFile()'s result;
#   @output        $output[$row]{$alphabet} = [col0, col1, E, H, L], with
#                  $row counted from 1 (index 0 is never filled).
#
# Dies if a file cannot be opened, if its name does not reveal an
# alphabet, or if a data row's E+H+L total is zero (cannot renormalise).
sub RDBParseFiles (@) {
    my @rdb_files = @_;

    my $compare_location = "/projects/compbio/lib/2nd-compare/";

    # Per-call state: results of one call never leak into the next.
    my $line_num = 0;
    my $comments = '';
    my %alph_weights;
    my @output;

    foreach my $rdb_file (@rdb_files) {
        # Three-argument open: the file name is never interpreted as a mode.
        open(my $rdb_fh, '<', $rdb_file)
            or die "ERROR: Couldn't open RDB file $rdb_file: $!\n";

        # NOTE: This part is ugly - we read in each compare file, even though
        # the main program has to do the same thing for mutual info.

        # Get the alphabet from .alph(-abc).rdb.  The match must be checked:
        # after a failed match $1 still holds an older, unrelated capture.
        if ($rdb_file !~ /\.(\w*)-?\w*\.rdb$/) {
            die "ERROR: Couldn't determine alphabet from RDB file name $rdb_file\n";
        }
        my $alphabet = $1;
        push @alphabets, $alphabet;

        my @compare = ParseCompareFile(
            $compare_location . "t2k." . $alphabet . "-dssp_ehl2.compare");
        $alph_weights{$alphabet} = $compare[1];

        # letter => [E, H, L] joint probabilities.  An absent table is
        # treated as empty, which matches the old non-strict behaviour.
        my $joint_prob = $compare[2] || {};

        $comments .= "# Comments from $rdb_file\n";
        $comments .= "# ============================================\n";

        # Look for and save leading comments; the first non-comment line is
        # the column header and is left in $header (undef at end of file).
        my $header;
        while (defined($header = <$rdb_fh>)) {
            chomp($header);
            last if $header !~ /^#/;
            $comments .= $header . "\n";
        }

        # Split into an array; all but the first two elements are alphabet
        # letters.  Use these letters as indices into the hash table.
        my @letters = defined $header ? split(/\t/, $header) : ();

        # Scan and discard the next line (length information).
        scalar <$rdb_fh>;

        # Create a new output row from each data line in the file.
        $line_num = 0;
        while (defined(my $line = <$rdb_fh>)) {
            chomp($line);
            if ($line =~ /^#/) {
                $comments .= $line . "\n";
                next;
            }

            my @fields = split(/\t/, $line);
            $line_num++;

            my ($E_prob, $H_prob, $L_prob) = (0, 0, 0);
            for my $i (2 .. $#fields) {
                # A column with no header letter, or a letter missing from
                # the table, contributes nothing (it used to be an implicit
                # undef-times-probability, i.e. also zero).
                my $letter = $letters[$i];
                next unless defined $letter;
                my $ehl = $joint_prob->{$letter};
                next unless ref $ehl eq 'ARRAY';

                my $prob = $fields[$i];
                $E_prob += $prob * ($ehl->[0] || 0);
                $H_prob += $prob * ($ehl->[1] || 0);
                $L_prob += $prob * ($ehl->[2] || 0);
            }

            my $total_prob = $E_prob + $H_prob + $L_prob;
            if ($total_prob == 0) {
                # Still fatal, as the bare division was, but say where.
                die "ERROR: E/H/L probabilities sum to zero in RDB file "
                    . "$rdb_file, line $.\n";
            }

            # Renormalize to remove errors
            $E_prob /= $total_prob;
            $H_prob /= $total_prob;
            $L_prob /= $total_prob;

            $output[$line_num]{$alphabet} =
                [ $fields[0], $fields[1], $E_prob, $H_prob, $L_prob ];
        }
        close($rdb_fh);

        $comments .= "# ============================================\n";
    }

    return ($line_num, $comments, \%alph_weights, \@output);
}

# A module must return a true value when loaded.
1;