#!/usr/bin/perl -w # constraints-from-rdb [-start 1] < foo.str2.rdb > foo.constraints # # converts a secondary structure prediction to # HelixConstraint and StrandConstraint commands for undertaker # # If -start is set, then add its value minus 1 to the positions # in the rdb file. # # If -alphabet is set, use it to choose the translation table. use strict; use Getopt::Long; sub print_segment($$$$); # This conversion between alphabet and constraints needs reworking # for n_sep, n_notor2, o_sep, o_notor2: move helix positions! # The alphabet reduction is not the standard CASP one--- # only things that are very likely to be helix or strand are counted # as helix or strand. my %str2_alphabet_reduction = ( "E" => "E" # E of stride or dssp, , "A" => "E" # anti-parallel in str or str2 , "P" => "E" # parallel in str or str2 , "Z" => "E" # anti-parallel edge , "Q" => "E" # parallel edge , "Y" => "E" # anti-parallel edge, not? bonded (str2) , "H" => "H" # only H for helix---don't try to add # helix constraints for 3-10 ); my %pb_alphabet_reduction = ( "C" => "E" # C is a bit dubious , "D" => "E" # the solid strands , "E" => "E" # E is a bit dubious, perhaps too many loops , "M" => "H" # only M, as L and N are too likely to be 3-10 or turn ); my %alpha_alphabet_reduction = ( "A" => "E" , "E" => "E" , "H" => "H" ); my %notor_alphabet_reduction = ( "A" => "E" , "B" => "E" , "P" => "E" , "H" => "H" #would be better with offset of +-2 ); my %sep_alphabet_reduction = ( "A" => "E" , "K" => "E" , "L" => "E" , "M" => "E" , "N" => "E" , "P" => "E" , "Q" => "E" , "R" => "E" , "S" => "E" , "T" => "E" , "U" => "E" , "V" => "E" , "W" => "E" , "H" => "H" ); my %str4_alphabet_reduction = ( "A" => "E" , "B" => "E" , "C" => "E" , "D" => "E" , "E" => "E" , "K" => "E" , "L" => "E" , "M" => "E" , "P" => "E" , "Q" => "E" , "R" => "E" , "W" => "E" , "H" => "H" ); my %bys_alphabet_reduction = ( "E" => "E" , "Y" => "E" , "H" => "H" ); my %alphabet_map = ( "str" => \%str2_alphabet_reduction , "str2" => \%str2_alphabet_reduction , "stride_ebghtl" => \%str2_alphabet_reduction , "dssp_ebghtl" => \%str2_alphabet_reduction , "dssp-ehl2" => \%str2_alphabet_reduction , "bys" => \%bys_alphabet_reduction , "pb" => \%pb_alphabet_reduction , "str4" => \%str4_alphabet_reduction , "alpha" => \%alpha_alphabet_reduction , "o_notor" => \%notor_alphabet_reduction , "o_notor2" => \%notor_alphabet_reduction , "n_notor" => \%notor_alphabet_reduction , "n_notor2" => \%notor_alphabet_reduction , "o_sep" => \%sep_alphabet_reduction , "n_sep" => \%sep_alphabet_reduction ); my $start = 1; my $alphabet="str2"; GetOptions( "alphabet=s" => \$alphabet , "start_column=n" => \$start ); my $alphabet_reduction_ref = $alphabet_map{$alphabet}; if (! defined($alphabet_reduction_ref)) { die("ERROR: constraints-from-rdb doesn't know secondary structure alphabet $alphabet"); } # skip comments at beginning of rdb file for ($_=; /^\s*#/; $_=) {} # now get the column names chomp; my @col_names = split(/\t/); # names for the colummns my $pos_col_num=0; my $aa_col_num=1; my @add_to; # which type to add to for(my $i = 0; $i<=$#col_names; $i++) { my $col = uc($col_names[$i]); my $recode = $$alphabet_reduction_ref{$col}; if ($col eq "AA") { $aa_col_num = $i; $add_to[$i] = undef; # not a weight } elsif ($col eq "POS") { $pos_col_num = $i; $add_to[$i] = undef; # not a weight } else { $add_to[$i] = "L"; } next if ! defined($recode); $add_to[$i] = $recode; } # print STDERR "DEBUG: columns= " . join(" ", @col_names) . "\n"; # print STDERR "DEBUG: add_to= " . join(" ", @add_to) . "\n"; my $segment_type = "L"; # am I in helix, loop, or strand my $segment_start = undef; # where did segment start my $segment_end = undef; # where did segment end my $segment_weight = 999999; # minimum weight in segment so far ; # skip the format line # read the data lines while() { my @col_values = split(/\t/); # values of columns my $res = $col_values[$aa_col_num] . ($col_values[$pos_col_num]+$start-1); # add up total weights for classifying residue my %total_weights= ("H"=>0, "E"=>0, "L"=>0); for(my $j = 0; $j<=$#col_values; $j++) { my $col_type = $add_to[$j]; next if !defined($col_type); $total_weights{$col_type} += $col_values[$j]; } # determine residue type (if strongly predicted) my $residue_type= $total_weights{"H"} > 0.6? "H" :( $total_weights{"E"} > 0.6? "E" : "L"); my $residue_weight = $total_weights{$residue_type}; # print STDERR "DEBUG: $res $residue_type $residue_weight\n"; if ($residue_type ne $segment_type) { # starting a new segment. Print old one if appropriate print_segment($segment_type, $segment_weight, $segment_start, $segment_end); $segment_start = $res; $segment_type = $residue_type; $segment_weight= $residue_weight; } $segment_end = $res; $segment_weight = $residue_weight if $residue_weight< $segment_weight; } print_segment($segment_type, $segment_weight, $segment_start, $segment_end); exit (0); sub print_segment($$$$) { my ($segment_type, $segment_weight, $segment_start, $segment_end) = @_; return if $segment_type eq "L"; return if $segment_start eq $segment_end; print "HelixConstraint " if ($segment_type eq "H"); print "StrandConstraint" if ($segment_type eq "E"); print "\t$segment_start\t$segment_end\t$segment_weight\n"; }