#!/usr/bin/perl -w # constraints-from-rdb [-start 1] < foo.str2.rdb > foo.constraints # # converts a secondary structure prediction to # HelixConstraint and StrandConstraint commands for undertaker # # If -start is set, then add its value minus 1 to the positions # in the rdb file. use strict; use Getopt::Long; sub print_segment($$$$); # The alphabet reduction is not the standard CASP one--- # only things that are very likely to be helix or strand are counted # as helix or strand. my %alphabet_reduction = ( "E" => "E" # E of stride or dssp, , "A" => "E" # anti-parallel in str or str2 , "P" => "E" # parallel in str or str2 , "Z" => "E" # anti-parallel edge , "Q" => "E" # parallel edge , "Y" => "E" # anti-parallel edge, not? bonded (str2) , "H" => "H" # only H for helix---don't try to add # helix constraints for 3-10 , "AA" => "AA" # amino acid , "POS"=> "POS" # position ); my $start = 1; GetOptions( "start_column=n" => \$start ); # skip comments at beginning of rdb file for ($_=; /^\s*#/; $_=) {} # now get the column names chomp; my @col_names = split(/\t/); # names for the colummns my $pos_col_num=0; my $aa_col_num=1; my @add_to; # which type to add to for(my $i = 0; $i<=$#col_names; $i++) { my $col = uc($col_names[$i]); my $recode = $alphabet_reduction{$col}; $add_to[$i] = "L"; next if ! defined($recode); if ($recode eq "AA") { $aa_col_num = $i; $add_to[$i] = undef; # not a weight } elsif ($col eq "POS") { $pos_col_num = $i; $add_to[$i] = undef; # not a weight } else { $add_to[$i] = $recode; } } # print STDERR "DEBUG: columns= " . join(" ", @col_names) . "\n"; # print STDERR "DEBUG: add_to= " . join(" ", @add_to) . "\n"; my $segment_type = "L"; # am I in helix, loop, or strand my $segment_start = undef; # where did segment start my $segment_end = undef; # where did segment end my $segment_weight = 999999; # minimum weight in segment so far ; # skip the format line # read the data lines while() { my @col_values = split(/\t/); # values of columns my $res = $col_values[$aa_col_num] . ($col_values[$pos_col_num]+$start-1); # add up total weights for classifying residue my %total_weights= ("H"=>0, "E"=>0, "L"=>0); for(my $i = 0; $i<=$#col_values; $i++) { my $col_type = $add_to[$i]; next if !defined($col_type); $total_weights{$col_type} += $col_values[$i]; } # determine residue type (if strongly predicted) my $residue_type= $total_weights{"H"} > 0.6? "H" :( $total_weights{"E"} > 0.6? "E" : "L"); my $residue_weight = $total_weights{$residue_type}; # print STDERR "DEBUG: $res $residue_type $residue_weight\n"; if ($residue_type ne $segment_type) { # starting a new segment. Print old one if appropriate print_segment($segment_type, $segment_weight, $segment_start, $segment_end); $segment_start = $res; $segment_type = $residue_type; $segment_weight= $residue_weight; } $segment_end = $res; $segment_weight = $residue_weight if $residue_weight< $segment_weight; } print_segment($segment_type, $segment_weight, $segment_start, $segment_end); exit (0); sub print_segment($$$$) { my ($segment_type, $segment_weight, $segment_start, $segment_end) = @_; return if $segment_type eq "L"; return if $segment_start eq $segment_end; print "HelixConstraint " if ($segment_type eq "H"); print "StrandConstraint" if ($segment_type eq "E"); print "\t$segment_start\t$segment_end\t$segment_weight\n"; }