#!/usr/bin/perl -w
# seq-from-rdb < foo.rdb > foo.seq
#
# Reads the RDB output of a neural net that predicts secondary structure
# and writes the prediction as a FASTA file: a ">target" header line
# followed by one letter per data row, that letter being the name of the
# column holding the largest value on the row.

use strict;
use warnings;

# rdb_to_fasta($in_fh, $out_fh)
#
# Converts one RDB table read from $in_fh into a FASTA record written to
# $out_fh.
#
# Expected input layout, in order:
#   - leading comment lines (first non-blank character is '#'); a line
#     containing "# TARGET <name>" supplies the FASTA header name,
#     otherwise "unknown_target" is used,
#   - a tab-separated column-name line whose first two names must be
#     "Pos" and "AA" and which must name at least one further column,
#   - one format line, which is read and ignored,
#   - tab-separated data rows; completely empty lines are skipped.
#
# For each data row the columns from the third onward are compared
# numerically and the name of the largest one is printed (the leftmost
# column wins a tie).
#
# Dies with a "seq-from-rdb (<target>): ..." message when the column-name
# line is missing or malformed, or when a data row does not have the same
# number of columns as the column-name line.  The ">target" header line
# has already been written by the time any of these checks run.
sub rdb_to_fasta {
    my ($in_fh, $out_fh) = @_;

    my $target_id = "unknown_target";

    # Skip the initial header comments, except for the one that tells the
    # name of the target.  Stop at end of input as well, so that an empty
    # or comment-only file is reported clearly below.
    my $line = <$in_fh>;
    while (defined $line && $line =~ /^\s*#/) {
        if ($line =~ /# TARGET\s+(\S+)/) {
            $target_id = $1;
        }
        $line = <$in_fh>;
    }
    print {$out_fh} ">$target_id\n";

    defined $line
        or die "seq-from-rdb ($target_id): no column-name line found in input\n";

    # Get the column names.  Strip "\r\n" as well as "\n" so that a file
    # with DOS line endings does not leave "\r" glued to the last field.
    $line =~ s/\r?\n\z//;
    my @col_names = split(/\t/, $line);
    (defined $col_names[0] && $col_names[0] eq "Pos")
        or die "seq-from-rdb ($target_id): first column should be named 'Pos'\n";
    (defined $col_names[1] && $col_names[1] eq "AA")
        or die "seq-from-rdb ($target_id): second column should be named 'AA'\n";
    # The row loop starts its search at index 2, so that column must exist.
    (scalar(@col_names) >= 3)
        or die "seq-from-rdb ($target_id): need at least one column after 'Pos' and 'AA'\n";

    # Skip the format information in the rdb header.
    my $format_line = <$in_fh>;

    while (defined($line = <$in_fh>)) {
        $line =~ s/\r?\n\z//;
        next if $line eq '';    # skip completely empty lines

        my @col_values = split(/\t/, $line);
        (scalar(@col_values) == scalar(@col_names))
            or die "seq-from-rdb ($target_id): have "
                 . scalar(@col_names)
                 . " columns in header, but "
                 . scalar(@col_values)
                 . " on line:\n'$line'\n";

        # Index of the largest value among columns 2..last; the strict
        # '>' keeps the leftmost column when values are equal.
        my $max_at = 2;
        for my $at (3 .. $#col_values) {
            $max_at = $at if ($col_values[$at] > $col_values[$max_at]);
        }
        print {$out_fh} $col_names[$max_at];
    }
    print {$out_fh} "\n";
    return;
}

# Run as a filter when executed as a script; stay silent when this file is
# loaded with require/do so the sub can be called directly.
rdb_to_fasta(\*STDIN, \*STDOUT) unless caller;

1;