#!/usr/bin/perl -w

# seq-from-rdb < foo.rdb > foo.seq
#	reads an output from a neural net that predicts secondary-structure,
#	and outputs the prediction as a fasta file

{
    use strict;

    # Converts an rdb table of per-residue predictions (read from STDIN)
    # into a FASTA-style record on STDOUT: a ">target" line followed by one
    # line built by concatenating, for each data row, the NAME of the
    # column (third column onward) that holds the numerically largest value.
    #
    # Input layout, in order:
    #   - leading comment lines starting with '#'; a "# TARGET <id>"
    #     comment supplies the record id (default "unknown_target")
    #   - a tab-separated column-name line beginning with "Pos", "AA"
    #   - one format line, which is read and ignored
    #   - tab-separated data rows (completely empty lines are skipped)
    #
    # Dies with a diagnostic if the column-name line is missing or wrong,
    # if there is no prediction column, or if a data row has a different
    # number of columns than the column-name line.

    my $target_id = "unknown_target";

    # Skip the initial header comments, except for the one that tells the
    # name of the target.  Stop at end of input as well, so that a
    # truncated file is reported clearly instead of being matched as undef.
    my $line = <STDIN>;
    while (defined($line) && $line =~ /^\s*#/) {
        if ($line =~ /# TARGET\s+(\S+)/) {
            $target_id = $1;
        }
        $line = <STDIN>;
    }

    print ">$target_id\n";

    defined($line)
        || die "seq-from-rdb ($target_id): no column-name line found after header comments\n";

    # Get the column names.
    chomp($line);
    my @col_names = split(/\t/, $line);
    (defined($col_names[0]) && $col_names[0] eq "Pos")
        || die "seq-from-rdb ($target_id): first column should be named 'Pos'\n";
    (defined($col_names[1]) && $col_names[1] eq "AA")
        || die "seq-from-rdb ($target_id): second column should be named 'AA'\n";

    # The per-row maximum is taken over columns 2.., so at least one such
    # column must exist; otherwise there is nothing to print for a row.
    (scalar(@col_names) >= 3)
        || die "seq-from-rdb ($target_id): need at least one prediction column after 'Pos' and 'AA'\n";

    <STDIN>;    # skip the format information in the rdb header

    while (defined(my $row = <STDIN>)) {
        chomp($row);
        next if $row =~ /^$/;    # skip completely empty lines

        my @col_values = split(/\t/, $row);
        (scalar(@col_values) == scalar(@col_names))
            || die "seq-from-rdb ($target_id): have " . scalar(@col_names)
                . " columns in header, but " . scalar(@col_values)
                . " on line:\n'$row'\n";

        # Find the index of the largest value among the prediction columns.
        # The strict '>' keeps the leftmost column when values tie.
        my $max_at = 2;
        for my $at (3 .. $#col_values) {
            $max_at = $at if ($col_values[$at] > $col_values[$max_at]);
        }
        print $col_names[$max_at];
    }

    print "\n";

}