#!/usr/bin/perl -w
# difference-evaluation -suffix '-scwrl' < decoys/evaluate.rdb > evaluate-scwrl-diff.rdb
#
# Extracts interesting results from the evaluate.rdb file and puts
# them in a tabular format.  It looks for paired names (after
# stripping off .pdb or .pdb.gz), such as ROBETTA_TS1 and ROBETTA_TS1-scwrl,
# and reports the cost for each and the difference in costs (post - pre).
# missing_atoms is also reported (pre, post, diff).
#
# options:
#
#   -cost real_cost   Name of column to extract (may be repeated).
#                     Outputs columns real_cost_pre, real_cost_post, real_cost_diff
#   -suffix '-scwrl'  Suffix to add to "pre" name to get "post" name
#                     (taken literally, not as a regular expression)

use strict;
use warnings;
use Getopt::Long;

# main()
# Parses the command line, fills in the default cost columns, and runs the
# report from STDIN to STDOUT.
sub main {
    my @cost_columns = ();
    my $suffix       = "-scwrl";

    GetOptions(
        "cost=s@"  => \@cost_columns,
        "suffix=s" => \$suffix,
    ) or die "Usage: $0 [-cost column]... [-suffix string] < evaluate.rdb\n";

    # An empty suffix would classify every row as "post" and pair nothing.
    die "-suffix must not be empty\n" if $suffix eq '';

    @cost_columns = ("real_cost") unless @cost_columns;

    # if "missing_atoms" is not in the cost_columns, push it on the end
    push @cost_columns, "missing_atoms"
        unless grep { $_ eq "missing_atoms" } @cost_columns;

    write_report(\*STDIN, \*STDOUT, $suffix, @cost_columns);
    return;
}

# write_report($in, $out, $suffix, @cost_columns)
# Reads an rdb table from the filehandle $in (comment lines, a tab-separated
# column-name line, a field-width line, then data rows whose first column is
# a file name) and prints to $out one row per file name that occurs both
# without and with $suffix, giving pre, post and post-pre for each column
# named in @cost_columns.
# Dies if there is no column-name line, if a requested column is not in it,
# or if no file name starting with a target id (T followed by digits or
# underscores) is found.
sub write_report {
    my ($in, $out, $suffix, @cost_columns) = @_;

    # look for column names: the first line that is not a comment
    my @col_names;
    while (my $header = <$in>) {
        next if $header =~ /^\s*#/;
        chomp $header;
        @col_names = split(/\t/, $header);
        last;
    }
    die "Can't find a column-name line in the evaluation file\n"
        unless @col_names;

    my %col_number;
    for my $i (0 .. $#col_names) {
        $col_number{ $col_names[$i] } = $i;
    }

    # Check the requested columns before any output is produced.
    my @unknown = grep { !defined $col_number{$_} } @cost_columns;
    die "Can't find column(s) in the evaluation file: @unknown\n" if @unknown;
    my @cost_col_nums = map { $col_number{$_} } @cost_columns;

    my $widths = <$in>;    # skip field-width info

    # The suffix is a literal string that must end the (stripped) name.
    my $suffix_re = qr/\Q$suffix\E/;

    # hashes indexed by file name with .pdb and .pdb.gz stripped and with suffix stripped
    my %lines_pre;     # rdb line for files that lack suffix
    my %lines_post;    # rdb line for files that have suffix
    my $target;        # target name

    while (my $line = <$in>) {
        chomp $line;
        my ($file) = split(/\t/, $line);
        next unless defined $file;    # blank line

        if ($file =~ /(\S+)[.]gz$/)  { $file = $1; }
        if ($file =~ /(\S+)[.]pdb$/) { $file = $1; }

        if ($file =~ /(\S+)$suffix_re$/) {
            $lines_post{$1} = $line;
        }
        else {
            $lines_pre{$file} = $line;
        }

        # The first name that looks like a target id names the whole report.
        if (!defined($target) && $file =~ /^\s*(T[_0123456789]+)/) {
            $target = $1;
        }
    }
    if (!defined($target)) {
        die "Can't find a target result in the evaluation file";
    }

    # print rdb header: column names, then field widths
    print {$out} join("\t", "target", "file",
        map { ($_ . "_pre", $_ . "_post", $_ . "_diff") } @cost_columns), "\n";
    print {$out} join("\t", "5S", "20S", ("7N") x (3 * @cost_columns)), "\n";

    # one row per name that has both a pre and a post line
    for my $file (sort keys %lines_post) {
        my $pre_line = $lines_pre{$file};
        next if !defined($pre_line);

        my @pre_cols  = split(/\t/, $pre_line);
        my @post_cols = split(/\t/, $lines_post{$file});

        my @fields = ($target, $file);
        for my $col_num (@cost_col_nums) {
            push @fields,
                sprintf("%.4f", $pre_cols[$col_num]),
                sprintf("%.4f", $post_cols[$col_num]),
                sprintf("%.4f", $post_cols[$col_num] - $pre_cols[$col_num]);
        }
        print {$out} join("\t", @fields), "\n";
    }
    return;
}

# Run only when executed as a script, so the subs can be loaded for testing.
main() unless caller;

1;