#!/usr/bin/perl -w
#
# Author: Grant Thiltgen
#
# This script is to collect output data from the multiple neural nets
# run to test our training protocol.
#
# For every net named in the --net_list file, it reads
#   <net>/quality-reports/<dataset>-<txxalign>-<arch>-<data>-<alphaname>-mult50-from-empty.train
# and records the field at index 3 (after splitting on whitespace) of the
# last non-comment line that has such a field.
#
# Output:
#   STDOUT  - a tab-separated table: two header lines, the average over
#             all nets that yielded a value, then every net with its
#             value, sorted from highest to lowest value.
#   networks/<dataset>-<txxalign>-<arch>-<data>-<alphaname>-mult50.nets
#           - the names of the --best_n highest-valued nets, one per line.
#
# CHANGE LOG:
# Thu Nov 18 09:20:34 PST 2010 Kevin Karplus
#       Added --net_list option to get list of nets, instead of
#       generating numerically.
# Thu Nov 18 09:30:00 PST 2010 Kevin Karplus
#       Added --best_n option (to replace hard-wired 10)

use strict;
use English;
use File::Basename;
use Getopt::Long;
use Pod::Usage;

my $num_nets;       # The number of nets that have data.
                    # (Still accepted as --number; not used in the code below.)
my $txxalign;       # The alignment, for example t05-thin90
my $dataset;        # The dataset, for example dunbrack-30pc-1763
my $arch;           # The architecture of the neural net.
my $alphaname;      # The name of the alphabet used in neural net training
my $ctrain = 1;     # Value for declaring if crosstraining data is available
                    # (Still accepted as --ctrain; not used in the code below.)
my $data;           # Data used, for example tr12 or t1c2
my $net_list = "../training-data/fullset.nets";  # where are the net names listed
my $best_n = 10;    # number of results to print

GetOptions(
    "number=i"    => \$num_nets,
    "txxalign=s"  => \$txxalign,
    "dataset=s"   => \$dataset,
    "arch=s"      => \$arch,
    "alphaname=s" => \$alphaname,
    "data=s"      => \$data,
    "net_list=s"  => \$net_list,
    "ctrain=i"    => \$ctrain,
    "best_n=i"    => \$best_n,
    "help|?"      => sub { pod2usage("verbose" => 1); },
    "man"         => sub { pod2usage("verbose" => 2); },
) or pod2usage("verbose" => 0);

# These five options are interpolated into every file name below, so a
# missing one can only produce a path that does not exist.  Fail early
# with a message that names what is missing.
my %required_option = (
    txxalign  => $txxalign,
    dataset   => $dataset,
    arch      => $arch,
    alphaname => $alphaname,
    data      => $data,
);
my @missing = grep { !defined $required_option{$_} } sort keys %required_option;
if (@missing) {
    pod2usage(
        "-message" => "Missing required option(s): "
                      . join(", ", map { "--$_" } @missing),
        "-verbose" => 0,
        "-exitval" => 2,
    );
}

# Building loops to create the data file.
my %trainbits;      # storing training data in bits, keyed by net name
my $sum   = 0;      # Running total, used to calculate average of nets
my $count = 0;      # Number of nets that contributed to $sum

open(my $netset_fh, '<', $net_list)
    or die("Can't open list of nets in $net_list: $!\n");

print STDOUT "Net\tValue\n";
print STDOUT "6S\t9N\n";

NET:
while (my $name = <$netset_fh>) {
    chomp $name;

    # Skip blank lines and any line that contains a '#'.
    next NET if length($name) == 0 || $name =~ /#/;

    my $file = "$name/quality-reports/$dataset-$txxalign-$arch-$data-$alphaname-mult50-from-empty.train";

    my $train_fh;
    if (!open($train_fh, '<', $file)) {
        # A net without a readable report must not be counted, otherwise
        # it would drag the average down as if its value were zero.
        print STDERR "Can't open $data training file $file for $name\n";
        next NET;
    }

    # Keep the value from the last non-comment line that actually has a
    # field at index 3; a trailing blank or short line must not wipe out
    # a value that was already found.
    my $bits;
    while (my $line = <$train_fh>) {
        chomp $line;
        next if $line =~ /^#/;
        my @fields = split /\s+/, $line;
        $bits = $fields[3] if defined $fields[3];
    }
    close $train_fh;

    if (!defined $bits) {
        print STDERR "No value found in $data training file $file for $name\n";
        next NET;
    }

    $trainbits{$name} = $bits;
    $sum   += $bits;
    $count += 1;
}
close $netset_fh;

# Without this guard an empty or all-unreadable net list ends in
# "Illegal division by zero".
if ($count == 0) {
    die("No training data found for any net listed in $net_list\n");
}

my $average = $sum / $count;
print STDOUT "avg\t$average\n";

my $exit_status = 0;
my $nets_file = "networks/$dataset-$txxalign-$arch-$data-$alphaname-mult50.nets";

# If the list of best nets cannot be written, still print the full table
# to STDOUT, but report the problem and finish with a failing status.
my $nets_fh;
if (!open($nets_fh, '>', $nets_file)) {
    print STDERR "Can't write list of best nets to $nets_file: $!\n";
    undef $nets_fh;
    $exit_status = 1;
}

my $rank = 0;       # position of the current net in the sorted order
foreach my $key (sort highlow keys %trainbits) {
    if ($rank++ < $best_n) {
        print {$nets_fh} "$key\n" if defined $nets_fh;
    }
    print STDOUT "$key\t$trainbits{$key}\n";
}

if (defined $nets_fh) {
    # Buffered write errors only surface at close time.
    if (!close($nets_fh)) {
        print STDERR "Error closing $nets_file: $!\n";
        $exit_status = 1;
    }
}

exit($exit_status);

# Sort comparator: orders net names by their %trainbits value, highest
# first.  Nets with equal values are ordered by name so that the output
# does not depend on hash ordering.
# The ($$) prototype makes sort pass the two elements in @_.
sub highlow ($$) {
    my ($left, $right) = @_;
    return $trainbits{$right} <=> $trainbits{$left}
        || $left cmp $right;
}

__END__