#!/usr/bin/perl -w
# merge_ts1_ts2 SAM-T08-server-domain_evaluate.rdb SAM-T08-server_TS2-domain_evaluate.rdb ... > SAM-T08-server_TS1_TS2.rdb
#
# Creates a new rdb file with all the cost columns of the input files.
# The "target" and "name" columns of the inputs are used to determine
# which data rows are merged.  The names in the output will have
# "_TS[12345]" stripped off the ends, and the stripped off identifier
# added to the column names.
#
# That means that we would have a line for T0388/D1 SAM-T08-server, with
# columns n_ca_c_TS1 n_ca_c_TS2 nn1000_TS1 nn1000_TS2 and so forth.
#
# Rows whose "name" field does not end in _TS1 .. _TS5 (or that are too
# short to contain the "name" and "target" fields) cannot be merged; they
# are reported on STDERR and skipped.

use strict;
use warnings;
use English;
use File::Basename;
use lib dirname($PROGRAM_NAME);
use READ_RDB;

# Column bookkeeping used below after each read_rdb_header() call.
# NOTE(review): presumably READ_RDB fills these in package main (or exports
# them); confirm against READ_RDB.pm.
#   @col_names : column names, in file order
#   %col_num   : column name -> index into a split data line
our (@col_names, %col_num);

my %value;      # $value{$target . "\t" . $server_name}{$colname . "_TS1"}
                # has the value for column $colname for
                # $server_name . "_TS1" on $target

my %all_cols;   # keys are all output column names (input columns other than
                # "name", "target", and "length", with the _TSn suffix added)
                # value is irrelevant

foreach my $file (@ARGV) {
    # Three-argument open so that odd characters in $file are never
    # interpreted as part of the open mode.  The bareword handle is kept
    # because it is what read_rdb_header() is handed as a glob.
    open(RDBIN, '<', $file)
        or die "Error: can't open $file for input: $!";

    # set up the column name and number arrays
    # NOTE(review): assumed to also consume the rdb header lines, leaving
    # RDBIN positioned at the first data row -- confirm in READ_RDB.pm.
    read_rdb_header(*RDBIN);

    my $model_col  = $col_num{"name"};
    my $target_col = $col_num{"target"};
    (defined($model_col) && defined($target_col))
        or die "Error: $file lacks a \"name\" or \"target\" column";

    while (my $line = <RDBIN>) {
        chomp($line);
        my @fields = split(/\t/, $line);

        my $model  = $fields[$model_col];
        my $target = $fields[$target_col];

        # Only use $1/$2 after a successful match; after a failed match
        # they would not describe this row at all.
        unless (defined($model)
                && defined($target)
                && $model =~ /^(.+)(_TS[12345])$/) {
            warn "Warning: skipping line $INPUT_LINE_NUMBER of $file:"
               . " no model name ending in _TS[12345]\n";
            next;
        }
        my ($name, $extension) = ($1, $2);

        my $target_model = $target . "\t" . $name;

        for my $i (0 .. $#col_names) {
            my $col = $col_names[$i];
            next if ($col eq "name" || $col eq "target" || $col eq "length");

            # e.g. column "n_ca_c" of model "..._TS2" becomes "n_ca_c_TS2"
            my $out_col = $col . $extension;
            $value{$target_model}{$out_col} = $fields[$i];
            $all_cols{$out_col} = 1;
        }
    }
    close RDBIN;
}

my @sorted_cols = sort(keys(%all_cols));

# rdb header: the column-name line, then the column-format line
# (two string columns followed by one numeric column per merged cost).
print "target\tname\t" . join("\t", @sorted_cols) . "\n";
print "10S\t20S" . ("\t10N" x scalar(@sorted_cols)) . "\n";

foreach my $row_name (sort(keys(%value))) {
    my $row = $value{$row_name};
    # A target/server pair need not have every _TSn model; cells with no
    # value are written as empty fields.
    print $row_name . "\t"
        . join("\t", map { defined($row->{$_}) ? $row->{$_} : "" } @sorted_cols)
        . "\n";
}