#!/usr/local/bin/perl -w
#
# Usage: get_t2k_lib_hits < foo.best-scores.rdb > foo.top_hits_t2k
#        get_t2k_lib_hits -negate < foo.best-scores.rdb > foo.top_hits_non_t2k
#
# Reads a best-scores RDB table on STDIN and prints, one per line, the
# chain ID and its FSSP representative (tab-separated) for every hit whose
# chain ID is in the T2K template library.  With -negate, prints the hits
# whose chain ID is NOT in the library instead.

use FileHandle;
STDERR->autoflush(1);

use English;
use File::Basename;
use lib dirname($PROGRAM_NAME);    # project modules live next to this script
use IdChecker;
use READ_RDB;

# NOTE(review): "use strict" is intentionally not enabled here; %col_num
# (below) looks like a package global filled in by READ_RDB -- confirm
# before tightening.

# 1 when the first argument starts with "-negate", else 0.
my $report_not_in_t2k = (@ARGV && $ARGV[0] =~ /^-negate/) ? 1 : 0;

# Build a hash of IDs in the template library.
# We assume that all necessary template sequences and models have
# already been built.
my $T2K_ID_FILE = "/projects/compbio/experiments/models.97/indexes/t2k.ids";
my %t2k_ids     = IdChecker::ReadIDs($T2K_ID_FILE);

# Consume the RDB header from STDIN; the column-name => index map is then
# read from %col_num (presumably populated by read_rdb_header -- verify
# against READ_RDB).
read_rdb_header(*STDIN);
my $idcolnum   = $col_num{"Sequence_ID"};
my $FSSPcolnum = $col_num{"FSSP-rep"};

while (my $line = <STDIN>) {
    # Skip lines that don't contain a chain ID in the first column
    # (this also skips blank lines, so test BEFORE chomping).
    next if $line =~ /^\s+/;

    # Strip the line terminator so that a chain ID or FSSP-rep in the last
    # column does not carry a trailing newline into the lookup / output.
    chomp $line;

    my @cols    = split("\t", $line);
    my $chainID = $cols[$idcolnum];
    my $fsspRep = $cols[$FSSPcolnum];

    # Report the hit when its library membership is the one requested:
    # in the library normally, not in the library under -negate.
    my $in_library = defined($t2k_ids{$chainID}) ? 1 : 0;
    if ($in_library != $report_not_in_t2k) {
        print "$chainID\t$fsspRep\n";
    }
}