#!/usr/local/bin/perl -w #Usage: t2k_non_lib_pairwise_alignments workdir target T2Kmlib use FileHandle; STDERR->autoflush(1); use English; use File::Basename; use lib dirname($PROGRAM_NAME); use IdChecker; use lib qw(/projects/compbio/experiments/models.97/scripts); use SamT2k; # for run_prog, run_cmd ($WORKDIR, $TARGET, $T2K_MLIB, $DBSIZE) = @ARGV; #fix path set by SamT2k to include pcb needed by get-pdb-info my $oldPATH = $ENV{"PATH"}; $ENV{"PATH"} = "$oldPATH:/projects/compbio/bin"; $SAMBIN = "/projects/compbio/experiments/protein-predict/SAM_T02/bin_freeze/sam"; $T02_SCRIPTS = "/projects/compbio/experiments/protein-predict/SAM_T02/scripts"; $TARGET_AA_MOD = "$TARGET.t2k-w0.5.mod"; $TARGET_SEED_A2M = "$TARGET.a2m"; $PDB_DB = "/projects/compbio/experiments/protein-predict/SAM_T02/data/pdbaa"; #hmm built with only the target seq $TARGET_SEED_MOD = "$TARGET.mod"; %hitshash = (); # get ids of the top hits while($line = ) { chomp $line; @cols = split("\t", $line); #key is chainID of the hit and value is its FSSP rep $hitshash{$cols[0]} = $cols[1]; } foreach $PRED (keys %hitshash) { #destination file prefixes for joint alignments $TP = "$PRED/$TARGET-$PRED"; #create directory for alignments to this hit if ( !(defined $WORKDIR) || !(defined $PRED)){ die "WORKDIR:$WORKDIR PRED:$PRED missing!\n"; } #create directory for alignments to this hit $new_dir = $WORKDIR . "/" . $PRED; run_prog("mkdir $new_dir") if (! -e $new_dir); #the sequence is not in our library so we must #get the sequence from the pdb seqs file run_prog( "$T02_SCRIPTS/get_one_seq $PRED < $PDB_DB > $WORKDIR/$PRED/$PRED.fa"); $PRED_SEQ = "$WORKDIR/$PRED/$PRED.fa"; #in this case we have the fasta sequence $PRED_SEQ only #and our target models and sequences $adpstyle = 1; &pw_viterbi($T2K_MLIB, $adpstyle); &pw_local($T2K_MLIB, $adpstyle); &pw_simple_SW($adpstyle); $adpstyle = 5; &pw_viterbi($T2K_MLIB, $adpstyle); &pw_local($T2K_MLIB, $adpstyle); &pw_simple_SW($adpstyle); } exit(0); sub pw_viterbi{ #no viterbi mlibs built yet so these are not calibrated #We pass in the MLIB argument just to be ready for the MLIBS #that we will build soon ($MLIB, $adp) = @_; #target-template #align stride-mixed sequence $cmd = "$SAMBIN/hmmscore $TP-vit-adpstyle$adp.pw -i $WORKDIR/$TARGET_AA_MOD -db $WORKDIR/$TARGET_SEED_A2M -db $PRED_SEQ -dbsize $DBSIZE -viterbi 1 -sw 2 -subtract_null 4 -select_align 8 -adpstyle $adp"; run_prog("$cmd"); $cmd = "gzip -f $TP-vit-adpstyle$adp.pw.a2m"; run_prog("$cmd"); } #do SW alignment of the template sequence to a HMM built #with only the target sequence sub pw_simple_SW{ ($adp) = @_; #target-template #align stride-mixed sequence $cmd = "$SAMBIN/hmmscore $TP-simpleSW-adpstyle$adp.pw -i $WORKDIR/$TARGET_SEED_MOD -db $WORKDIR/$TARGET_SEED_A2M -db $PRED_SEQ -dbsize $DBSIZE -adpstyle $adp -sw 2 -subtract_null 4 -select_align 8"; run_prog("$cmd"); $cmd = "gzip -f $TP-simpleSW-adpstyle$adp.pw.a2m"; run_prog("$cmd"); } sub pw_local{ ($MLIB, $adp) = @_; #target-template #align stride-mixed sequence $cmd = "$SAMBIN/hmmscore $TP-local-adpstyle$adp.pw -i $WORKDIR/$TARGET_AA_MOD -modellibrary $WORKDIR/$MLIB -db $WORKDIR/$TARGET_SEED_A2M -db $PRED_SEQ -dbsize $DBSIZE -adpstyle $adp -sw 2 -subtract_null 4 -select_align 8"; run_prog("$cmd"); $cmd = "gzip -f $TP-local-adpstyle$adp.pw.a2m"; run_prog("$cmd"); } sub print_usage_exit{ print "Usage: t2k_non_lib_pairwise_alignments workdir target T2Kmlib < seed-top_hits_non_t2k\n"; exit(-1); }