# LCTARGET=	set_in_Makefile
# TARGET=	SET_IN_Makefile
#
# Neither LCTARGET nor TARGET is assigned in this part of the file; per the
# two lines above they are set "in Makefile" (presumably the per-target
# Makefile that pulls this file in -- confirm).

# TO DO:
#   * Add more pseudo-targets to get finer-grained output to summary.html
#   * Program format_casp_alignments should use CASP5-2D-ID
#

#to come:  transfer activity from the pairwise_alignments script
#back to this Makefile.
#the script has dependencies on some of Mark D.'s tcl scripts
#which in turn depend on various scripts which are in various
#locations in /projects/compbio.

#to come:  transfer the Diekhans scripts and all dependencies
#into the SAM_T02/scripts directory

AL_METHOD = t2k
TARG_AL = ${TARGET}.${AL_METHOD}

CASP5-2D-ID = 4069-6308-1312
CASP5-WEBSITE = http://predictioncenter.llnl.gov/casp5/targets/templates

WEBROOTDIR = /projects/compbio/experiments/protein-predict/SAM_T02
WEBSCRIPTSDIR = ${WEBROOTDIR}/scripts
CASP5 = /projects/compbio/experiments/casp5
CASP5-SCRIPTS = ${CASP5}/scripts-human
UNDERTAKER-SCRIPTS = /cse/faculty/karplus/undertaker/scripts
ifndef UNDERTAKER
UNDERTAKER = /cse/faculty/karplus/undertaker/undertaker
endif

# Simply-expanded (:=) so that `pwd` is run once at parse time instead of
# once for every ${WORKDIR} reference in the recipes below.
WORKDIR := $(shell pwd)
ifndef BASE-URL
BASE-URL = file:${WORKDIR}
endif

#home for frozen versions of the programs and scripts we need
#note: these are also set in BIN-SCRIPTS2K/sam-t2k.conf
BIN = ${WEBROOTDIR}/bin_freeze

PCB = /projects/compbio/bin
PCBS = ${PCB}/scripts
ifndef UNAME-M
# Simply-expanded for the same reason as WORKDIR: run `uname -m` only once.
UNAME-M := $(shell uname -m)
endif
PCB-SUB = ${PCB}/${UNAME-M}
FRAGFINDER = ${PCB-SUB}/fragfinder

BIN-SAM = ${PCB-SUB}
# NOTE(review): this overrides make's built-in MAKE variable with a fixed path.
MAKE = /usr/bin/gmake
CONVERSION_DIR = ${WEBROOTDIR}/casp_convert
PCEM = /projects/compbio/experiments/models.97
PCEM-SCRIPTS = ${PCEM}/scripts
PCEM-SCRIPTS2K = ${PCEM}/scripts2k

# approx size of the template library (t2k.ids)
LIBSIZE = 6014

# summary.html is assembled by appending to it from the pseudo-targets in the
# order in which `default` lists them, so rules must not run concurrently.
.NOTPARALLEL:

# Files that make must never delete as intermediates or on interrupt.
# NOTE(review): the last entry is fragment-a2m/${TARG_AL}.frag, whereas the
# frag-a2m pseudo-target depends on ${TARG_AL}.frag (no directory part) --
# confirm which path is intended.
.PRECIOUS: \
    ${TARGET}.a2m ${TARG_AL}.a2m.gz \
    ${TARG_AL}-100-30-dssp-ebghstl-scores.rdb \
    ${TARG_AL}-100-30-dssp-ebghstl-scores.html \
    ${TARG_AL}-100-30-stride-ebghtl-scores.rdb \
    ${TARG_AL}-100-30-stride-ebghtl-scores.html \
    ${TARG_AL}-100-30-str-scores.rdb \
    ${TARG_AL}-100-30-str-scores.html \
    ${TARG_AL}-100-30-alpha-scores.rdb \
    ${TARG_AL}-100-30-alpha-scores.html \
    ${TARG_AL}-100-30-dssp_ehl2-scores.rdb \
    ${TARG_AL}-100-30-dssp_ehl2-scores.html \
    fragment-a2m/${TARG_AL}.frag

##################
# pseudo targets #
##################

# Full pipeline.  The prerequisites are listed in the order in which their
# output has to be appended to summary.html; each N.end_section closes the
# section opened by the preceding summary_* / *_start_section target.
default: \
    summary_create summary_inputs receipt_ack build_t2k_alignment \
    build_pretty_alignment build_mod 1.end_section \
    summary_2track 2track 2.end_section \
    summary_logos \
    w0.5_logo dssp_logo stride_logo str_logo alpha_logo dssp_ehl2_logo \
    3.end_section \
    summary_target_mod_scores 2track_target_mod_scores \
    2track_target_mod_scores_annotate 1track_target_mod_scores \
    4.end_section \
    summary_template_mod_scores template_mod_scores 5.end_section \
    summary_top_hits top_hits 6.end_section \
    summary_top_alignments simple_seed_mod top_alignments_to_build \
    final_predictions 7.end_section \
    undertaker_start_section rasmol_scripts \
    undertaker_multi_align undertaker_from_many \
    undertaker_show_alignment \
    frag-a2m \
    8.end_section \
    summary_end

# These names are commands, not files; declaring them phony keeps a stray file
# of the same name from silently disabling the rule.
.PHONY: default summary_create summary_inputs receipt_ack

# Fetch the target's sequence, documentation and blank PDB file from the CASP5
# web site and rename them to the ${TARGET}.* names used by the other rules.
# Note that this one recipe produces three files, of which only
# ${TARGET}.a2m is tracked as a target.
${TARGET}.a2m:
	wget -N \
		${CASP5-WEBSITE}/${LCTARGET}.seq.txt \
		${CASP5-WEBSITE}/${LCTARGET}.doc.html \
		${CASP5-WEBSITE}/${LCTARGET}.pdb.txt
	mv -f ${LCTARGET}.seq.txt ${TARGET}.a2m
	mv -f ${LCTARGET}.pdb.txt ${TARGET}.blank.pdb
	mv -f ${LCTARGET}.doc.html ${TARGET}.doc.html

#create html results page
# (this is the only rule that truncates summary.html; all others append)
summary_create:
	${CASP5-SCRIPTS}/create_summary_html \
		${TARGET} ${BASE-URL} > ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_master_table_head_summary_html \
		>> ${WORKDIR}/summary.html

#start the inputs section
summary_inputs:
	${WEBSCRIPTSDIR}/add_section_head_summary_html \
		Inputs >> ${WORKDIR}/summary.html

#acknowledge sequence receipt
#add pointer to sequence file to the html results page
receipt_ack: ${TARGET}.a2m
	echo Received sequence ${TARGET}.a2m
	${WEBSCRIPTSDIR}/add_summary_html \
		"Submitted sequence(s)" \
		${BASE-URL}/${TARGET}.a2m ${TARGET}.a2m >> ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Provided documentation" \
		${BASE-URL}/${TARGET}.doc.html ${TARGET}.doc.html >> ${WORKDIR}/summary.html

# Explicit pseudo-targets defined in this part of the file.  Pseudo-targets
# that are matched by pattern rules (N.end_section, X_color_script,
# ${AL_METHOD}-X-scores) are deliberately NOT listed: make skips implicit rule
# search for phony targets, so listing them would disable their rules.
.PHONY: \
    build_t2k_alignment build_pretty_alignment build_mod \
    summary_2track 2track \
    do_dssp do_stride do_str do_alpha do_dssp_ehl2 \
    summary_logos \
    w0.5_logo dssp_logo stride_logo str_logo alpha_logo dssp_ehl2_logo \
    summary_target_mod_scores 2track_target_mod_scores \
    2track_target_mod_scores_annotate 1track_target_mod_scores \
    summary_template_mod_scores template_mod_scores \
    summary_top_hits top_hits \
    summary_top_alignments simple_seed_mod top_alignments_to_build \
    final_predictions many-alignments top-alignments \
    rasmol_scripts \
    undertaker_multi_align undertaker_from_many undertaker_show_alignment \
    frag-a2m summary_end

# Each of the next three targets builds one file and appends a link to it
# to summary.html.
build_t2k_alignment: ${TARGET}.t2k.a2m.gz
	${WEBSCRIPTSDIR}/add_summary_html \
		"SAM_T02 multiple alignment in a2m format" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

build_pretty_alignment: ${TARG_AL}.pa.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"SAM_T02 multiple alignment in pretty html format" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

build_mod: ${TARG_AL}-w0.5.mod
	${WEBSCRIPTSDIR}/add_summary_html \
		"SAM_T02 target hidden Markov model" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

#start the two-track section
summary_2track:
	${WEBSCRIPTSDIR}/add_section_head_summary_html \
		"Secondary Structure Prediction" >> ${WORKDIR}/summary.html

# Run every secondary-structure predictor on the thinned alignment.
2track: ${TARG_AL}-thin90.a2m.gz \
    do_dssp do_stride do_str do_alpha do_dssp_ehl2

do_dssp: ${TARG_AL}.dssp-ebghstl.rdb ${TARG_AL}.dssp-ebghstl.mod \
    ${TARG_AL}.dssp-color.rasmol
	${WEBSCRIPTSDIR}/add_summary_html \
		"DSSP EBGHSTL structure prediction CASP format" \
		${BASE-URL}/${TARG_AL}.dssp-ebghstl ${TARG_AL}.dssp-ebghstl >> ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"DSSP EBGHSTL structure prediction RDB format" \
		${BASE-URL}/${TARG_AL}.dssp-ebghstl.rdb ${TARG_AL}.dssp-ebghstl.rdb >> ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"DSSP EBGHSTL structure prediction sequence format" \
		${BASE-URL}/${TARG_AL}.dssp-ebghstl.seq ${TARG_AL}.dssp-ebghstl.seq >> ${WORKDIR}/summary.html

do_stride: ${TARG_AL}.stride-ebghtl.rdb ${TARG_AL}.stride-ebghtl.mod \
    ${TARG_AL}.stride-color.rasmol
	${WEBSCRIPTSDIR}/add_summary_html \
		"Stride EBGHTL structure prediction CASP format" \
		${BASE-URL}/${TARG_AL}.stride-ebghtl ${TARG_AL}.stride-ebghtl >> ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Stride EBGHTL structure prediction RDB format" \
		${BASE-URL}/${TARG_AL}.stride-ebghtl.rdb ${TARG_AL}.stride-ebghtl.rdb >> ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Stride EBGHTL structure prediction sequence format" \
		${BASE-URL}/${TARG_AL}.stride-ebghtl.seq ${TARG_AL}.stride-ebghtl.seq >> ${WORKDIR}/summary.html

do_str: ${TARG_AL}.str.rdb ${TARG_AL}.str.mod \
    ${TARG_AL}.str-color.rasmol
	${WEBSCRIPTSDIR}/add_summary_html \
		"Str structure prediction CASP format" \
		${BASE-URL}/${TARG_AL}.str ${TARG_AL}.str >> ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Str structure prediction RDB format" \
		${BASE-URL}/${TARG_AL}.str.rdb ${TARG_AL}.str.rdb >> ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Str structure prediction sequence format" \
		${BASE-URL}/${TARG_AL}.str.seq ${TARG_AL}.str.seq >> ${WORKDIR}/summary.html

do_alpha: ${TARG_AL}.alpha.rdb ${TARG_AL}.alpha.mod \
    ${TARG_AL}.alpha-color.rasmol
	${WEBSCRIPTSDIR}/add_summary_html \
		"Alpha angle prediction RDB format" \
		${BASE-URL}/${TARG_AL}.alpha.rdb ${TARG_AL}.alpha.rdb >> ${WORKDIR}/summary.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Alpha angle prediction sequence format" \
		${BASE-URL}/${TARG_AL}.alpha.seq ${TARG_AL}.alpha.seq >> ${WORKDIR}/summary.html

do_dssp_ehl2: ${TARG_AL}.dssp_ehl2.rdb \
    ${TARG_AL}.dssp_ehl2 \
    ${TARG_AL}.dssp_ehl2.mod
	${WEBSCRIPTSDIR}/add_summary_html \
		"DSSP 3-value prediction RDB format" \
		${BASE-URL}/${TARG_AL}.dssp_ehl2.rdb ${TARG_AL}.dssp_ehl2.rdb >> ${WORKDIR}/summary.html
# BUG: don't have rdb->seq format conversion for dssp_ehl2
#	${TARG_AL}.dssp_ehl2-color.rasmol
#	${WEBSCRIPTSDIR}/add_summary_html \
#		"DSSP 3-value prediction sequence format" \
#		${BASE-URL}/${TARG_AL}.dssp_ehl2.seq ${TARG_AL}.dssp_ehl2.seq >> ${WORKDIR}/summary.html

summary_logos:
	${WEBSCRIPTSDIR}/add_section_head_summary_html \
		"Sequence Logos" >> ${WORKDIR}/summary.html

# One pseudo-target per logo: build the .eps and link it from summary.html.
w0.5_logo: ${TARG_AL}.w0.5-logo.eps
	${WEBSCRIPTSDIR}/add_summary_html \
		"SAM_T02 multiple alignment in sequence logo format" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

dssp_logo: ${TARG_AL}.dssp-ebghstl-logo.eps
	${WEBSCRIPTSDIR}/add_summary_html \
		"DSSP EBGHSTL structure prediction in sequence logo format" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

stride_logo: ${TARG_AL}.stride-ebghtl-logo.eps
	${WEBSCRIPTSDIR}/add_summary_html \
		"Stride EBGHTL structure prediction in sequence logo format" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

str_logo: ${TARG_AL}.str-logo.eps
	${WEBSCRIPTSDIR}/add_summary_html \
		"Str structure prediction in sequence logo format" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

alpha_logo: ${TARG_AL}.alpha-logo.eps
	${WEBSCRIPTSDIR}/add_summary_html \
		"Alpha angle prediction in sequence logo format" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

dssp_ehl2_logo: ${TARG_AL}.dssp_ehl2-logo.eps
	${WEBSCRIPTSDIR}/add_summary_html \
		"DSSP 3-value prediction in sequence logo format" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

#start the target model scores section
summary_target_mod_scores:
	${WEBSCRIPTSDIR}/add_section_head_summary_html \
		"Target model scores" >> ${WORKDIR}/summary.html

# Calibrated two-track model libraries and their score distributions.
2track_target_mod_scores: \
    ${TARG_AL}-100-30-str.mlib \
    ${TARG_AL}-100-30-str.dist \
    ${TARG_AL}-100-30-dssp-ebghstl.mlib \
    ${TARG_AL}-100-30-dssp-ebghstl.dist \
    ${TARG_AL}-100-30-stride-ebghtl.mlib \
    ${TARG_AL}-100-30-stride-ebghtl.dist \
    ${TARG_AL}-100-30-alpha.mlib \
    ${TARG_AL}-100-30-alpha.dist \
    ${TARG_AL}-100-30-dssp_ehl2.mlib \
    ${TARG_AL}-100-30-dssp_ehl2.dist

# Each prerequisite below is resolved by the ${AL_METHOD}-%-scores pattern rule.
2track_target_mod_scores_annotate: \
    ${AL_METHOD}-100-30-dssp-ebghstl-scores \
    ${AL_METHOD}-100-30-stride-ebghtl-scores \
    ${AL_METHOD}-100-30-str-scores \
    ${AL_METHOD}-100-30-alpha-scores \
    ${AL_METHOD}-100-30-dssp_ehl2-scores

# Pattern pseudo-target: the stem ($*) is the model name, e.g. 100-30-str.
${AL_METHOD}-%-scores: ${TARG_AL}-%-scores.rdb \
    ${TARG_AL}-%-scores.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Annotated amino acid/$* two-track target model scores" \
		${BASE-URL}/${TARG_AL}-$*-scores.html \
		${TARG_AL}-$*-scores.html >> ${WORKDIR}/summary.html

1track_target_mod_scores: ${TARG_AL}-w0.5.mlib \
    ${TARG_AL}-w0.5.dist \
    ${TARG_AL}-w0.5-scores.rdb \
    ${TARG_AL}-w0.5-scores.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Annotated amino acid single-track target model scores of PDB" \
		${BASE-URL}/${TARG_AL}-w0.5-scores.html \
		${TARG_AL}-w0.5-scores.html >> ${WORKDIR}/summary.html

#start the template model scores section
summary_template_mod_scores:
	${WEBSCRIPTSDIR}/add_section_head_summary_html \
		"Template model scores" >> ${WORKDIR}/summary.html

template_mod_scores: ${TARGET}.template-lib.dist-rdb \
    ${TARGET}.template-lib-sorted.rdb \
    ${TARGET}.template-lib-scores.rdb \
    ${TARGET}.template-lib-scores.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Annotated template model scores" \
		${BASE-URL}/${TARGET}.template-lib-scores.html \
		${TARGET}.template-lib-scores.html >> ${WORKDIR}/summary.html

#start the top hits section
summary_top_hits:
	${WEBSCRIPTSDIR}/add_section_head_summary_html \
		"Top Hits" >> ${WORKDIR}/summary.html

top_hits: ${TARG_AL}.best-scores.rdb \
    ${TARG_AL}.best-scores.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"Best scoring hits from all models" \
		${BASE-URL}/${TARG_AL}.best-scores.html \
		${TARG_AL}.best-scores.html >> ${WORKDIR}/summary.html

#start the top alignments section
summary_top_alignments:
	${WEBSCRIPTSDIR}/add_section_head_summary_html \
		"Alignments for Top Hits" >> ${WORKDIR}/summary.html

simple_seed_mod: ${TARGET}.mod

top_alignments_to_build: ${TARG_AL}.top_hits_t2k \
    ${TARG_AL}.top_hits_non_t2k \
    ${TARG_AL}.top_hits_pwise_alignments.rdb

final_predictions: many-alignments top-alignments

# A space now precedes the backslash after the URL argument so that it can
# never be fused with the file-name argument on the following line.
many-alignments: ${TARG_AL}.predicted_alignments.rdb \
    ${TARG_AL}.top_reported_alignments.rdb \
    ${TARG_AL}.many_alignments.rdb \
    ${TARG_AL}.many_alignments.html \
    ${TARG_AL}.undertaker-align.script
	${WEBSCRIPTSDIR}/add_summary_html \
		"T02 Many Alignments Summary" \
		${BASE-URL}/${TARG_AL}.many_alignments.html \
		${TARG_AL}.many_alignments.html >> ${WORKDIR}/summary.html

# Same backslash spacing fix as in many-alignments.
top-alignments: ${TARG_AL}.casp_top_reported_alignments.rdb \
    ${TARG_AL}.top_reported_alignments.html
	${WEBSCRIPTSDIR}/add_summary_html \
		"T02 Top Models Summary" \
		${BASE-URL}/${TARG_AL}.top_reported_alignments.html \
		${TARG_AL}.top_reported_alignments.html >> ${WORKDIR}/summary.html

# Each prerequisite below is resolved by the %_color_script pattern rule.
# NOTE(review): dssp_ehl2_color_script needs ${TARG_AL}.dssp_ehl2.seq, and the
# BUG comment under do_dssp_ehl2 says no rdb->seq conversion exists for it.
rasmol_scripts: \
    dssp_color_script \
    stride_color_script \
    str_color_script \
    alpha_color_script \
    dssp_ehl2_color_script

%_color_script: ${TARG_AL}.%-color.rasmol
	${WEBSCRIPTSDIR}/add_summary_html \
		"Script for $* coloring in rasmol" \
		${BASE-URL}/$^ $^ \
		>> ${WORKDIR}/summary.html

undertaker_multi_align: ${TARG_AL}-2track-undertaker.a2m
	${WEBSCRIPTSDIR}/add_summary_html \
		"Multiple alignment for undertaker templates" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

undertaker_from_many: ${TARG_AL}.undertaker-align.script
	${WEBSCRIPTSDIR}/add_summary_html \
		"many alignments for undertaker templates" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

undertaker_show_alignment: ${TARG_AL}.undertaker-align.pdb.gz
	${WEBSCRIPTSDIR}/add_summary_html \
		"PDB file with models for many alignments" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

# mkdir -p stays quiet when decoys/ already exists; the leading "-" keeps the
# directory creation best-effort, as before.
frag-a2m: ${TARG_AL}.frag
	-mkdir -p decoys
	${WEBSCRIPTSDIR}/add_summary_html \
		"Fragment list for undertaker (from fragfinder)" \
		${BASE-URL}/$^ $^ >> ${WORKDIR}/summary.html

#end html results page
summary_end:
	${WEBSCRIPTSDIR}/end_summary_html \
		>> ${WORKDIR}/summary.html

#################
# web-interface #
#################
# Matches 1.end_section, 2.end_section, ... ; closes the current section.
%.end_section:
	${WEBSCRIPTSDIR}/end_section_summary_html \
		>> ${WORKDIR}/summary.html

########################################
# Building a ${AL_METHOD} alignment from a seed #
########################################
ifndef A2M
A2M = ${AL_METHOD}.a2m.gz
endif
TARGET-A2M = ${TARGET}.${A2M}

#frozen versions of these programs
BIN-SCRIPTS2K = ${BIN}/scripts2k
TARGET2K = ${BIN-SCRIPTS2K}/target2k
#build a model (somewhat) quickly for web-page testing
TARGET2KTEST = ${BIN-SCRIPTS2K}/target2k -iter 1
HMMSCORE = ${BIN-SAM}/hmmscore
HMMSCORE_NEW = ${BIN-SAM}/hmmscore.new

# Build the alignment from the seed sequence and compress the result.
%.t2k.a2m.gz: %.a2m
	${TARGET2K} -out $*.t2k \
		-seed $^
	gzip -f $*.t2k.a2m

#thin the alignment to 90% sequence identity for use with the neural nets
#(which were trained on thinned alignments)
%-thin90.a2m.gz: %.a2m.gz
	${BIN-SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.90
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

#convert an a2m.gz alignment to a human-readable model
%-w0.5.mod: %.a2m.gz
	${BIN-SCRIPTS2K}/w0.5 $^ $@.tmp
	${BIN-SAM}/hmmconvert $*-w0.5 -model_file $@.tmp
	-rm -f $@.tmp

#compress a file
%.gz: %
	gzip -f $^

#make a pretty-aligned alignment from a compressed a2m alignment
%.pa: %.a2m.gz
	${BIN-SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

%.pa: %.a2m
	${BIN-SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

# NOTE(review): this variant runs ${PCBS}/a2m2html while the uncompressed
# variant below runs ${WEBSCRIPTSDIR}/a2m2html -- confirm that is intended.
%.pa.html: %.a2m.gz
	gunzip -c $^ > tmp.a2m
	${PCBS}/a2m2html -a2m_in tmp.a2m > $@
	rm tmp.a2m

%.pa.html: %.a2m
	${WEBSCRIPTSDIR}/a2m2html -a2m_in $^ > $@

##################################
#  ANNOTATING A TARGET           #
##################################
# secondary structure prediction #
##################################
BIN-PREDICT2ND = ${BIN}/predict_2nd
PCEM-INDEXES = /projects/compbio/experiments/models.97/indexes
LIB = /projects/compbio/lib
PREDICT-2ND = /cse/faculty/karplus/dna/predict-2nd/predict-2nd

TEMPLATE-SEQS = ${PCEM-INDEXES}/t2k.x-seqs
MIXTURE = ${LIB}/recode3.20comp
TRANS-REG = ${LIB}/fssp-trained.regularizer

# DSSP-related stuff:
EBGHSTL-NET = ${BIN-PREDICT2ND}/networks/t2k-5740-IDaaHr-5-15-7-15-9-15-13-ebghstl-seeded.net
TEMPLATE-EBGHSTL = ${PCEM-INDEXES}/t2k.dssps
EBGHSTL-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-EBGHSTL}

# Writes a command script and feeds it to predict-2nd on stdin.  One run
# produces all three outputs.  The script gets a per-stem name so that the
# four prediction rules no longer share a single scratch file.
%.dssp-ebghstl %.dssp-ebghstl.rdb %.dssp-ebghstl.seq : %-thin90.a2m.gz ${EBGHSTL-NET}
	echo ReadNeuralNet ${EBGHSTL-NET} > $*.dssp-ebghstl.tmp.script
	echo ReadA2M $< >> $*.dssp-ebghstl.tmp.script
	echo PrintPrediction $*.dssp-ebghstl ${CASP5-2D-ID} >> $*.dssp-ebghstl.tmp.script
	echo PrintPredictionFasta $*.dssp-ebghstl.seq >> $*.dssp-ebghstl.tmp.script
	echo PrintRDB $*.dssp-ebghstl.rdb >> $*.dssp-ebghstl.tmp.script
	${PREDICT-2ND} < $*.dssp-ebghstl.tmp.script
	rm $*.dssp-ebghstl.tmp.script

%.dssp-ebghstl.mod: %.dssp-ebghstl.rdb
	${PCEM-SCRIPTS}/2nd-rdb-to-sam-model -alphabet EBGHSTL $^ $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-dssp-ebghstl.dist is called
%-100-30-dssp-ebghstl.mlib: %-w0.5.mod %.dssp-ebghstl.mod
	${HMMSCORE} $*-100-30-dssp-ebghstl \
		-calibrate 1 \
		-alphabet protein,EBGHSTL \
		-trackmod $*-w0.5.mod,$*.dssp-ebghstl.mod \
		-db ${EBGHSTL-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-dssp-ebghstl.dist
#		-trackprior rsdb-comp2.32comp,t99-2d-comp.9comp

# hmmscore is run under the scratch name "dsspfoo"; the file it leaves behind
# (dsspfoo.1.<target name>) is then renamed to the target.
%-100-30-dssp-ebghstl.dist: %-100-30-dssp-ebghstl.mlib \
    ${TEMPLATE-SEQS} ${TEMPLATE-EBGHSTL}
	${HMMSCORE} dsspfoo \
		-modellibrary $< \
		-db ${EBGHSTL-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	mv -f dsspfoo.1.$@ $@

%-100-30-dssp-ebghstl-scores.rdb: %-100-30-dssp-ebghstl.dist
	${WEBSCRIPTSDIR}/annotate_target_scores ${AL_METHOD}-100-30-dssp-ebghstl < $^ > $@

%-100-30-dssp-ebghstl-scores.html: %-100-30-dssp-ebghstl-scores.rdb
	${WEBSCRIPTSDIR}/oneway_hits_rdb2html $*-100-30-dssp-ebghstl-scores < $^ > $@

%.dssp-ebghstl-logo.eps: %.dssp-ebghstl.mod
	${BIN-SAM}/makelogo $*.dssp-ebghstl-logo -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 50 -logo_title "$* EBGHSTL" \
		-logo_caption_f ${TARG_AL}.dssp-ebghstl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/stride.colors

# Also leaves a symlink named "dssp" pointing at the generated script.
%.dssp-color.rasmol: %.dssp-ebghstl.seq
	${CASP5-SCRIPTS}/rasmol_color_from_2ry -pdb ${TARGET}.blank.pdb < $^ > $@
	ln -sf $@ dssp

# STRIDE-related stuff:
EBGHTL-NET = ${BIN-PREDICT2ND}/networks/t2k-5651-IDaaHr-5-15-7-15-9-15-13-ebghtl-stride-seeded.net
TEMPLATE-EBGHTL = ${PCEM-INDEXES}/t2k.2ds
EBGHTL-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-EBGHTL}

# Same scheme as the DSSP prediction rule; per-stem scratch script name.
%.stride-ebghtl %.stride-ebghtl.rdb %.stride-ebghtl.seq : %-thin90.a2m.gz ${EBGHTL-NET}
	echo ReadNeuralNet ${EBGHTL-NET} > $*.stride-ebghtl.tmp.script
	echo ReadA2M $< >> $*.stride-ebghtl.tmp.script
	echo PrintPrediction $*.stride-ebghtl >> $*.stride-ebghtl.tmp.script
	echo PrintPredictionFasta $*.stride-ebghtl.seq >> $*.stride-ebghtl.tmp.script
	echo PrintRDB $*.stride-ebghtl.rdb >> $*.stride-ebghtl.tmp.script
	${PREDICT-2ND} < $*.stride-ebghtl.tmp.script
	rm $*.stride-ebghtl.tmp.script

%.stride-ebghtl.mod: %.stride-ebghtl.rdb
	${PCEM-SCRIPTS}/2nd-rdb-to-sam-model -alphabet EBGHTL $^ $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-stride-ebghtl.dist is called
%-100-30-stride-ebghtl.mlib: %-w0.5.mod %.stride-ebghtl.mod
	${HMMSCORE} $*-100-30-stride-ebghtl \
		-calibrate 1 \
		-alphabet protein,EBGHTL \
		-trackmod $*-w0.5.mod,$*.stride-ebghtl.mod \
		-db ${EBGHTL-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-stride-ebghtl.dist
#		-trackprior rsdb-comp2.32comp,t99-ebghtl-comp.6comp

# this target creates mlib and distfile in one step but then it's awkward
# to get the distfile printed to the web page in a separate section
#%-100-30-stride-ebghtl.mlib: %-w0.5.mod %.stride-ebghtl.mod
#	${HMMSCORE} $*-100-30-stride-ebghtl \
#		-calibrate 1 \
#		-alphabet protein,EBGHTL \
#		-trackmod $*-w0.5.mod,$*.stride-ebghtl.mod \
#		-db ${EBGHTL-TWOTRACKDBS} \
#		-trackcoeff 1.0,0.3 \
#		-sw 2 -dpstyle 0 -subtract_null 4 \
#		-select_score 8
#	${WEBSCRIPTSDIR}/add_summary_html \
#		"Calibration results for amino acid/STRIDE EBGHTL two-track target model" \
#		${BASE-URL}/$@ $@ >> ${WORKDIR}/summary.html

%-100-30-stride-ebghtl.dist: %-100-30-stride-ebghtl.mlib \
    ${TEMPLATE-SEQS} ${TEMPLATE-EBGHTL}
	${HMMSCORE} stridefoo \
		-modellibrary $< \
		-db ${EBGHTL-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	mv -f stridefoo.1.$@ $@

%-100-30-stride-ebghtl-scores.rdb: %-100-30-stride-ebghtl.dist
	${WEBSCRIPTSDIR}/annotate_target_scores ${AL_METHOD}-100-30-stride-ebghtl < $^ > $@

%-100-30-stride-ebghtl-scores.html: %-100-30-stride-ebghtl-scores.rdb
	${WEBSCRIPTSDIR}/oneway_hits_rdb2html $*-100-30-stride-ebghtl-scores < $^ > $@

#make the logo files with TXXX.t2k.stride-ebghtl-logo.eps
# NOT TXXX.stride-ebghtl-logo.eps
%.stride-ebghtl-logo.eps: %.stride-ebghtl.mod
	${BIN-SAM}/makelogo $*.stride-ebghtl-logo -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 50 -logo_title "$* EBGHTL" \
		-logo_caption_f ${TARG_AL}.stride-ebghtl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/stride.colors

%.stride-ebghtl-logo-small.eps: %.stride-ebghtl.mod
	${BIN-SAM}/makelogo $*.stride-ebghtl-logo-small -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 110 \
		-logo_title "$* EBGHTL" \
		-logo_caption_f ${TARG_AL}.stride-ebghtl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/stride.colors

# Also leaves a symlink named "stride" pointing at the generated script.
%.stride-color.rasmol: %.stride-ebghtl.seq
	${CASP5-SCRIPTS}/rasmol_color_from_2ry -pdb ${TARGET}.blank.pdb < $^ > $@
	ln -sf $@ stride

# STR (extended DSSP) stuff:
STR-NET = /projects/compbio2/usr/karplus/predict-2nd/testing/str/networks/t2k-5651-IDaaHr-5-15-7-15-9-15-13-str-seeded.net
TEMPLATE-STR = ${PCEM-INDEXES}/t2k.strs
STR-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-STR}

# As for DSSP/STRIDE, but the script first loads the STR alphabet.
%.str %.str.rdb %.str.seq : %-thin90.a2m.gz ${STR-NET}
	echo ReadAlphabet /projects/compbio/lib/alphabet/str.alphabet > $*.str.tmp.script
	echo ReadNeuralNet ${STR-NET} >> $*.str.tmp.script
	echo ReadA2M $< >> $*.str.tmp.script
	echo PrintPrediction $*.str >> $*.str.tmp.script
	echo PrintPredictionFasta $*.str.seq >> $*.str.tmp.script
	echo PrintRDB $*.str.rdb >> $*.str.tmp.script
	${PREDICT-2ND} < $*.str.tmp.script
	rm $*.str.tmp.script

%.str.mod: %.str.rdb
	${PCEM-SCRIPTS}/2nd-rdb-to-sam-model -alphabet STR $^ $@

# NOTE(review): the prerequisite hard-codes ".t2k" instead of using
# ${AL_METHOD}, and the target is spelled "-dssp_ehl2-logo", not
# ".dssp_ehl2-logo" as in the rule the dssp_ehl2_logo pseudo-target uses.
# The commented-out command below refers to ${BASEURL}; the variable used
# everywhere else in this file is BASE-URL.
%-dssp_ehl2-logo.eps: %.t2k.dssp_ehl2.mod
	${BIN-SAM}/makelogo $*-dssp_ehl2-logo -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 50 -logo_title "$* EHL2" \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/stride.colors
#	${WEBSCRIPTSDIR}/add_summary_html "DSSP EHL structure prediction in sequence logo format" ${BASEURL}/$@ $@ >> ${WORKDIR}/summary.html

%-dssp_ehl2-logo-small.eps: %.dssp_ehl2.mod
	${BIN-SAM}/makelogo $*-dssp_ehl2-logo-small -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 80 -logo_title "$* EHL2" \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/stride.colors

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-str.dist is called
%-100-30-str.mlib: %-w0.5.mod %.str.mod
	${HMMSCORE} $*-100-30-str \
		-calibrate 1 \
		-alphabet protein,STR \
		-trackmod $*-w0.5.mod,$*.str.mod \
		-db ${STR-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-str.dist

# this target creates mlib and distfile in one step but then it's awkward
# to get the distfile printed to the web page in a separate section
#%-100-30-str.mlib: %-w0.5.mod %.str.mod
#	${HMMSCORE} $*-100-30-str \
#		-calibrate 1 \
#		-alphabet protein,STR \
#		-trackmod $*-w0.5.mod,$*.str.mod \
#		-db ${STR-TWOTRACKDBS} \
#		-trackcoeff 1.0,0.3 \
#		-sw 2 -dpstyle 0 -subtract_null 4 \
#		-select_score 8 \
#	${WEBSCRIPTSDIR}/add_summary_html \
#		"Calibration results for amino acid/STR two-track target model" \
#		${BASE-URL}/$@ $@ >> ${WORKDIR}/summary.html

%-100-30-str.dist: %-100-30-str.mlib \
    ${TEMPLATE-SEQS} ${TEMPLATE-STR}
	${HMMSCORE} strfoo \
		-modellibrary $< \
		-db ${STR-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	mv -f strfoo.1.$@ $@

%-100-30-str-scores.rdb: %-100-30-str.dist
	${WEBSCRIPTSDIR}/annotate_target_scores ${AL_METHOD}-100-30-str < $^ > $@

%-100-30-str-scores.html: %-100-30-str-scores.rdb
	${WEBSCRIPTSDIR}/oneway_hits_rdb2html $*-100-30-str-scores < $^ > $@

#make the logo files with TXXX.t2k.str-logo.eps
# NOT TXXX.str-logo.eps
%.str-logo.eps: %.str.mod
	${BIN-SAM}/makelogo $*.str-logo -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 50 \
		-logo_title "$* STR" \
		-logo_caption_f ${TARG_AL}.str.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/str.colors

%.str-logo-small.eps: %.str.mod
	${BIN-SAM}/makelogo $*.str-logo-small -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 110 \
		-logo_title "$* STR" \
		-logo_caption_f ${TARG_AL}.str.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/str.colors

# Also leaves a symlink named "str" pointing at the generated script.
%.str-color.rasmol: %.str.seq
	${CASP5-SCRIPTS}/rasmol_color_from_2ry -pdb ${TARGET}.blank.pdb < $^ > $@
	ln -sf $@ str

# ALPHA angle stuff:
ALPHA-NET = /projects/compbio2/usr/karplus/predict-2nd/testing/alpha/networks/t2k-5651-IDaaHr-5-15-7-15-9-15-13-alpha-seeded.net
TEMPLATE-ALPHA = ${PCEM-INDEXES}/t2k.alphas
ALPHA-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-ALPHA}

# As for STR, with the alpha alphabet and network.
%.alpha %.alpha.rdb %.alpha.seq : %-thin90.a2m.gz ${ALPHA-NET}
	echo ReadAlphabet /projects/compbio/lib/alphabet/alpha.alphabet > $*.alpha.tmp.script
	echo ReadNeuralNet ${ALPHA-NET} >> $*.alpha.tmp.script
	echo ReadA2M $< >> $*.alpha.tmp.script
	echo PrintPrediction $*.alpha >> $*.alpha.tmp.script
	echo PrintPredictionFasta $*.alpha.seq >> $*.alpha.tmp.script
	echo PrintRDB $*.alpha.rdb >> $*.alpha.tmp.script
	${PREDICT-2ND} < $*.alpha.tmp.script
	rm $*.alpha.tmp.script

%.alpha.mod: %.alpha.rdb
	${PCEM-SCRIPTS}/2nd-rdb-to-sam-model -alphabet ALPHA $^ $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-alpha.dist is called
%-100-30-alpha.mlib: %-w0.5.mod %.alpha.mod
	${HMMSCORE} $*-100-30-alpha \
		-calibrate 1 \
		-alphabet protein,ALPHA \
		-trackmod $*-w0.5.mod,$*.alpha.mod \
		-db ${ALPHA-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-alpha.dist

# this target creates mlib and distfile in one step but then it's awkward
# to get the distfile printed to the web page in a separate section
#%-100-30-alpha.mlib: %-w0.5.mod %.alpha.mod
#	${HMMSCORE} $*-100-30-alpha \
#		-calibrate 1 \
#		-alphabet protein,ALPHA \
#		-trackmod $*-w0.5.mod,$*.alpha.mod \
#		-db ${ALPHA-TWOTRACKDBS} \
#		-trackcoeff 1.0,0.3 \
#		-sw 2 -dpstyle 0 -subtract_null 4 \
#		-select_score 8 \
#	${WEBSCRIPTSDIR}/add_summary_html \
#		"Calibration results for amino acid/ALPHA two-track target model" \
#		${BASE-URL}/$@ $@ >> ${WORKDIR}/summary.html

%-100-30-alpha.dist: %-100-30-alpha.mlib \
    ${TEMPLATE-SEQS} ${TEMPLATE-ALPHA}
	${HMMSCORE} alphafoo \
		-modellibrary $< \
		-db ${ALPHA-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	mv -f alphafoo.1.$@ $@

%-100-30-alpha-scores.rdb: %-100-30-alpha.dist
	${WEBSCRIPTSDIR}/annotate_target_scores ${AL_METHOD}-100-30-alpha < $^ > $@

%-100-30-alpha-scores.html: %-100-30-alpha-scores.rdb
	${WEBSCRIPTSDIR}/oneway_hits_rdb2html $*-100-30-alpha-scores < $^ > $@

%.alpha-logo.eps: %.alpha.mod
	${BIN-SAM}/makelogo $*.alpha-logo -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 50 -logo_title "$* ALPHA" \
		-logo_caption_f ${TARG_AL}.alpha.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/alpha.colors

%.alpha-logo-small.eps: %.alpha.mod
	${BIN-SAM}/makelogo $*.alpha-logo-small -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 80 \
		-logo_title "$* ALPHA" \
		-logo_caption_f ${TARG_AL}.alpha.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/alpha.colors

# Also leaves a symlink named "alpha" pointing at the generated script.
%.alpha-color.rasmol: %.alpha.seq
	${CASP5-SCRIPTS}/rasmol_color_from_2ry -pdb ${TARGET}.blank.pdb < $^ > $@
	ln -sf $@ alpha

# DSSP_EHL2 merged prediction stuff:
TEMPLATE-DSSP_EHL2 = ${PCEM-INDEXES}/t2k.dssps
DSSP_EHL2-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-DSSP_EHL2}

# Merge the four per-method RDB predictions into the 3-value EHL2 prediction.
%.t2k.dssp_ehl2.rdb: %.t2k.dssp-ebghstl.rdb \
    %.t2k.stride-ebghtl.rdb \
    %.t2k.str.rdb \
    %.t2k.alpha.rdb
	${WEBSCRIPTSDIR}/RDBCombine $^ -a ${CASP5-2D-ID} > $@

%.t2k.dssp_ehl2: %.t2k.dssp_ehl2.rdb
	${CASP5-SCRIPTS}/rdb2casp $^ ${CASP5-2D-ID} > $@

%.dssp_ehl2.mod: %.dssp_ehl2.rdb
	${PCEM-SCRIPTS}/2nd-rdb-to-sam-model -alphabet EHL2 $^ $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-dssp_ehl2.dist is called
%-100-30-dssp_ehl2.mlib: %-w0.5.mod %.dssp_ehl2.mod
	${HMMSCORE} $*-100-30-dssp_ehl2 \
		-calibrate 1 \
		-alphabet protein,EHL2 \
		-trackmod $*-w0.5.mod,$*.dssp_ehl2.mod \
		-db ${DSSP_EHL2-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-dssp_ehl2.dist

# hmmscore is run under the scratch name "dssp_ehl2foo"; the file it leaves
# behind (dssp_ehl2foo.1.<target name>) is then renamed to the target.
%-100-30-dssp_ehl2.dist: %-100-30-dssp_ehl2.mlib \
    ${TEMPLATE-SEQS} ${TEMPLATE-DSSP_EHL2}
	${HMMSCORE} dssp_ehl2foo \
		-modellibrary $< \
		-db ${DSSP_EHL2-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	mv -f dssp_ehl2foo.1.$@ $@

%-100-30-dssp_ehl2-scores.rdb: %-100-30-dssp_ehl2.dist
	${WEBSCRIPTSDIR}/annotate_target_scores ${AL_METHOD}-100-30-dssp_ehl2 < $^ > $@

%-100-30-dssp_ehl2-scores.html: %-100-30-dssp_ehl2-scores.rdb
	${WEBSCRIPTSDIR}/oneway_hits_rdb2html $*-100-30-dssp_ehl2-scores < $^ > $@

# BUG: don't have dssp_ehl2.seq, so using dssp-ebghstl.seq
%.dssp_ehl2-logo.eps: %.dssp_ehl2.mod
	${BIN-SAM}/makelogo $*.dssp_ehl2-logo -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 50 -logo_title "$* DSSP_EHL2" \
		-logo_caption_f ${TARG_AL}.dssp-ebghstl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/dssp.colors

# Also leaves a symlink named "ehl" pointing at the generated script.
# NOTE(review): nothing in the visible part of this file builds
# %.dssp_ehl2.seq (see the BUG comment above).
%.dssp_ehl2-color.rasmol: %.dssp_ehl2.seq
	${CASP5-SCRIPTS}/rasmol_color_from_2ry -pdb ${TARGET}.blank.pdb < $^ > $@
	ln -sf $@ ehl

#template library scores
%.template-lib.dist-rdb: ${PCEM-INDEXES}/t2k-w0.5-db.mlib ${TARGET}.a2m
	${HMMSCORE_NEW} $*.template-lib -modellibrary $< \
		-db_size ${LIBSIZE} \
		-db ${TARGET}.a2m -rdb 1 \
		-select_score 4 -Emax 40

# Strip comments, shorten model names, keep only the rows whose SEQID is
# ${TARGET}, and sort the table on EVALUE.
%.template-lib-sorted.rdb: %.template-lib.dist-rdb
	${WEBSCRIPTSDIR}/strip_comments < $^ \
		| ${WEBSCRIPTSDIR}/shorten_mod_names \
		| ${PCB}/row SEQID eq ${TARGET} \
		| ${PCB}/sorttbl EVALUE \
		> $@

%.template-lib-scores.rdb: %.template-lib-sorted.rdb
	${WEBSCRIPTSDIR}/annotate_template_scores < $^ > $@

%.template-lib-scores.html: %.template-lib-scores.rdb
	${WEBSCRIPTSDIR}/oneway_hits_rdb2html $*.template-lib-scores < $^ > $@

# single-track model pdb scoring
# PDB_DB = /projects/compbio/data/pdb/all-protein
PDB_DB = /projects/compbio/experiments/protein-predict/SAM_T02/data/pdbaa

#calibrate the single track model
#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-w0.5.dist is called
%-w0.5.mlib: %-w0.5.mod
	${HMMSCORE} $*-w0.5 \
		-calibrate 1 \
		-i $*-w0.5.mod \
		-db ${TEMPLATE-SEQS} \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-w0.5.dist
#		-trackprior rsdb-comp2.32comp

# NOTE(review): this is the only rule that spells the option "-dbsize"; every
# other hmmscore call in this file uses "-db_size" -- confirm which spelling
# hmmscore accepts.
%-w0.5.dist: %-w0.5.mlib
	${HMMSCORE} w0.5foo \
		-modellibrary $^ \
		-db ${PDB_DB} \
		-dbsize ${LIBSIZE} \
		-select_score 4 -Emax 40.
	mv -f w0.5foo.1.$*-w0.5.mod.dist $@

%-w0.5-scores.rdb: %-w0.5.dist
	${WEBSCRIPTSDIR}/annotate_target_scores ${AL_METHOD}-w0.5 < $^ > $@
#	${WEBSCRIPTSDIR}/add_summary_html \
#		"Annotated amino acid single-track target model scores of PDB in RDB format" \
#		${BASE-URL}/$@ $@ >> ${WORKDIR}/summary.html

%-w0.5-scores.html: %-w0.5-scores.rdb
	${WEBSCRIPTSDIR}/oneway_hits_rdb2html $*-w0.5-scores < $^ > $@

#############
#   LOGOS   #
#############

#what is the target.seq if an alignment is submitted ?
%.w0.5-logo.eps: %-w0.5.mod
	${BIN-SAM}/makelogo $*.w0.5-logo -i $^ \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 50 -logo_title "$* w0.5" \
		-logo_caption_f ${TARG_AL}.dssp-ebghstl.seq \
		-logo_under_file ${TARGET}.a2m

###############
#  TOP HITS   #
###############

# define the threshold below which you want hits reported.
# If ANY of the methods reports a hit this good, it will be included
# in ${TARGET}.best_scores.rdb
ifndef NUM_BEST
NUM_BEST = 10
endif
ifndef BEST_EVALUE
BEST_EVALUE = 1.e-05
endif

#find the best hits (include dupes)
%.${AL_METHOD}.best-scores.rdb: \
    %.${AL_METHOD}-w0.5-scores.rdb \
    %.template-lib-scores.rdb \
    %.${AL_METHOD}-100-30-dssp-ebghstl-scores.rdb \
    %.${AL_METHOD}-100-30-stride-ebghtl-scores.rdb \
    %.${AL_METHOD}-100-30-str-scores.rdb \
    %.${AL_METHOD}-100-30-alpha-scores.rdb \
    %.${AL_METHOD}-100-30-dssp_ehl2-scores.rdb
	${CASP5-SCRIPTS}/best_scores -num ${NUM_BEST} -E ${BEST_EVALUE} $^ > $@

# NOTE(review): the name passed to the script is "$*.best_hits", although the
# files are called *.best-scores.*; this rule also takes oneway_hits_rdb2html
# from ${CASP5-SCRIPTS}, while the other rules take it from ${WEBSCRIPTSDIR}.
%.best-scores.html: %.best-scores.rdb
	${CASP5-SCRIPTS}/oneway_hits_rdb2html $*.best_hits < $^ > $@

###########################################
#   ALIGNMENTS                            #
###########################################

#track models to be used in pairwise alignments
EBGHSTL_TRACKMOD = ${TARG_AL}.dssp-ebghstl.mod
EBGHTL_TRACKMOD = ${TARG_AL}.stride-ebghtl.mod
STR_TRACKMOD = ${TARG_AL}.str.mod

W05_MLIB = ${TARG_AL}-w0.5.mlib
EBGHSTL_MLIB = ${TARG_AL}-100-30-dssp-ebghstl.mlib
EBGHTL_MLIB = ${TARG_AL}-100-30-stride-ebghtl.mlib
STR_MLIB = ${TARG_AL}-100-30-str.mlib
ALPHA_MLIB = ${TARG_AL}-100-30-alpha.mlib
DSSP_EHL2_MLIB = ${TARG_AL}-100-30-dssp_ehl2.mlib

#settings of how many templates to predict vs. number of
#alignments to convert to CASP format must be done carefully
# need to add error checking so these numbers don't conflict
#with each other

# how many alignments to select from best templates
NUM_ALIGNMENTS = 50

#top alignments we report for CASP
NUM_TOP = 5

#make links to the casp alignments?
LINKS = 1
NO_LINKS = 0

#build an HMM from target sequence only to produce alignments
#similar to simple Smith-Waterman.  We observe that the
#T2K HMMs are so general that they may drift away from the
#original seed sequence
# (every continuation backslash is now preceded by a space, so an option value
# can never be fused with the option on the following line)
%.mod: %.a2m
	${BIN-SAM}/modelfromalign $* \
		-alignfile $^ \
		-insert /projects/compbio/lib/fssp-trained.regularizer \
		-aweight_bits 0.8 \
		-fimtrans -1 \
		-fimstrength 1 \
		-ins_jump_conf 1 \
		-match_jump_conf 1 \
		-del_jump_conf 1 \
		-binary_output 1 \
		-prior_library /projects/compbio/lib/recode3.20comp \
		-a2mdots 0 \
		-a protein \
		-sw 2 -jump_in_prob 0.2 -jump_out_prob 1 \
		-aweight_method 1 \
		-aweight_exponent 10

%.top_hits_t2k: %.best-scores.rdb
	${CASP5-SCRIPTS}/get_t2k_lib_hits < $^ > $@

%.top_hits_non_t2k: %.best-scores.rdb
	${CASP5-SCRIPTS}/get_t2k_lib_hits -negate < $^ > $@

## QUESTION:  CAN WE FREELY ADD MORE PAIRWISE ALIGNMENTS TO
## t2k_lib_pairwise_alignments AND
## t2k_non_lib_pairwise_alignments
# Runs both pairwise-alignment scripts, then writes the target file as a
# two-line header followed by the two hit lists.
# NOTE(review): RDB headers are normally tab-separated; confirm whether the
# separator inside the two echo strings below should be a tab.
%.top_hits_pwise_alignments.rdb: %.top_hits_t2k %.top_hits_non_t2k
	echo "Building pairwise alignments"
	${CASP5-SCRIPTS}/t2k_lib_pairwise_alignments ${WORKDIR} \
		${TARGET} ${EBGHSTL_TRACKMOD} ${EBGHTL_TRACKMOD} ${STR_TRACKMOD} ${W05_MLIB} ${EBGHSTL_MLIB} ${EBGHTL_MLIB} ${STR_MLIB} ${LIBSIZE} \
		< $*.top_hits_t2k
	${CASP5-SCRIPTS}/t2k_non_lib_pairwise_alignments ${WORKDIR} \
		${TARGET} ${W05_MLIB} ${LIBSIZE} < $*.top_hits_non_t2k
	echo "Template FSSP_rep" > $@
	echo "5S 5S" >> $@
	cat $*.top_hits_t2k $*.top_hits_non_t2k >> $@

#call with chain name.  Ex: 1grt.new_t2k_lib_pwise_alignments
%.new_t2k_lib_pwise_alignments:
	echo "Building pairwise alignments"
	${WEBSCRIPTSDIR}/new_t2k_lib_pairwise_alignments $* ${WORKDIR} \
		${TARGET} ${EBGHSTL_TRACKMOD} ${EBGHTL_TRACKMOD} ${STR_TRACKMOD} ${W05_MLIB} ${EBGHSTL_MLIB} ${EBGHTL_MLIB} ${STR_MLIB} ${LIBSIZE}

%.new_t2k_non_lib_pwise_alignments:
	echo "Building pairwise alignments"
	${WEBSCRIPTSDIR}/new_t2k_non_lib_pairwise_alignments $* ${WORKDIR} \
		${TARGET} ${W05_MLIB} ${LIBSIZE}

#report sorted list of the pairwise alignments
%.predicted_alignments.rdb: %.top_hits_pwise_alignments.rdb %.best-scores.rdb
	${CASP5-SCRIPTS}/gather_best_align_scores ${NUM_ALIGNMENTS} ${WORKDIR} ${TARGET} $^ > $@
#	${WEBSCRIPTSDIR}/add_summary_html \
#		"T02 Sorted list of top alignments of ${TARGET} and top hits" \
#		${BASE-URL}/$@ $@ >> ${WORKDIR}/summary.html

#get the TOP-N  For now N=5
%.top_reported_alignments.rdb: \
    %.predicted_alignments.rdb \
    %.best-scores.rdb
	${CASP5-SCRIPTS}/top_reported_alignments ${NUM_TOP} $^ > $@

#convert this to html and add to the summary page

# create CASP alignments
# pass in number of best alignments to be converted, directory where
# conversion program is located and working directory
%.many_alignments.rdb: %.predicted_alignments.rdb
	${WEBSCRIPTSDIR}/format_casp_alignments ${NUM_ALIGNMENTS} ${CONVERSION_DIR} ${WORKDIR} ${TARGET} ${NO_LINKS} < $^ > $@
#	${WEBSCRIPTSDIR}/add_summary_html \
#		"T02 Predictions in CASP al format" \
#		${BASE-URL}/$@ $@ >> ${WORKDIR}/summary.html

#if WARNING=1, print an explanation and warning message about
#top alignment selection
WARNING = 1
NOWARNING = 0

%.many_alignments.html: %.predicted_alignments.rdb %.best-scores.rdb
	${CASP5-SCRIPTS}/casp_summary_report_html \
		$^ ${WORKDIR} ${TARGET} \
		${BASE-URL} ${NOWARNING} > $@

%.casp_top_reported_alignments.rdb: %.top_reported_alignments.rdb
	${WEBSCRIPTSDIR}/format_casp_alignments ${NUM_TOP} ${CONVERSION_DIR} ${WORKDIR} ${TARGET} ${LINKS} < $^ > $@

%.top_reported_alignments.html: %.top_reported_alignments.rdb %.best-scores.rdb
	${CASP5-SCRIPTS}/casp_summary_report_html \
		$^ ${WORKDIR} ${TARGET} \
		${BASE-URL} ${WARNING} > $@

# A command, not a file: keep a stray file of this name from disabling it.
.PHONY: undertaker_start_section
undertaker_start_section:
	${WEBSCRIPTSDIR}/add_section_head_summary_html \
		"Undertaker (3d) files" >> ${WORKDIR}/summary.html

%.undertaker-align.script: %.many_alignments.rdb
	${CASP5-SCRIPTS}/make_undertaker_alignment_list < $^ > $@

# FOR OLD VERSION OF FRAGFINDER
# mkdir -p tolerates an existing directory, and "cd ... &&" makes sure the
# tools are never started in the wrong directory (their arguments use ../
# relative paths) if the cd fails.
fragment-a2m/read-fragments.script: ${TARG_AL}-w0.5.mod ${STR_TRACKMOD}
	mkdir -p fragment-a2m
	cd fragment-a2m && \
		${FRAGFINDER} ${TARGET} \
		-a protein,STR \
		-trackmod ../${TARG_AL}-w0.5.mod,../${STR_TRACKMOD} \
		-track_coeff 1.0,0.3 \
		-db ${STR-TWOTRACKDBS} \
		-alignfile ../${TARGET}.a2m,- \
		-fraglen 9 -numpermatch 6
	cd fragment-a2m && \
		${UNDERTAKER-SCRIPTS}/make-read-fragments.csh > read-fragments.script

${TARG_AL}.frag: ${TARG_AL}-w0.5.mod ${STR_TRACKMOD}
	${FRAGFINDER} ${TARG_AL} \
		-a protein,STR \
		-trackmod ${TARG_AL}-w0.5.mod,${STR_TRACKMOD} \
		-track_coeff 1.0,0.3 \
		-db ${STR-TWOTRACKDBS} \
		-firstsequence ${TARGET}.a2m,- \
		-fraglen 9 -numpermatch 6

%-2track-undertaker.a2m: %-w0.5.mod %.str.mod \
    ${TEMPLATE-SEQS} ${TEMPLATE-STR}
	${HMMSCORE} $*-2track-undertaker \
		-calibrate 1 \
		-a protein,STR \
		-trackmod $*-w0.5.mod,$*.str.mod \
		-track_coeff 1.0,0.3 \
		-sw 2 -adpstyle 5 \
		-db $*.a2m,$*.str.seq \
		-db ${STR-TWOTRACKDBS} \
		-select_score 8 -Emax ${BEST_EVALUE} \
		-select_align 4

# Builds a command script that lists every matching decoy PDB file; the
# chgrp/chmod at the end are best-effort (errors ignored).
read-decoys.script: decoys
	echo "InfilePrefix decoys/"> $@
	ls decoys/*${TARGET}*pdb* \
		| sed 's;decoys/;ReadConformPDB ;' \
		>> $@
	echo "InfilePrefix" >> $@
	ls robetta[1-5]*pdb* \
		| sed 's;rob;ReadConformPDB rob;' \
		>> $@
	-chgrp protein $@
	-chmod g+w $@

# NOTE(review): two targets share this recipe, and the sorted table is written
# to $@, i.e. to whichever of the two names make happens to be building --
# confirm that score-decoys.breaks is meant to receive it as well.
score-decoys.rdb score-decoys.breaks: read-decoys.script \
    define-score.script ${CASP5-SCRIPTS}/score-decoys.under
	${UNDERTAKER} < ${CASP5-SCRIPTS}/score-decoys.under
	${PCB}/sorttbl cost < score-decoys-unsorted.rdb > $@
	-rm score-decoys-unsorted.rdb
	-chgrp protein $@
	-chmod g+w $@

score-real%-decoys.rdb : read-decoys.script real%.pdb \
define-score.script ${CASP5-SCRIPTS}/compare-real%.under ${UNDERTAKER} < ${CASP5-SCRIPTS}/compare-real$*.under ${PCB}/sorttbl rmsd cost < score-decoys-unsorted.rdb > $@ -rm score-decoys-unsorted.rdb -chgrp protein $@ -chmod g+w $@ score-real-decoys.rdb : read-decoys.script real.pdb \ define-score.script ${CASP5-SCRIPTS}/compare-real.under ${UNDERTAKER} < ${CASP5-SCRIPTS}/compare-real.under ${PCB}/sorttbl rmsd cost < score-decoys-unsorted.rdb > $@ -rm score-decoys-unsorted.rdb -chgrp protein $@ -chmod g+w $@ %.undertaker-align.pdb.gz: show-align.script \ %.undertaker-align.script \ %-2track-undertaker.a2m ${UNDERTAKER} < show-align.script >& show-align.log gzip -f $*.undertaker-align.pdb %/read-alignments.under: % cd $*; \ ${UNDERTAKER-SCRIPTS}/make-read-fragments.csh > read-alignments.under # for close homology modeling, may want to pick out best scores using # single sequences sw-best: [1-9]* echo 'foreach x ([0-9]*)' > tmp.script echo 'grep -h "$$x " $$x/*SW*dist' >> tmp.script echo 'end' >> tmp.script chmod +x tmp.script csh tmp.script \ | sort -n +3 \ | uniq \ > $@ rm tmp.script # target for stealing robetta pdb model # Need to have CAFASP-ID defined---for some reason they DON'T use # target numbers but their own internal numbering scheme. # If you want to look at the robetta models for some other target, take # these steps: # Go to http://www.cs.bgu.ac.il/~dfischer/CAFASP3/targets.html # and look at the link from the date column of the desired target. # It will contain an id=xxxx field. # # In the Makefile for the target, define CAFASP-ID=xxxx to be # the id in the CAFASP system. # # make robetta1.pdb # gzip -9f robetta*pdb ifdef CAFASP-ID robetta.pdb: wget -O $@ 'http://bioinfo.pl/Meta/target.pl?id=${CAFASP-ID}&file=robetta.pdb' endif robetta1.pdb: robetta.pdb ${CASP5-SCRIPTS}/break-ts-into-pdb $^ ifdef PRED ifdef PRED2 # The heavy-str target makes a pairwise alignment for the template PRED # using a much higher weight than usual on the str track. 
# It is usually used as a recursive make: # make PRED=1kyfA PRED2=1k heavy-str PRED-INFO = ${PCEM}/pdb/${PRED2}/${PRED}/info heavy-str: ${PRED}/${TARGET}-${PRED}-heavy-str-local.pw.a2m ${PRED}/${TARGET}-${PRED}-heavy-str-local.pw.a2m: ${TARGET}.t2k-w0.5.mod ${TARGET}.t2k.str.mod -mkdir ${PRED} ${HMMSCORE} ${PRED}/${TARGET}-${PRED}-heavy-str-local.pw \ -alphabet protein,STR \ -trackmod ${TARGET}.t2k-w0.5.mod,${TARGET}.t2k.str.mod \ -trackcoeff 1.0,2.0 \ -db ${TARGET}.a2m,${TARGET}.t2k.str.seq \ -db ${PRED-INFO}/${PRED}.stride-mixed.seq,${PRED-INFO}/${PRED}.stride-mixed.str \ -sw 2 -dpstyle 0 -subtract_null 4 \ -adpstyle 5 \ -select_align 8 endif endif # targets for sending results to CASP mail-2ry: ${TARG_AL}.dssp_ehl2 mail submit@predictioncenter.llnl.gov \ submit@predictioncenter.llnl.gov \ < $^ ${TARGET}.ts-submit%: ${TARGET}.submit%.pdb ${CASP5-SCRIPTS}/pdb2casp ${TARGET} -model $* \ -author ${CASP5-2D-ID} \ -method ${CASP5}/generic.method \ < $^ > $@ mail-pdb%: ${TARGET}.ts-submit% mail -s 'SAM-T02-human ${TARGET} TS' \ submit@predictioncenter.llnl.gov \ < $^