# Macros that the including Makefile must set or may override:
#	TARGET := SET_IN_Makefile
#	START_COL		defaults to 1
#	PREFERRED_AL_METHOD	defaults to t2k
#	MANUAL_TOP_HITS		which chains to make pairwise alignments for
#				in extra_alignments and which to make
#				read_alignments.under files for in
#				read_alignments (default empty)
#	NO_SUMMARY		if set, none of the output to the
#				summary.html file is done (useful for
#				updating bits and pieces)

# Every variable is exported to sub-makes ...
export
# ... and these three are named explicitly so that they are still exported
# even if someone turns off the blanket export above.
export TARGET LCTARGET START_COL

# First column used for numbering alignments and mutual information.
ifndef START_COL
START_COL := 1
endif

# Alignment method: AL_METHOD falls back to PREFERRED_AL_METHOD, which in
# turn falls back to t2k.  TARG_AL is the common file-name prefix
# <target>.<alignment method>.
ifndef PREFERRED_AL_METHOD
PREFERRED_AL_METHOD := t2k
endif
ifndef AL_METHOD
AL_METHOD := $(PREFERRED_AL_METHOD)
endif
TARG_AL := $(TARGET).$(AL_METHOD)

# TO DO:
# Modify file so that a full set of predictions is made for
#	both target2k and target04 alignments.
#
# Clean up (get rid of?) script make-alignments,
#	using standard programs to get hit list from *best-scores.rdb
#	into a list that can be run with $(foreach ...)
#	the way that extra_alignments now are.

# REDO_SEARCHES	if set, causes the template library to be
#		included in the dependencies of the searches.
#		(if not defined, the template library is not in the
#		dependencies)
# REDO_T2K	if set, causes NR to be included in the dependencies
#		for the t2k iterative search, usually triggering rebuilding
#		the t2k.a2m.gz file.
# REDO_T04	if set, causes NR to be included in the dependencies
#		for the t04 iterative search, usually triggering rebuilding
#		the t04.a2m.gz file.
# REDO_MI	if set, add dependency on correlated_columns executable.
#		Useful if the definition of the mi.rdb files changes.

# macros that control the t2k iterative search
# BLAST_MAX	default 10,000, set it smaller for long proteins with many
#		homologs that take a long time to run.
# ADPSTYLE	default 5, set it to 1 for long proteins that cause hmmscore
#		to crash in the final alignment step of target2k

# macros that control compression
# NOGZIP_PDB	if set causes undertaker.pdb file not to be gzipped.

# macros used in recursive makes for pairwise alignments:
# MANUAL_TOP_HITS	which chains to make pairwise alignments for
#			provided manually
# PRED		needs to be set to template chain ID in recursive makes for
#		pairwise alignments
# PRED2		first two letters of PRED, now computed
#		automatically from PRED.
# ALIGN_VITERBI	if set, causes only ADP=1 and not ADP=5 alignments to
#		be tried
#		NOTE(review): build_top_alignments in this file tests and
#		passes VITERBI_ALIGN, not ALIGN_VITERBI -- confirm which
#		name is actually honoured.
# The following macros are set automatically when making pairwise alignments.
# ALIGN_TYPE	local or global, set when generating pairwise alignments
# ALIGN_NAME
# ADP
# TWO_TRACK_ADP	(used to set ADP for two-track alignments)
# SW
# FSSP
# FSSP2
# STRUCT_ALPH	(also used for generating secondary structure predictions)
# BURIAL_ALPH	(used for 3-track HMMs)
# STRUCT_WEIGHT

# Get the version of the compiler used on this machine.
# GCC_VERSION is compared against 2.96 further down in this file to select
# a correlated-columns binary.  `g++ -dumpversion` prints only the version
# number, so the result no longer depends on the layout of the
# `g++ --version` banner (vendor strings in the banner move the version
# away from the third word).
ifndef GCC_VERSION
GCC_VERSION := ${shell g++ -dumpversion}
endif
# A GCC_VERSION inherited from the environment or a parent make may still
# hold a multi-word `g++ --version` banner; as before, the version number is
# then taken to be its third word.
ifneq '${words ${GCC_VERSION}}' '1'
GCC_VERSION := ${word 3,${GCC_VERSION}}
endif

# Normally compute mutual information for all column pairs,
# but turn it off if NO_MUTUAL is set.
# (computation is now fast enough that this shouldn't be necessary)
# NOTE(review): both branches currently assign the empty value, so
# NO_MUTUAL has no effect on MUTUAL_DEPEND as written here.
ifndef NO_MUTUAL
MUTUAL_DEPEND :=
else
MUTUAL_DEPEND :=
endif

# Locations of the yeast and CASP6 experiment trees and their helper scripts.
YEAST := /projects/compbio/experiments/protein-predict/yeast
YEAST_SCRIPTS := ${YEAST}/scripts
CASP6 := /projects/compbio/experiments/protein-predict/casp6
CASP6_NETWORKS := ${CASP6}/networks
CASP6_SCRIPTS := ${CASP6}/scripts

# AUTHOR code for CASP6 submissions of "SAM-T04 hand" group.
CASP6_ID := 4204-4258-2837 CASP6_SUBMIT := submit@predictioncenter.llnl.gov ifndef UNAME_M UNAME_M := $(shell uname -m) endif ifndef UNAME_P UNAME_P := $(shell uname -p) endif PCL := /projects/compbio/lib PCB := /projects/compbio/bin PCBS := ${PCB}/scripts EXTRACT := ${PCBS}/extract-from-fasta PCB_SUB := ${PCB}/${UNAME_P} FRAGFINDER := ${PCB_SUB}/fragfinder BIN_SAM := ${PCB_SUB} BIN_PREDICT2ND := ${PCB_SUB} UNDERTAKER_SCRIPTS := /projects/compbio/experiments/undertaker/scripts ifndef UNDERTAKER UNDERTAKER := /projects/compbio/programs/undertaker/undertaker endif ifndef WORKDIR WORKDIR := $(shell pwd) endif ifndef HOST HOST := $(shell hostname) endif # how many residues wide should each row of the logos be? # Ideally, we'd like this to be computed from the sequence length, # with length<=200 yielding 50 # 200 ${WORKDIR}/summary.html endif #start the inputs section header_inputs: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_section_head_summary_html \ Inputs >> ${WORKDIR}/summary.html endif #acknowledge sequence receipt #add pointer to sequence file to the html results page receipt_ack: ${TARGET}.a2m echo Received sequence ${TARGET}.a2m ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "Submitted sequence(s)" \ ${TARGET}.a2m >> ${WORKDIR}/summary.html ${YEAST_SCRIPTS}/add_summary_html \ "README file" \ README >> ${WORKDIR}/summary.html ${YEAST_SCRIPTS}/add_summary_html \ "Provided documentation" \ ${TARGET}.doc.html >> ${WORKDIR}/summary.html endif header_alignment: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_section_head_summary_html \ 'Multiple alignment' >> ${WORKDIR}/summary.html endif build_multiple: -$(foreach AL_METHOD,${MA_METHODS},\ ${MAKE} -k AL_METHOD=${AL_METHOD} \ build_multiple_alignment build_pretty build_mod w0.5_logo conserved_script 1.small_divider;) build_multiple_alignment: ${TARG_AL}.a2m.gz ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "SAM_${AL_METHOD} multiple alignment in a2m format" \ $^ >> ${WORKDIR}/summary.html endif build_pretty: 
${TARG_AL}.pa.html ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "SAM_${AL_METHOD} multiple alignment in pretty html format" \ $^ >> ${WORKDIR}/summary.html endif build_mod: ${TARG_AL}.w0.5.mod ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "SAM_${AL_METHOD} target hidden Markov model" \ $^ >> ${WORKDIR}/summary.html endif w0.5_logo: ${TARG_AL}.w0.5-logo.eps ${TARG_AL}.w0.5-logo.pdf ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_multiple_formats \ "SAM_${AL_METHOD} multiple alignment---sequence logo" \ ${TARG_AL}.w0.5-logo \ eps pdf >> ${WORKDIR}/summary.html endif conserved_script: conserved_${AL_METHOD} ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "Script for $* highlighting in rasmol" \ $^ >> ${WORKDIR}/summary.html endif #start the two-track section header_2track: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_section_head_summary_html \ '\ Secondary Structure Prediction\ \ (Explanation of secondary-structure predictions) \ ' \ >> ${WORKDIR}/summary.html endif 2track: -$(foreach AL,t2k t04,\ ${MAKE} -k AL_METHOD=${AL} ${AL}_2track;) %_2track: ${TARGET}.%-thin90.a2m.gz -$(foreach STRUCT_ALPH,${PURE_SECONDARY_ALPHABETS},${MAKE} -k \ AL_METHOD=$* \ STRUCT_ALPH=${STRUCT_ALPH} do_secondary do_secondary_logo 1.small_divider;) -$(foreach STRUCT_ALPH,${BURIAL_ALPHABETS},${MAKE} -k \ AL_METHOD=$* \ STRUCT_ALPH=${STRUCT_ALPH} do_burial_or_secondary \ ${TARGET}.$*.${STRUCT_ALPH}.mod do_secondary_logo 1.small_divider;) do_burial_or_secondary: ${TARG_AL}.${STRUCT_ALPH}.rdb \ ${TARG_AL}.${STRUCT_ALPH}.seq \ ${TARG_AL}.${STRUCT_ALPH}-color.rasmol ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_multiple_formats \ "${STRUCT_ALPH} structure prediction" \ ${TARG_AL}.${STRUCT_ALPH} \ rdb seq \ >> ${WORKDIR}/summary.html ${YEAST_SCRIPTS}/add_summary_html \ "Script for ${TARG_AL}.${STRUCT_ALPH} coloring in rasmol" \ ${TARG_AL}.${STRUCT_ALPH}-color.rasmol >> ${WORKDIR}/summary.html endif ifeq (${AL_METHOD},${PREFERRED_AL_METHOD}) -ln -sf 
${TARG_AL}.${STRUCT_ALPH}-color.rasmol ${COLOR_SCRIPT_SHORT_NAME} endif do_secondary: do_burial_or_secondary \ ${TARG_AL}.${STRUCT_ALPH}.constraints do_secondary_logo: ${TARG_AL}.${STRUCT_ALPH}-logo.eps ${TARG_AL}.${STRUCT_ALPH}-logo.pdf ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_multiple_formats \ "${STRUCT_ALPH}---sequence logo" \ ${TARG_AL}.${STRUCT_ALPH}-logo \ eps pdf \ >> ${WORKDIR}/summary.html endif %.small_divider : ifndef NO_SUMMARY echo '
' >> ${WORKDIR}/summary.html endif #start the target model scores section header_target_mod_scores: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_section_head_summary_html \ "Target model scores" >> ${WORKDIR}/summary.html endif 2track_target_mod_scores: $(foreach al,t2k t04,2track_${al}_target_mod_scores) 2track_%_target_mod_scores: -$(foreach x,${SECONDARY_ALPHABETS},\ ${MAKE} -k STRUCT_ALPH=${x} AL_METHOD=$* \ ${TARGET}.$*-100-30-${x}.mlib \ ${TARGET}.$*-100-30-${x}.dist \ $*-100-30-${x}-scores;) -${MAKE} -k STRUCT_ALPH=str2 BURIAL_ALPH=CB_burial_14_7 \ AL_METHOD=$* \ $*-100-40-40-str2+CB_burial_14_7-scores ${AL_METHOD}-%-scores: ${TARG_AL}-%-scores.rdb \ ${TARG_AL}-%-scores.html ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "amino acid/$* two-track target model scores" \ ${TARG_AL}-$*-scores.html >> ${WORKDIR}/summary.html endif 1track_target_mod_scores: -$(foreach d,t2k t04, \ ${MAKE} -k AL_METHOD=${d} 1track_target_mod_scores_${d};) 1track_target_mod_scores_${AL_METHOD}: ${TARG_AL}.w0.5.mlib \ ${TARG_AL}.w0.5.dist \ ${TARG_AL}-w0.5-scores.rdb \ ${TARG_AL}-w0.5-scores.html ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "amino acid single-track target model scores of PDB" \ ${TARG_AL}-w0.5-scores.html >> ${WORKDIR}/summary.html endif #start the template model scores section header_template_mod_scores: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_section_head_summary_html \ "Template model scores" >> ${WORKDIR}/summary.html endif template_mod_scores: ${TARGET}.template-lib-scores.rdb \ ${TARGET}.template-lib-scores.html ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "Annotated template model scores" \ ${TARGET}.template-lib-scores.html >> ${WORKDIR}/summary.html endif #start the top hits section header_top_hits: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_section_head_summary_html \ "Top Hits" >> ${WORKDIR}/summary.html endif top_hits: -$(foreach d,t2k t04, \ ${MAKE} -k AL_METHOD=${d} \ ${TARGET}.${d}.best-scores.rdb ${TARGET}.${d}.best-scores.html;) ifndef 
NO_SUMMARY -$(foreach d,t2k t04, \ ${YEAST_SCRIPTS}/add_summary_html \ "Best scoring hits from all models" \ ${TARGET}.$d.best-scores.html >> ${WORKDIR}/summary.html;) endif #start the top alignments section header_top_alignments: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_section_head_summary_html \ "Alignments for Top Hits" >> ${WORKDIR}/summary.html endif # NUM_ALIGN_TEMPLATES specifies the number of templates to include # when building pairwise alignments. # Actually, the macro is 2 more then number of templates to align. ifndef NUM_ALIGN_TEMPLATES NUM_ALIGN_TEMPLATES :=32 endif build_top_alignments: ${TARGET}.t2k.best-scores.rdb ${TARGET}.mod ifdef VITERBI_ALIGN grep -v '^ ' < $< \ | head -${NUM_ALIGN_TEMPLATES} \ | ${YEAST_SCRIPTS}/make-alignments ${TARGET} VITERBI_ALIGN=1 else grep -v '^ ' < $< \ | head -${NUM_ALIGN_TEMPLATES} \ | ${YEAST_SCRIPTS}/make-alignments ${TARGET} endif final_predictions: -$(foreach d,t2k t04, \ ${MAKE} -k AL_METHOD=${d} many-alignments top-alignments;) many-alignments: ${TARG_AL}.many_alignments.rdb \ ${TARG_AL}.many_alignments.html ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "T02 Many Alignments Summary" \ ${TARG_AL}.many_alignments.html >> ${WORKDIR}/summary.html endif top-alignments: ${TARG_AL}.top_reported_alignments.rdb \ ${TARG_AL}.top_reported_alignments.html ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "T02 Top Models Summary" \ ${TARG_AL}.top_reported_alignments.html >> ${WORKDIR}/summary.html endif # do recursive make to make sure that the wildcard in all-align.a2m.gz is up to date all_align: -${MAKE} -k all-align.a2m.gz all-align.pa ifndef NO_SUMMARY -${YEAST_SCRIPTS}/add_summary_multiple_formats \ "multiple alignment of templates" \ all-align \ a2m.gz pa \ >> ${WORKDIR}/summary.html endif undertaker_start_section: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_section_head_summary_html \ "Undertaker (3d) files" >> ${WORKDIR}/summary.html endif %.undertaker-align.under: %.top_reported_alignments.rdb 
${YEAST_SCRIPTS}/make_undertaker_alignment_list < $^ > $@ undertaker_multi_align: ${TARG_AL}-2track-undertaker.a2m ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "Multiple alignment for undertaker templates" \ $^ >> ${WORKDIR}/summary.html endif undertaker_from_many: ${TARG_AL}.undertaker-align.under ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "Undertaker input for top alignments" \ $^ >> ${WORKDIR}/summary.html endif ifdef NOGZIP_PDB PDBEXT := pdb else PDBEXT := pdb.gz endif undertaker_show_alignment: ${TARG_AL}.undertaker-align.${PDBEXT} ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ 'PDB file with \ \ model(s) for top alignments' \ $^ >> ${WORKDIR}/summary.html endif frag_a2m: $(foreach AL_METHOD, ${MA_METHODS}, ${MAKE} AL_METHOD=${AL_METHOD} ${TARGET}.${AL_METHOD}.many.frag.gz;) ifndef NO_SUMMARY $(foreach AL_METHOD, ${MA_METHODS}, \ ${YEAST_SCRIPTS}/add_summary_html \ "Fragment list for undertaker (from fragfinder)" \ ${TARGET}.${AL_METHOD}.many.frag.gz >> ${WORKDIR}/summary.html; \ ) endif extra_alignments: -$(foreach x,${MANUAL_TOP_HITS}, \ ${MAKE} -k PRED=${x} \ single-track-target-alignments \ template-alignments \ two-track-alignments \ three-track-alignments ; ) read_alignments: \ $(foreach x,${MANUAL_TOP_HITS},${x}/read-alignments-noscwrl.under ${x}/read-alignments-scwrl.under) undertaker_try1: -${MAKE} ${TARGET}.do1 ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "First attempted model using undertaker" \ decoys/${TARGET}.try1-opt2.pdb.gz >> ${WORKDIR}/summary.html endif score_all: decoys/score-all.try1.rdb ifndef NO_SUMMARY ${YEAST_SCRIPTS}/add_summary_html \ "Undertaker scores for decoys" \ $^ >> ${WORKDIR}/summary.html endif #end html results page summary_end: ifndef NO_SUMMARY ${YEAST_SCRIPTS}/end_summary_html \ >> ${WORKDIR}/summary.html endif ################# # web-interface # ################# %.end_section: date ifndef NO_SUMMARY ${YEAST_SCRIPTS}/end_section_summary_html \ >> ${WORKDIR}/summary.html endif 
###############################################
# common operations, applicable to many files #
###############################################

.PHONY: rm_empty

# Remove (and list) empty files below the current directory.
# -type f keeps find from handing empty directories to rm -f, which cannot
# remove them and only produces error messages.
rm_empty:
	find . -type f -empty -exec rm -f '{}' \; -print

# Canned recipe shared by all %-thinNN.a2m.gz rules.
#	$(1)	fraction of sequence identity to thin to (passed to
#		uniqueseq as -percent_id)
# uniqueseq is given the run name $@.tmp and its output $@.tmp.a2m is then
# compressed and moved onto the target.  Deriving the scratch name from the
# target (instead of one fixed name for every rule) keeps several thinning
# jobs in the same directory from overwriting each other's intermediate
# file, e.g. when the mutual-information sub-makes run under -j.
# (The hyphen in the macro name keeps it out of the blanket `export'.)
define thin-alignment
${BIN_SAM}/uniqueseq $@.tmp -alignfile $< -percent_id $(1)
gzip -9f $@.tmp.a2m
mv $@.tmp.a2m.gz $@
endef

#thin the alignment to 90% sequence identity for use with the neural nets
#(which were trained on thinned alignments)
%-thin90.a2m.gz: %.a2m.gz
	$(call thin-alignment,0.90)

# The remaining thinning levels; this file feeds them to the
# mutual-information rules.
%-thin62.a2m.gz: %.a2m.gz
	$(call thin-alignment,0.62)

%-thin50.a2m.gz: %.a2m.gz
	$(call thin-alignment,0.50)

%-thin40.a2m.gz: %.a2m.gz
	$(call thin-alignment,0.40)

%-thin35.a2m.gz: %.a2m.gz
	$(call thin-alignment,0.35)

%-thin30.a2m.gz: %.a2m.gz
	$(call thin-alignment,0.30)

#convert an a2m.gz alignment to a human-readable model
# The w0.5 script writes to a scratch file named after the target, which
# hmmconvert then reads via -model_file under the run name $*.w0.5; the
# scratch file is removed afterwards (errors from the rm are ignored).
%.w0.5.mod: %.a2m.gz
	${PCBS}/w0.5 $^ $@.tmp
	${BIN_SAM}/hmmconvert $*.w0.5 -model_file $@.tmp
	-rm -f $@.tmp

%-w1.0.mod: %.a2m.gz
	${PCBS}/w1.0 $^ $@

#compress a file
%.gz: %
	gzip -f $^

#make a pretty-aligned alignment from a compressed a2m alignment
%.pa: %.a2m.gz
	${BIN_SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

# ... or from an uncompressed one
%.pa: %.a2m
	${BIN_SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

# Make a2m file with dots.
%.dotted-a2m: %.a2m.gz
	${BIN_SAM}/prettyalign $^ -f \
	| grep -v '^;' > $@

%.dotted-a2m: %.a2m
	${BIN_SAM}/prettyalign $^ -f \
	| grep -v '^;' > $@

# HTML rendering of an alignment.  The compressed variant unpacks into a
# scratch file named after the target, so that concurrent conversions in the
# same directory do not share one fixed scratch file.
%.pa.html: %.a2m.gz
	gunzip -c $^ > $@.tmp.a2m
	${A2M2HTML} -a2m_in $@.tmp.a2m > $@
	rm $@.tmp.a2m

%.pa.html: %.a2m
	${A2M2HTML} -a2m_in $^ > $@

##################################
# ANNOTATING A TARGET            #
##################################
# secondary structure prediction #
##################################

# PREDICT_2ND := /cse/faculty/karplus/dna/predict-2nd/predict-2nd
PREDICT_2ND := /projects/compbio/programs/predict-2nd/bin/i686/opt/predict-2nd

TEMPLATE_SEQS := ${PCEM_INDEXES}/${AL_METHOD}.x-seqs
MIXTURE := ${PCL}/recode3.20comp
TRANS_REG := ${PCL}/fssp-trained.regularizer

# Databases for two-track scoring.  They become prerequisites of the
# searches only when REDO_SEARCHES is set.
TWOTRACKDBS := ${TEMPLATE_SEQS},${TEMPLATE_STRUCT}
ifdef REDO_SEARCHES
TWOTRACK_DEPEND := ${TEMPLATE_SEQS} ${TEMPLATE_STRUCT}
else
TWOTRACK_DEPEND :=
endif

# Run predict-2nd with the neural net ${PREDICT_NET} on the alignment
# thinned to 90% identity.  The command script fed to predict-2nd on stdin
# is assembled line by line in a scratch file named after the stem and
# alphabet (not one fixed name), so predictions for different alphabets or
# alignment methods cannot clobber each other's script.
# NOTE(review): the recipe writes only the .seq and .rdb files; nothing
# visible here creates the first listed target, %.${STRUCT_ALPH}.
ifdef PREDICT_NET
%.${STRUCT_ALPH} %.${STRUCT_ALPH}.rdb %.${STRUCT_ALPH}.seq : %-thin90.a2m.gz ${PREDICT_NET}
ifdef STRUCT_ALPH_FILE
	echo ReadAlphabet ${STRUCT_ALPH_FILE} > $*.${STRUCT_ALPH}.tmp.script
else
	echo > $*.${STRUCT_ALPH}.tmp.script
endif
	echo ReadNeuralNet ${PREDICT_NET} >> $*.${STRUCT_ALPH}.tmp.script
	echo ReadA2M $< >> $*.${STRUCT_ALPH}.tmp.script
	echo PrintPredictionFasta $*.${STRUCT_ALPH}.seq >> $*.${STRUCT_ALPH}.tmp.script
	echo PrintRDB $*.${STRUCT_ALPH}.rdb >> $*.${STRUCT_ALPH}.tmp.script
	${PREDICT_2ND} < $*.${STRUCT_ALPH}.tmp.script
	rm $*.${STRUCT_ALPH}.tmp.script
endif

# Constraints derived from a prediction rdb file, numbered from START_COL.
%.${STRUCT_ALPH}.constraints: %.${STRUCT_ALPH}.rdb
	${CASP6_SCRIPTS}/constraints-from-rdb -start ${START_COL} < $^ > $@

# SAM model for the structure track, built from the prediction rdb file.
%.${STRUCT_ALPH}.mod: %.${STRUCT_ALPH}.rdb
	${PCEM_SCRIPTS}/2nd-rdb-to-sam-model -alphabet ${SAM_STRUCT_ALPH} $^ $@

# This target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-${STRUCT_ALPH}.dist is called
# The double-scoring is annoying, but it does make rescoring later
# possible without recalibration.
# Calibration pass for the two-track model (amino-acid track weighted 1.0,
# $(STRUCT_ALPH) track weighted 0.3).  The run is named after the target
# without its .mlib suffix; the .dist file is deleted afterwards (errors
# ignored) so that the .dist rule below regenerates it.
#	(formerly also tried: -trackprior rsdb-comp2.32comp,t99-2d-comp.9comp)
%-100-30-$(STRUCT_ALPH).mlib: %.w0.5.mod %.$(STRUCT_ALPH).mod
	$(HMMSCORE) $(basename $@) \
		-calibrate 1 \
		-alphabet protein,$(SAM_STRUCT_ALPH) \
		-trackmod $<,$(word 2,$^) \
		-db $(TWOTRACKDBS) \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $(basename $@).dist

# Score the two-track databases with the calibrated model library.
# hmmscore runs under the fixed name dsspfoo; the file dsspfoo.1.<target>
# is then moved onto the target.
%-100-30-$(STRUCT_ALPH).dist: %-100-30-$(STRUCT_ALPH).mlib \
		$(TWOTRACK_DEPEND)
	$(HMMSCORE) dsspfoo \
		-modellibrary $< \
		-db $(TWOTRACKDBS) \
		-db_size $(LIBSIZE) \
		-select_score 4 -Emax 40.
	mv -f dsspfoo.1.$@ $@

# Annotated score tables.  The first rule passes
# <AL_METHOD>-100-30-<STRUCT_ALPH> to the annotation program.
%-100-30-$(STRUCT_ALPH)-scores.rdb: %-100-30-$(STRUCT_ALPH).dist $(SCOP)
	$(ANNOTATE) $(AL_METHOD)-100-30-$(STRUCT_ALPH) < $< > $@

# Generic variant: the argument is the target name with "$(TARGET)." and
# ".rdb" removed.
%-scores.rdb: %.dist $(SCOP)
	$(ANNOTATE) $(subst .rdb,,$(subst $(TARGET).,,$@)) < $< > $@

# The following awkward syntax is to make sure that the
# template-lib-scores.html file is not incorrectly made without the
# initial pruning.
# This is ugly and ridiculous---a better naming convention would fix
# the problem more cleanly.
$(TARGET).t2k-%-scores.html: $(TARGET).t2k-%-scores.rdb
	$(YEAST_SCRIPTS)/oneway_hits_rdb2html $(TARGET).t2k.$*-scores < $< > $@

$(TARGET).t04-%-scores.html: $(TARGET).t04-%-scores.rdb
	$(YEAST_SCRIPTS)/oneway_hits_rdb2html $(TARGET).t04.$*-scores < $< > $@

# three-track str2 + CB_BURIAL_14_7 angle stuff:
ifeq ($(BURIAL_ALPH),CB_burial_14_7)
SAM_BURIAL_ALPH := CB_BURIAL_14_7
TEMPLATE_BURIAL := $(PCEM_INDEXES)/$(AL_METHOD).CB-burial-14-7s
endif

# Databases for three-track scoring; as with the two-track ones they are
# prerequisites of the searches only when REDO_SEARCHES is set.
THREETRACKDBS := $(TEMPLATE_SEQS),$(TEMPLATE_STRUCT),$(TEMPLATE_BURIAL)
ifdef REDO_SEARCHES
THREETRACK_DEPEND := $(TEMPLATE_SEQS) $(TEMPLATE_STRUCT) $(TEMPLATE_BURIAL)
else
THREETRACK_DEPEND :=
endif

# This target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.dist is called
# The double-scoring is annoying, but it does make rescoring later
# possible without recalibration.
# Three-track calibration (amino acid, ${STRUCT_ALPH}, ${BURIAL_ALPH};
# track weights 1.0, 0.4, 0.4).  The .dist file is removed afterwards
# (errors ignored) so that the .dist rule below regenerates it.
# NOTE(review): the two-track rule passes ${SAM_STRUCT_ALPH} to -alphabet
# and this file defines SAM_BURIAL_ALPH, but here the raw ${STRUCT_ALPH}
# and a literal CB_BURIAL_14_7 are used -- confirm that this is intended.
%-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.mlib: %.w0.5.mod %.${STRUCT_ALPH}.mod %.${BURIAL_ALPH}.mod
	${HMMSCORE} $*-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH} \
		-calibrate 1 \
		-alphabet protein,${STRUCT_ALPH},CB_BURIAL_14_7 \
		-trackmod $*.w0.5.mod,$*.${STRUCT_ALPH}.mod,$*.${BURIAL_ALPH}.mod \
		-db ${THREETRACKDBS} \
		-trackcoeff 1.0,0.4,0.4 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.dist

# Score the three-track databases with the calibrated library; hmmscore
# runs under the name ${BURIAL_ALPH}foo and its output file is moved onto
# the target.
%-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.dist: %-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.mlib \
		${THREETRACK_DEPEND}
	${HMMSCORE} ${BURIAL_ALPH}foo \
		-modellibrary $< \
		-db ${THREETRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	mv -f ${BURIAL_ALPH}foo.1.$@ $@

######################################
# DSSP_EHL2 merged prediction stuff: #
######################################

# Combine the four listed prediction rdb files into one dssp-ehl2 rdb.
# NOTE(review): the tag passed with -a is spelled SAM-TO2 (letter O) here,
# while rdb2casp below is given SAM-T02 (digit zero) -- confirm which
# spelling is intended before changing either string.
%.${AL_METHOD}.dssp-ehl2.rdb: %.${AL_METHOD}.dssp-ebghstl.rdb \
		%.${AL_METHOD}.stride-ebghtl.rdb \
		%.${AL_METHOD}.str2.rdb \
		%.${AL_METHOD}.alpha.rdb
	${YEAST_SCRIPTS}/RDBCombine $^ -a SAM-TO2 > $@

%.${AL_METHOD}.dssp-ehl2.seq: %.${AL_METHOD}.dssp-ehl2.rdb
	${PCEM_SCRIPTS}/seq-from-rdb < $^ > $@

# CASP formatted prediction---may be useful for EVA or LiveBench
%.${AL_METHOD}.dssp-ehl2: %.${AL_METHOD}.dssp-ehl2.rdb
	${YEAST_SCRIPTS}/rdb2casp $^ SAM-T02 > $@

##########################
# TEMPLATE MODEL SCORING #
##########################

# The template model library is a prerequisite only when REDO_SEARCHES is
# set.
ifdef REDO_SEARCHES
TEMPLATE_LIB_DEPEND := ${PCEM_INDEXES}/t2k-w0.5-db.mlib
else
TEMPLATE_LIB_DEPEND :=
endif

#template library scores
# hmmscore writes $*.template-lib.dist-rdb; comment lines are stripped,
# model names shortened, rows restricted to SEQID == ${TARGET} and sorted
# by EVALUE to give the target.  The intermediate file is then removed
# (errors ignored).
%.template-lib-scores.rdb: ${TEMPLATE_LIB_DEPEND} ${TARGET}.a2m
	${HMMSCORE} $*.template-lib \
		-modellibrary ${PCEM_INDEXES}/t2k-w0.5-db.mlib \
		-db_size ${LIBSIZE} \
		-db ${TARGET}.a2m -rdb 1 \
		-select_score 4 -Emax 40
	grep -v '^[#]' < $*.template-lib.dist-rdb \
	| ${YEAST_SCRIPTS}/shorten_mod_names \
	| ${PCB}/row SEQID eq ${TARGET} \
	| ${PCB}/sorttbl EVALUE \
	> $@
	-rm $*.template-lib.dist-rdb

# Only the first 500 lines of the score table are annotated and converted
# to HTML.
%.template-lib-scores.html: %.template-lib-scores.rdb ${SCOP}
	head -n 500 < $< \
	| ${YEAST_SCRIPTS}/annotate_template_scores \
	| ${YEAST_SCRIPTS}/oneway_hits_rdb2html $*.template-lib-scores \
	> $@

##############################
# SINGLE-TRACK TARGET MODELS #
##############################
# single-track model pdb scoring
# Note: all-protein has the NCBI names,
#	while SAM_T02/data has our short names for chain ids
# PDB_DB := /projects/compbio/data/pdb/all-protein
# PDB_DB := /projects/compbio/experiments/protein-predict/SAM_T02/data/pdbaa
PDB_DB := /projects/compbio/data/pdb/dunbrack-pdbaa

#calibrate the single track model
#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %.w0.5.dist is called
#	(formerly also tried: -trackprior rsdb-comp2.32comp)
%.w0.5.mlib: %.w0.5.mod
	${HMMSCORE} $*.w0.5 \
		-calibrate 1 \
		-i $*.w0.5.mod \
		-db ${TEMPLATE_SEQS} \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*.w0.5.dist

# The PDB sequence database is a prerequisite only when REDO_SEARCHES is
# set.
ifdef REDO_SEARCHES
PDB_LIB_DEPEND := ${PDB_DB}
else
PDB_LIB_DEPEND :=
endif

# NOTE(review): this rule spells the option -dbsize while the other
# scoring rules in this file use -db_size -- confirm hmmscore accepts both.
%.w0.5.dist: %.w0.5.mlib ${PDB_LIB_DEPEND}
	${HMMSCORE} w0.5foo \
		-modellibrary $< \
		-db ${PDB_DB} \
		-dbsize ${LIBSIZE} \
		-select_score 4 -Emax 40.
	mv -f w0.5foo.1.$*.w0.5.mod.dist $@

%-w0.5-scores.rdb: %.w0.5.dist ${SCOP}
	${ANNOTATE} $*-w0.5 < $< > $@

%-w0.5-scores.html: %-w0.5-scores.rdb
	${YEAST_SCRIPTS}/oneway_hits_rdb2html $*-w0.5-scores < $^ > $@

#############
#   LOGOS   #
#############

# Model built from the alignment under the run name $*.nothin.
# modelfromalign is taken from ${BIN_SAM}, like every other SAM program
# invoked in this file, rather than from whatever happens to be first on
# PATH.
%.nothin.mod: %.a2m.gz
	${BIN_SAM}/modelfromalign $*.nothin -alignfile $^ \
		-insert ${TRANS_REG} \
		-prior_library ${MIXTURE} \
		-binary_output 1 \
		-aweight_method 1 -aweight_bits 0.5 -aweight_exponent 10

# Sequence logos.  The ${STRUCT_ALPH} prediction is used as the caption
# (-logo_caption_f) and the target sequence is passed as -logo_under_file;
# the savings file is requested with -logo_savings_output.
%.nothin-logo.eps %.nothin.saves: %.nothin.mod %.${STRUCT_ALPH}.seq
	${BIN_SAM}/makelogo $*.nothin-logo -i $< \
		-logo_start_num ${START_COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* nothin" \
		-logo_caption_f $*.${STRUCT_ALPH}.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_savings_output $*.nothin.saves

%.w0.5-logo.eps %.w0.5.saves: %.w0.5.mod %.${STRUCT_ALPH}.seq
	${BIN_SAM}/makelogo $*.w0.5-logo -i $< \
		-logo_start_num ${START_COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* w0.5" \
		-logo_caption_f $*.${STRUCT_ALPH}.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_savings_output $*.w0.5.saves

# Logo of the structure-alphabet model itself.  The caption file
# ${TARG_AL}.${STRUCT_ALPH}.seq is read by the recipe, so it is listed as a
# prerequisite: make then builds it first and redoes the logo when it
# changes.  The model stays the first prerequisite and is passed as $<.
%.${STRUCT_ALPH}-logo.eps: %.${STRUCT_ALPH}.mod ${TARG_AL}.${STRUCT_ALPH}.seq
	${BIN_SAM}/makelogo $*.${STRUCT_ALPH}-logo -i $< \
		-logo_start_num ${START_COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* ${STRUCT_ALPH}" \
		-logo_caption_f ${TARG_AL}.${STRUCT_ALPH}.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file ${STRUCT_ALPH_COLOR_FILE}

# rasmol colouring script generated from the predicted sequence, with
# ${TARGET}.blank.pdb.gz passed as the -pdb argument.
%.${STRUCT_ALPH}-color.rasmol: %.${STRUCT_ALPH}.seq
	${YEAST_SCRIPTS}/rasmol_color_from_burial -pdb ${TARGET}.blank.pdb.gz \
		-color ${STRUCT_RASMOL_COLOR} \
		< $^ > $@

###############
# TOP HITS    #
###############

# define the threshold below which you want hits reported.
# If ANY of the methods reports a hit this good, it will be included
# in ${TARGET}.${AL_METHOD}.best-scores.rdb
ifndef BEST_EVALUE
BEST_EVALUE := 1.e-05
endif

# report at least this many hits, even if there are no good evalues.
# NUM_BEST is handed to best_scores as its -num argument.
ifndef NUM_BEST
NUM_BEST := 10
endif

# Command-style targets of this section; they do not name files.
.PHONY: mutual_info_start_section mutual_info compute_mutual_info \
	print_mutual_info redo_mutual_info \
	do_mutual_info_30 do_mutual_info_35 do_mutual_info_40 \
	do_mutual_info_50 do_mutual_info_62 \
	build_probs contact_prediction add_contact_prediction

#find the best hits (include dupes)
%.${AL_METHOD}.best-scores.rdb: \
		%.${AL_METHOD}-w0.5-scores.rdb \
		%.template-lib-scores.rdb \
		%.${AL_METHOD}-100-30-dssp-ebghstl-scores.rdb \
		%.${AL_METHOD}-100-30-stride-ebghtl-scores.rdb \
		%.${AL_METHOD}-100-30-str2-scores.rdb \
		%.${AL_METHOD}-100-30-alpha-scores.rdb \
		%.${AL_METHOD}-100-30-bys-scores.rdb \
		%.${AL_METHOD}-100-30-CB_burial_14_7-scores.rdb \
		%.${AL_METHOD}-100-40-40-str2+CB_burial_14_7-scores.rdb \
		%.${AL_METHOD}-100-30-dssp-ehl2-scores.rdb
	${YEAST_SCRIPTS}/best_scores \
		-num ${NUM_BEST} -E ${BEST_EVALUE} -lib_size ${LIBSIZE} \
		$^ > $@

%.best-scores.html: %.best-scores.rdb
	${YEAST_SCRIPTS}/oneway_hits_rdb2html $*.best_hits < $^ > $@

###########################################
#               ALIGNMENTS                #
###########################################

#track models to be used in pairwise alignments
# NOTE(review): STRUCT_MLIB ends in .mod whereas every other *_MLIB macro
# below ends in .mlib -- confirm against the rules that use it.
TRACKMOD_STRUCT :=${TARG_AL}.${STRUCT_ALPH}.mod
STRUCT_MLIB :=${TARG_AL}-100-30-${STRUCT_ALPH}.mod
SEED_PAIR := ${TARGET}.a2m,${TARG_AL}.${STRUCT_ALPH}.seq

EBGHSTL_TRACKMOD := ${TARG_AL}.dssp-ebghstl.mod
EBGHTL_TRACKMOD := ${TARG_AL}.stride-ebghtl.mod
STR2_TRACKMOD := ${TARG_AL}.str2.mod

W05_MLIB := ${TARG_AL}.w0.5.mlib
EBGHSTL_MLIB := ${TARG_AL}-100-30-dssp-ebghstl.mlib
EBGHTL_MLIB := ${TARG_AL}-100-30-stride-ebghtl.mlib
STR2_MLIB := ${TARG_AL}-100-30-str2.mlib
ALPHA_MLIB := ${TARG_AL}-100-30-alpha.mlib
DSSP_EHL2_MLIB := ${TARG_AL}-100-30-dssp-ehl2.mlib

#settings of how many templates to predict vs. number of
#alignments to convert to CASP format must be done carefully
# need to add error checking so these numbers don't conflict
#with each other

# how many alignments to select from best templates
ifndef NUM_ALIGNMENTS
NUM_ALIGNMENTS := 250
endif

#top alignments we report for top_reported
ifndef NUM_TOP
NUM_TOP := 5
endif

#build an HMM from target sequence only to produce alignments
#similar to simple Smith-Waterman.  We observe that the
#T2K HMMs are so general that they may drift away from the
#original seed sequence
%.mod: %.a2m
	${BIN_SAM}/modelfromalign $* \
		-alignfile $^ \
		-insert /projects/compbio/lib/fssp-trained.regularizer \
		-aweight_bits 0.8 \
		-fimtrans -1 \
		-fimstrength 1 \
		-ins_jump_conf 1 \
		-match_jump_conf 1 \
		-del_jump_conf 1 \
		-binary_output 1 \
		-prior_library ${MIXTURE} \
		-a2mdots 0 \
		-a protein \
		-sw 2 -jump_in_prob 0.2 -jump_out_prob 1 \
		-aweight_method 1 \
		-aweight_exponent 10

#report sorted list of the pairwise alignments
# ALIGNMENT_FILES is evaluated once, when this file is read: the a2m.gz
# files in the sub-directories whose names start with a digit 1-9.
ALIGNMENT_FILES:= $(wildcard [1-9]*/*a2m.gz)
%.many_alignments.rdb: %.best-scores.rdb ${ALIGNMENT_FILES}
	${YEAST_SCRIPTS}/gather_best_align_scores ${NUM_ALIGNMENTS} ${WORKDIR} ${TARGET} < $< > $@

#get the ${NUM_TOP} best alignments
%.top_reported_alignments.rdb: \
		%.many_alignments.rdb \
		%.best-scores.rdb
	${YEAST_SCRIPTS}/top_reported_alignments ${NUM_TOP} $^ > $@

#convert this to html and add to the summary page
%.many_alignments.html: %.many_alignments.rdb %.best-scores.rdb
	${YEAST_SCRIPTS}/casp_summary_report_html \
		--align $*.many_alignments.rdb \
		--best $*.best-scores.rdb \
		--target ${TARGET} > $@

%.top_reported_alignments.html: %.top_reported_alignments.rdb %.best-scores.rdb
	${YEAST_SCRIPTS}/casp_summary_report_html \
		--align $*.top_reported_alignments.rdb \
		--best $*.best-scores.rdb \
		--target ${TARGET} --make_al > $@

#############################################
# MUTUAL INFORMATION FOR CONTACT PREDICTION #
#############################################

mutual_info_start_section:
ifndef NO_SUMMARY
	${YEAST_SCRIPTS}/add_section_head_summary_html \
		"Significant mutual information between column pairs" >> ${WORKDIR}/summary.html
endif

mutual_info: compute_mutual_info print_mutual_info

# One sub-make per alignment method, each asked for all five thinning
# levels.  Failures are ignored (leading -).
compute_mutual_info:
	-$(foreach d,t2k t04,\
		${MAKE} -k AL_METHOD=${d} \
		$(foreach v,30 35 40 50 62, do_mutual_info_${v}) ; )

print_mutual_info:
ifndef NO_SUMMARY
	-$(foreach d,t2k t04,\
		$(foreach v,30 35 40 50 62, \
		   ${YEAST_SCRIPTS}/add_summary_html \
		   "Mutual information table (align thinned to ${v}% ID)" \
		   ${TARGET}.${d}-thin${v}.mi.rdb >> ${WORKDIR}/summary.html;))
endif

# Each thinning level gets its own CORR_SIGNIF value, passed to a
# sub-make that builds the thinned alignment and its mi.rdb table.
do_mutual_info_30:
	${MAKE} -k CORR_SIGNIF=100 ${TARG_AL}-thin30.a2m.gz ${TARG_AL}-thin30.mi.rdb

do_mutual_info_35:
	${MAKE} -k CORR_SIGNIF=100 ${TARG_AL}-thin35.a2m.gz ${TARG_AL}-thin35.mi.rdb

do_mutual_info_40:
	${MAKE} -k CORR_SIGNIF=20 ${TARG_AL}-thin40.a2m.gz ${TARG_AL}-thin40.mi.rdb

do_mutual_info_50:
	${MAKE} -k CORR_SIGNIF=2 ${TARG_AL}-thin50.a2m.gz ${TARG_AL}-thin50.mi.rdb

do_mutual_info_62:
	${MAKE} -k CORR_SIGNIF=0.03 ${TARG_AL}-thin62.a2m.gz ${TARG_AL}-thin62.mi.rdb

# Move the existing tables aside into old-mi and recompute them.
# The mkdir and mv are allowed to fail (directory already there, no
# tables yet).
redo_mutual_info:
	-mkdir old-mi
	-mv -f *.mi.rdb old-mi
	${MAKE} -k compute_mutual_info

# Defaults for the correlated-columns parameters written into the command
# script by the %.mi.rdb rules below.
ifndef CORR_MIN_SEP
CORR_MIN_SEP := 7
endif

ifndef CORR_SIGNIF
CORR_SIGNIF := 0.1
endif

ifndef CORR_OCC_FRAC
CORR_OCC_FRAC := 0.7
endif

ifndef CORR_START_COL
CORR_START_COL := ${START_COL}
endif

ifeq (${GCC_VERSION},2.96)
# This version of correlated-columns is compiled to run
# using the 2.96 g++ libraries, but may be out of date.
CORR_COLUMNS := /cse/faculty/karplus/commands/2.96/correlated-columns
else
CORR_COLUMNS := /projects/compbio/programs/correlated-columns/bin/${UNAME_P}/opt/correlated-columns
endif

# With REDO_MI set, the mi.rdb tables also depend on the executable.
ifdef REDO_MI
MI_DEPEND:= ${CORR_COLUMNS}
else
MI_DEPEND:=
endif

# Mutual-information table (and the matching .mi.constraints file named in
# the MutualInfoSignif command) for an alignment.  The command script for
# correlated-columns is written to $@.script -- a name derived from the
# target -- because compute_mutual_info builds several tables from the same
# directory and a single shared script name would be overwritten when those
# builds overlap (make -j).
%.mi.rdb: %.a2m.gz ${MI_DEPEND}
	echo "SetSeed" > $@.script
	echo "SetSignif ${CORR_SIGNIF}" >> $@.script
	echo "SetMinPairs ${CORR_OCC_FRAC}" >> $@.script
	echo "SetMinSep ${CORR_MIN_SEP}" >> $@.script
	echo "ReadA2M $< ${CORR_START_COL}" >> $@.script
	echo "MutualInfoSignif $@ ${@:.rdb=.constraints}" >> $@.script
	${CORR_COLUMNS} < $@.script
	rm $@.script

# Same for an uncompressed alignment.
# NOTE(review): unlike the .a2m.gz variant this script sets neither
# SetMinSep nor the start column on ReadA2M -- confirm whether that
# difference is deliberate.
%.mi.rdb: %.a2m ${MI_DEPEND}
	echo "SetSeed" > $@.script
	echo "SetSignif ${CORR_SIGNIF}" >> $@.script
	echo "SetMinPairs ${CORR_OCC_FRAC}" >> $@.script
	echo "ReadA2M $<" >> $@.script
	echo "MutualInfoSignif $@ ${@:.rdb=.constraints}" >> $@.script
	${CORR_COLUMNS} < $@.script
	rm $@.script

########################################
# CONTACT PREDICTION USING NEURAL NETS #
########################################

# RR_EXT selects the neural network and the matching traincontactnn
# arguments; only the values 280 (default) and 134 are handled here.
ifndef RR_EXT
RR_EXT:=280
endif

ifeq (${RR_EXT},280)
RR_NN_NAME:=NN280-240n300.net.28
RR_ARGS:=-start ${START_COL} -entropy -W 3 -M 3 -T 20.0 -distribution .t04 \
	-S .t04.CB_burial_14_7.rdb,.t04.near-backbone-11.rdb,.t04.str2.rdb \
	-C .t04-thin62.mi.rdb,.t04-thin40.mi.rdb,.t04-thin35.mi.rdb,.t04-thin30.mi.rdb
endif

ifeq (${RR_EXT},134)
RR_NN_NAME:=NN134-90n70err.net.6
RR_ARGS:=-start ${START_COL} -entropy -window 1 -T 120.0 \
	-S .t2k.CB_burial_14_7.rdb,.t2k.str2.rdb,.t04.CB_burial_14_7.rdb,.t04.str2.rdb \
	-C .t2k-thin62.mi.rdb,.t2k-thin40.mi.rdb,.t2k-thin35.mi.rdb,.t2k-thin30.mi.rdb,.t04-thin62.mi.rdb,.t04-thin40.mi.rdb,.t04-thin35.mi.rdb,.t04-thin30.mi.rdb
endif

RR_NEURAL_NET:=${CASP6_NETWORKS}/${RR_NN_NAME}

VALIDATE2RR := ${PCB}/validate2rr
LWNN_VALIDATE := ${PCB_SUB}/lwnn_validate
TRAIN_CONTACT_NN := ${PCB_SUB}/traincontactnn

# The prediction requires using the 'traincontactnn' program
# to build a list of inputs for the lwnn_validate program
# lwnn_validate uses those inputs and a specified neural network
# to make predictions.  Those predictions are sorted by raw score
# and the sequence_length/2 best scoring predictions form
# the submitted RR predictions.

# ============================ build distributions =============================

build_probs:
	-$(foreach j,${MA_METHODS},\
		${MAKE} AL_METHOD=${j} ${TARGET}.${j}.probs;)

# and the '*.probs' file with the residue distributions using
# 'estimate-dist'   Similar to '*.saves'
# for now, I'll base the distribution on the *.a2m.gz files
# NOTE(review): the scratch script lives in ${TMPDIR}, for which no default
# is visible in this part of the file, and estimate-dist is looked up on
# PATH rather than in ${PCB_SUB} -- confirm both are set up by the caller.
%.probs : %.a2m.gz
	echo Alphabet ExtAA > ${TMPDIR}/tmp-$*-probs.script
	echo ClipWeight 1.0 >> ${TMPDIR}/tmp-$*-probs.script
	echo PushReg /projects/compbio/lib/recode3.20comp >> ${TMPDIR}/tmp-$*-probs.script
	echo SequenceWeight HenikoffWeight 1.0 1.0 >> ${TMPDIR}/tmp-$*-probs.script
	echo ReadA2M $^ >> ${TMPDIR}/tmp-$*-probs.script
	echo PrintProbs $@ >> ${TMPDIR}/tmp-$*-probs.script
	echo quit >> ${TMPDIR}/tmp-$*-probs.script
	estimate-dist < ${TMPDIR}/tmp-$*-probs.script
	rm -f ${TMPDIR}/tmp-$*-probs.script

# Contact (RR) prediction.  Prerequisites, for every alignment method in
# MA_METHODS: the str2 and CB_burial_14_7 prediction tables, the
# mutual-information tables at 62/40/35/30% thinning, and the .probs file.
# traincontactnn dumps the network inputs to a scratch file in ${TMPDIR};
# lwnn_validate and validate2rr turn them into the .rr file.  Afterwards
# the link `rr' is pointed at the .rr.rasmol file and the ${TARGET}.pairs
# and scratch files are removed.
${TARGET}.${RR_EXT}.rr : $(foreach al,${MA_METHODS},$(foreach s,str2 CB_burial_14_7, ${TARGET}.${al}.${s}.rdb)) \
		$(foreach al,${MA_METHODS},$(foreach t,62 40 35 30, ${TARGET}.${al}-thin${t}.mi.rdb)) \
		$(foreach al,${MA_METHODS},${TARGET}.${al}.probs)
	${TRAIN_CONTACT_NN} -dump -id ${TARGET} ${RR_ARGS} > ${TMPDIR}/${TARGET}.validate
	${LWNN_VALIDATE} -a -v ${TMPDIR}/${TARGET}.validate ${RR_NEURAL_NET} \
	| ${VALIDATE2RR} -s ${START_COL} -c -n ${RR_NEURAL_NET} -t ${TARGET} > ${TARGET}.${RR_EXT}.rr
	ln -sf ${TARGET}.${RR_EXT}.rr.rasmol rr
	rm ${TARGET}.pairs
	rm ${TMPDIR}/${TARGET}.validate

contact_prediction: ${TARGET}.${RR_EXT}.rr

add_contact_prediction: contact_prediction
ifndef NO_SUMMARY
	${YEAST_SCRIPTS}/add_summary_multiple_formats \
		"contact predictions" \
		${TARGET}.${RR_EXT} \
		rr rr.constraints \
		>> ${WORKDIR}/summary.html
endif

##############
# FRAGFINDER #
##############

FRAGFINDER_SEQS := ${PCEM_INDEXES}/dunbrack-50pc-2621.x-seqs
# Per-track databases used by fragfinder (sequence db is FRAGFINDER_SEQS).
FRAGFINDER_CB_BURIAL_14_7 := $(PCEM_INDEXES)/dunbrack-50pc-2621.CB-burial-14-7s
FRAGFINDER_STR2 := $(PCEM_INDEXES)/dunbrack-50pc-2621.str2s

# Comma-joined database lists, one entry per track.
FRAGFINDER_STR2+CB_BURIAL_14_7_THREETRACKDBS := $(FRAGFINDER_SEQS),$(FRAGFINDER_STR2),$(FRAGFINDER_CB_BURIAL_14_7)
FRAGFINDER_STR2_TWOTRACKDBS := $(FRAGFINDER_SEQS),$(FRAGFINDER_STR2)

# Two-track fragment search (amino acid + str2).
# fragfinder is run with run name $*; the resulting $*.frag and $*.fstat
# files are then compressed.
%.frag.gz: %.w0.5.mod %.str2.mod
	$(FRAGFINDER) $* \
	    -a protein,STR2 \
	    -trackmod $*.w0.5.mod,$*.str2.mod \
	    -track_coeff 1.0,0.3 \
	    -db $(FRAGFINDER_STR2_TWOTRACKDBS) \
	    -firstsequence $(TARGET).a2m,- \
	    -fraglen 9 -numpermatch 6
	gzip -9f $(@:.gz=)
	gzip -9f $*.fstat

# Three-track fragment search (amino acid + str2 + CB_burial_14_7),
# run name $*.many, 30 fragments per match.
%.many.frag.gz: %.w0.5.mod %.str2.mod %.CB_burial_14_7.mod
	$(FRAGFINDER) $*.many \
	    -alphabet protein,str2,CB_BURIAL_14_7 \
	    -trackmod $*.w0.5.mod,$*.str2.mod,$*.CB_burial_14_7.mod \
	    -trackcoeff 1.0,2.4,1.8 \
	    -db $(FRAGFINDER_STR2+CB_BURIAL_14_7_THREETRACKDBS) \
	    -firstsequence $(TARGET).a2m,-,- \
	    -fraglen 9 -numpermatch 30
	gzip -9f $(@:.gz=)
	gzip -9f $*.many.fstat

##############
# UNDERTAKER #
##############

# Two-track alignments handed to undertaker.  The default branch scores
# the template databases with the target's amino-acid and str2 models;
# the USE_MLIB_FOR_UNDERTAKER branch uses a model library instead.
ifndef USE_MLIB_FOR_UNDERTAKER

%-2track-undertaker.a2m: %.w0.5.mod %.str2.mod \
		$(TEMPLATE_SEQS) $(TEMPLATE_STR2)
	$(HMMSCORE) $*-2track-undertaker \
	    -calibrate 1 \
	    -a protein,STR2 \
	    -trackmod $*.w0.5.mod,$*.str2.mod \
	    -track_coeff 1.0,0.3 \
	    -sw 2 -adpstyle $(ADPSTYLE) \
	    -db $(TARGET).a2m,$*.str2.seq \
	    -db $(TWOTRACKDBS) \
	    -select_score 8 -Emax $(BEST_EVALUE) \
	    -select_align 4

else

# WARNING: THIS OPTION NOT DEBUGGED YET!
# There seems to be a bug in HMMSCORE that causes the db list on the
# command line to be misparsed when using a model library
%-2track-undertaker.a2m: $(STR2_MLIB) $(STR2_DEPEND)
	$(HMMSCORE) str2foo \
	    -modellibrary $< \
	    -db $(TARGET).a2m,$*.str2.seq \
	    -db $(TWOTRACKDBS) \
	    -select_score 8 -Emax $(BEST_EVALUE) \
	    -select_align 4
	mv -f str2foo.1.$*-100-30-str2.a2m $@

endif

# will need to create a "decoys" directory before any full 3D building
# with undertaker can be done.
# Full undertaker optimization run: feeds $*.under to undertaker, logging
# to $*.log, then compresses the resulting decoys, the log and
# Template.atoms (compression failures are ignored).
decoys/${TARGET}.%-opt2.pdb.gz: %.under %.costfcn
	-mkdir -p decoys
	nice -5 ${UNDERTAKER} < $*.under > $*.log 2>&1
	-gzip -f decoys/${TARGET}.$**pdb
	-gzip -9f $*.log Template.atoms

# Undertaker script that reads every decoy PDB file for this target.
read-decoys.under: decoys
	echo "InfilePrefix decoys/"> $@
	ls decoys/*${TARGET}*pdb* \
	    | sed 's;decoys/;ReadConformPDB ;' \
	    >> $@
	echo "InfilePrefix" >> $@
	-chgrp protein $@
	-chmod g+w $@

# Runs show-align.under with "t2k" replaced by the stem.
# Output and errors go to show-align.log using the portable
# '> file 2>&1' form (the same form the decoys rule uses) rather than the
# csh/bash-only '>&'.
%.undertaker-align.${PDBEXT}: show-align.under \
		%.undertaker-align.under \
		%-2track-undertaker.a2m
	sed s/t2k/$*/g < show-align.under \
	    | nice -2 ${UNDERTAKER} > show-align.log 2>&1
ifndef NOGZIP_PDB
	gzip -f $*.undertaker-align.pdb
endif

# Undertaker scripts for reading the alignments found in directory $*.
%/read-alignments-noscwrl.under: %
	cd $*; \
	    ${UNDERTAKER_SCRIPTS}/make-read-fragments.csh \
	    > read-alignments-noscwrl.under

%/read-alignments-scwrl.under: %
	cd $*; \
	    ${UNDERTAKER_SCRIPTS}/make-read-fragments.csh SCWRL \
	    > read-alignments-scwrl.under

# for close homology modeling, may want to pick out best scores using
# single sequences
# For each directory x whose name starts with a digit, collect the lines
# containing "x " from x/*SW*dist, then sort numerically on the 4th field.
# The loop runs inline in the recipe shell (no shared tmp.script, no csh),
# and the sort key uses -k 4, the POSIX spelling of the obsolete '+3'.
sw-best: [1-9]*
	for x in [0-9]*; do \
	    grep -h "$$x " $$x/*SW*dist; \
	done \
	    | sort -n -k 4 \
	    | uniq \
	    > $@

# Everything below, up to the matching endifs, is only defined when PRED
# (the template chain ID) is set, i.e. in the recursive makes that build
# pairwise alignments.
ifdef PRED
# first two letters of PRED
PRED2 := $(shell echo ${PRED} | sed 's/\(..\).*/\1/')
ifdef PRED2
PRED_NOSTRUCT := ${PCEM}/pdb/${PRED2}/${PRED}/nostruct-align
PRED_INFO := ${PCEM}/pdb/${PRED2}/${PRED}/info

# -sw setting for hmmscore, chosen from ALIGN_TYPE
ifdef ALIGN_TYPE
ifeq (${ALIGN_TYPE},local)
SW=2
endif
ifeq (${ALIGN_TYPE},global)
SW=0
endif
ifeq (${ALIGN_TYPE},simplesw)
SW=2
endif
endif

# ALIGN_MODEL / ALIGN_NAME depend on which side provides the model (MASTER).
ifeq (${MASTER},target)
ifeq (${ALIGN_TYPE},local)
ALIGN_MODEL=${TARG_AL}.w0.5.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif
ifeq (${ALIGN_TYPE},global)
ALIGN_MODEL=${TARG_AL}.w0.5.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif
ifeq (${ALIGN_TYPE},simplesw)
ALIGN_MODEL=${TARGET}.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${ALIGN_TYPE}-adpstyle${ADP}
endif
endif

ifeq (${MASTER},template)
ALIGN_MODEL := ${PRED_NOSTRUCT}/${PRED}.${AL_METHOD}-w0.5.mod
ALIGN_NAME := ${PRED}/${PRED}-${TARGET}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif

ifeq (${MASTER},fssp)
FSSP_STRUCT := ${PCEM}/pdb/${FSSP2}/${FSSP}/struct-align
ALIGN_MODEL := ${FSSP_STRUCT}/${FSSP}.fssp.w0.5.mod
ALIGN_NAME := ${PRED}/${FSSP}-${TARGET}-fssp-${ALIGN_TYPE}-adpstyle${ADP}
endif

ifdef ALIGN_NAME
.PHONY: single-track-alignment
single-track-alignment: ${ALIGN_NAME}.a2m
	echo $^ made.

# Align the target and the template sequence to ALIGN_MODEL.
${ALIGN_NAME}.a2m: ${ALIGN_MODEL} ${PRED}/${PRED}.seq
	${HMMSCORE} ${ALIGN_NAME} \
	    -alphabet protein -i $< -db ${TARGET}.a2m \
	    -db ${PRED}/${PRED}.seq \
	    -db_size ${LIBSIZE} \
	    -simple_threshold 10000 \
	    -sw ${SW} -dpstyle 0 -subtract_null 4 \
	    -adpstyle ${ADP} \
	    -select_align 8
	gzip -f ${ALIGN_NAME}.dist
endif

.PHONY: single-track-target-alignments template-alignments

# Target-model alignments for both alignment methods and all three
# alignment types.  The template sequence is first copied from the info
# directory if available, otherwise extracted from ${PDB_DB}; an
# unsuccessful extraction removes the (partial) .seq file.
# ADP=5 alignments are skipped when ALIGN_VITERBI is set.
single-track-target-alignments:
	-mkdir -p ${PRED}
	test -e ${PRED}/${PRED}.seq -o '!' -e ${PRED_INFO}/${PRED}.stride-mixed.seq \
	    || cp -p ${PRED_INFO}/${PRED}.stride-mixed.seq ${PRED}/${PRED}.seq
	test -e ${PRED}/${PRED}.seq \
	    || ${YEAST_SCRIPTS}/extract-one-seq ${PRED} < ${PDB_DB} > ${PRED}/${PRED}.seq \
	    || { echo removing rm ${PRED}/${PRED}.seq; rm ${PRED}/${PRED}.seq ;}
	-$(foreach al,t2k t04,\
	    $(foreach at,simplesw local global, \
	    ${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \
	    PRED=${PRED} PRED2=${PRED2} \
	    ADP=1 MASTER=target single-track-alignment; ))
ifndef ALIGN_VITERBI
	-$(foreach al,t2k t04,\
	    $(foreach at,simplesw local global, \
	    ${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \
	    PRED=${PRED} PRED2=${PRED2} \
	    ADP=5 MASTER=target single-track-alignment; ))
endif

# Template-model alignments (local and global), followed by the muscle
# profile-profile alignments for both methods.
template-alignments:
	-mkdir -p ${PRED}
	-$(foreach al,t2k t04,\
	    $(foreach at,local global, \
	    ${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \
	    PRED=${PRED} PRED2=${PRED2} ADP=1 MASTER=template single-track-alignment;))
ifndef ALIGN_VITERBI
	-$(foreach al,t2k t04,\
	    $(foreach at,local global, \
	    ${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \
	    PRED=${PRED} PRED2=${PRED2} ADP=5 MASTER=template single-track-alignment;))
endif
	-$(foreach al,t04 t2k, \
	    ${MAKE} -k ${PRED}/${TARGET}-${PRED}-${al}-muscle.a2m.gz \
	    PRED=${PRED} PRED2=${PRED2} ;)

########################################
# FSSP-based alignments
########################################
ifdef FSSP
ifdef FSSP2
.PHONY: fssp-template-alignments
fssp-template-alignments:
	-mkdir -p ${PRED}
	-${MAKE} -k ALIGN_TYPE=local \
	    PRED=${PRED} PRED2=${PRED2} ADP=5 \
	    MASTER=fssp FSSP=${FSSP} FSSP2=${FSSP2} single-track-alignment
	-${MAKE} -k ALIGN_TYPE=global \
	    PRED=${PRED} PRED2=${PRED2} ADP=5 \
	    MASTER=fssp FSSP=${FSSP} FSSP2=${FSSP2} single-track-alignment
endif
endif

########################################
# Two-track target alignments
########################################
ifndef STRUCT_WEIGHT
STRUCT_WEIGHT=0.3
endif

ifdef STRUCT_ALPH
# copy local structure alphabet name to INFO_ALPH, renaming as needed to
# match sequence names in info directories
INFO_ALPH := ${STRUCT_ALPH}
ifeq (${STRUCT_ALPH},stride-ebghtl)
INFO_ALPH := 2d
endif
ifeq (${STRUCT_ALPH},dssp-ebghstl)
INFO_ALPH := dssp
endif
ifeq (${STRUCT_ALPH},dssp-ehl2)
INFO_ALPH := dssp
endif

PRED_SEED_PAIR := ${PRED_INFO}/${PRED}.stride-mixed.seq,${PRED_INFO}/${PRED}.stride-mixed.${INFO_ALPH}
TWO_ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-${STRUCT_ALPH}-${STRUCT_WEIGHT}-adpstyle${ADP}

.PHONY: two-track-alignment
two-track-alignment: ${TWO_ALIGN_NAME}.a2m
	echo $^ made.

# NOTE(review): TRACKMOD_STRUCT, SAM_STRUCT_ALPH and SEED_PAIR are not
# defined in this part of the file -- presumably set elsewhere; confirm.
${TWO_ALIGN_NAME}.a2m: ${TARG_AL}.w0.5.mod ${TRACKMOD_STRUCT}
	-mkdir -p ${PRED}
	${HMMSCORE} ${TWO_ALIGN_NAME} \
	    -alphabet protein,${SAM_STRUCT_ALPH} \
	    -trackmod ${TARG_AL}.w0.5.mod,${TRACKMOD_STRUCT} \
	    -trackcoeff 1.0,${STRUCT_WEIGHT} \
	    -db ${SEED_PAIR} \
	    -db ${PRED_SEED_PAIR} \
	    -db_size ${LIBSIZE} \
	    -simple_threshold 10000 \
	    -sw ${SW} -dpstyle 0 -subtract_null 4 \
	    -adpstyle ${ADP} \
	    -select_align 8
	gzip -f ${TWO_ALIGN_NAME}.dist
endif
# Note: no two-track alignment for dssp-ehl2, since we aren't creating
# a sequence for that alphabet yet.

ifdef ALIGN_VITERBI
TWO_TRACK_ADP=1
else
TWO_TRACK_ADP=5
endif

# TO DO:
#	REDUCE number of two-track alignments tried, but
#	be sure to include some of the ones that worked well in
#	alignment tests.
#	(Actually, reducing the number may be a bad idea---we may need more diversity.)
#	ADD a SAM profile-profile alignment.

.PHONY: two-track-alignments

# One sub-make per (method, alignment type, structure alphabet), then an
# extra str2 pass with STRUCT_WEIGHT=1.5.
# Every continued line ends in " \" (space before the backslash) so that
# adjacent shell words stay separated regardless of how the continuation
# line is indented.
two-track-alignments:
	-mkdir -p ${PRED}
	-$(foreach al,t2k t04,$(foreach l,local global,$(foreach x,${STRUCTURE_ALPHABETS},\
	    ${MAKE} -k \
	    AL_METHOD=${al} \
	    ALIGN_TYPE=${l} ADP=${TWO_TRACK_ADP} \
	    PRED=${PRED} PRED2=${PRED2} \
	    MASTER=target STRUCT_ALPH=${x} \
	    two-track-alignment;)))
	-$(foreach al,t2k t04,\
	    $(foreach l,local global,${MAKE} -k \
	    AL_METHOD=${al} \
	    ALIGN_TYPE=${l} ADP=${TWO_TRACK_ADP} \
	    PRED=${PRED} PRED2=${PRED2} \
	    STRUCT_WEIGHT=1.5 \
	    MASTER=target STRUCT_ALPH=str2 \
	    two-track-alignment;))

########################################
# Three-track target alignments
########################################
ifndef BURIAL_WEIGHT
BURIAL_WEIGHT=0.4
endif

ifndef BURIAL_ALPH
BURIAL_ALPH := CB_burial_14_7
endif

# name of the burial alphabet as used in the info-directory file names
BURIAL_INFO_ALPH := ${BURIAL_ALPH}
ifeq (${BURIAL_ALPH},CB_burial_14_7)
BURIAL_INFO_ALPH := CB-burial-14-7
endif

THREE_ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-${STRUCT_ALPH}+${BURIAL_ALPH}-${STRUCT_WEIGHT}+${BURIAL_WEIGHT}-adpstyle${ADP}

.PHONY: three-track-alignment three-track-alignments
three-track-alignment: ${THREE_ALIGN_NAME}.a2m
	echo $^ made.

${THREE_ALIGN_NAME}.a2m: ${TARG_AL}.w0.5.mod \
		${TARG_AL}.${STRUCT_ALPH}.mod \
		${TARG_AL}.${BURIAL_ALPH}.mod
	-mkdir -p ${PRED}
	${HMMSCORE} ${THREE_ALIGN_NAME} \
	    -alphabet protein,${SAM_STRUCT_ALPH},${BURIAL_ALPH} \
	    -trackmod ${TARG_AL}.w0.5.mod,${TARG_AL}.${STRUCT_ALPH}.mod,${TARG_AL}.${BURIAL_ALPH}.mod \
	    -trackcoeff 1.0,${STRUCT_WEIGHT},${BURIAL_WEIGHT} \
	    -db ${TARGET}.a2m,${TARG_AL}.${STRUCT_ALPH}.seq,${TARG_AL}.${BURIAL_ALPH}.seq \
	    -db ${PRED_INFO}/${PRED}.stride-mixed.seq,${PRED_INFO}/${PRED}.stride-mixed.${INFO_ALPH},${PRED_INFO}/${PRED}.stride-mixed.${BURIAL_INFO_ALPH} \
	    -db_size ${LIBSIZE} \
	    -simple_threshold 10000 \
	    -sw ${SW} -dpstyle 0 -subtract_null 4 \
	    -adpstyle ${ADP} \
	    -select_align 8
	gzip -f ${THREE_ALIGN_NAME}.dist

ifdef ALIGN_VITERBI
THREE_TRACK_ADP=1
else
THREE_TRACK_ADP=5
endif

# For each method: local and global alignments with weights 0.4/0.4,
# then local and global with weights 2.2/1.8.
three-track-alignments:
	-mkdir -p ${PRED}
	-$(foreach al,t2k t04,\
	    ${MAKE} -k ALIGN_TYPE=local ADP=${THREE_TRACK_ADP} \
	    AL_METHOD=${al} \
	    PRED=${PRED} PRED2=${PRED2} \
	    MASTER=target STRUCT_ALPH=str2 BURIAL_ALPH=CB_burial_14_7 \
	    STRUCT_WEIGHT=0.4 BURIAL_WEIGHT=0.4 \
	    three-track-alignment;\
	    ${MAKE} -k ALIGN_TYPE=global ADP=${THREE_TRACK_ADP} \
	    AL_METHOD=${al} \
	    PRED=${PRED} PRED2=${PRED2} \
	    MASTER=target STRUCT_ALPH=str2 BURIAL_ALPH=CB_burial_14_7 \
	    STRUCT_WEIGHT=0.4 BURIAL_WEIGHT=0.4 \
	    three-track-alignment;\
	    ${MAKE} -k ALIGN_TYPE=local ADP=${THREE_TRACK_ADP} \
	    AL_METHOD=${al} \
	    PRED=${PRED} PRED2=${PRED2} \
	    MASTER=target STRUCT_ALPH=str2 BURIAL_ALPH=CB_burial_14_7 \
	    STRUCT_WEIGHT=2.2 BURIAL_WEIGHT=1.8 \
	    three-track-alignment;\
	    ${MAKE} -k ALIGN_TYPE=global ADP=${THREE_TRACK_ADP} \
	    AL_METHOD=${al} \
	    PRED=${PRED} PRED2=${PRED2} \
	    MASTER=target STRUCT_ALPH=str2 BURIAL_ALPH=CB_burial_14_7 \
	    STRUCT_WEIGHT=2.2 BURIAL_WEIGHT=1.8 \
	    three-track-alignment;\
	    )

endif #if PRED2
endif #if PRED

#######################################
# tree building (not done by default) #
#######################################

# programs in non-standard places
PHYTREE := /projects/compbio/usr/karplus/src/phytree/phytree
DG := /projects/compbio/usr/karplus/src/phytree/dg
DTREE := /projects/compbio/usr/karplus/src/phytree/dtree

# phytree is run on the uncompressed alignment; afterwards the alignment
# and the sorted alignment are re-compressed and the trace and tree_weight
# files are removed (all cleanup steps are best-effort).
%_sorted.ids %.tree %_sorted.a2m.gz %.phytrace: %.a2m.gz
	-gunzip -f $*.a2m.gz
	${PHYTREE} -f -o -i -r flat $* $*.a2m ${MIXTURE}
	-gzip -f $*.a2m
	-gzip -f $*_sorted.a2m
	-rm $*.phytrace $*.tree_weight

%tree.ps: %tree
	${DG} $^

%tree-unroot.ps: %tree
	${DTREE} $^

# extract the ids stripping off the muldomain-added section.
%.bare-ids: %.a2m.gz
	gunzip -c $^ \
	    | ${PCBS}/ids-from-fasta -nodom \
	    >$@

# WARNING: DISTILL is not installed on SoE Linux computers,
#	but ps2pdf produces very verbose pdf files.
#	Using the "pdf-logos" in Makefile will make all the PDF files
#	on ${DISTILL_HOST}.
%.pdf: %.eps
	ssh ${DISTILL_HOST} 'cd ${WORKDIR}; distill $^' < /dev/null

#################################
# KEY RESIDUES AND CONSERVATION #
#################################
# This section is for realignment using key residues and selecting
# sequences that have those key residues.
# Convenience aggregates for the current AL_METHOD; they name no file.
.PHONY: ${AL_METHOD}-selected ${AL_METHOD}-realign ${AL_METHOD}-realign.w0.5

${AL_METHOD}-selected: \
		${TARGET}.${AL_METHOD}.w0.5.key-residues \
		${TARGET}.${AL_METHOD}.selected.a2m.gz

${AL_METHOD}-realign: \
		${TARGET}.${AL_METHOD}.w0.5.key-residues \
		${TARGET}.${AL_METHOD}.selected.a2m.gz \
		${TARGET}.${AL_METHOD}.realign.a2m.gz

${AL_METHOD}-realign.w0.5: \
		${TARGET}.${AL_METHOD}.w0.5.key-residues \
		${TARGET}.${AL_METHOD}.realign.a2m.gz \
		${TARGET}.${AL_METHOD}.realign.w0.5.mod \
		${TARGET}.${AL_METHOD}.realign.w0.5-logo.eps \
		${TARGET}.${AL_METHOD}.realign.w0.5-logo.pdf

# Thresholds passed to pick-key-residues (-minbits / -minfreq).
ifndef KEY_MIN_SAVINGS
KEY_MIN_SAVINGS := 1.5
endif

ifndef KEY_MIN_FREQ
KEY_MIN_FREQ := 0.04
endif

%.key-residues: %.saves
	${PCEM_SCRIPTS2K}/pick-key-residues \
	    -minbits ${KEY_MIN_SAVINGS} \
	    -minfreq ${KEY_MIN_FREQ} \
	    -first_residue ${START_COL} \
	    <$^ >$@

# Filter the alignment through select-by-key-residues, using the
# key-residues file built from the w0.5 model.
%.selected.a2m.gz: %.a2m.gz %.w0.5.key-residues
	gunzip -c $< \
	    | ${PCEM_SCRIPTS2K}/select-by-key-residues \
	    -first_residue ${START_COL} \
	    -residues $*.w0.5.key-residues \
	    | gzip \
	    >$@

# Realign the full alignment to the model built from the selected sequences.
%.realign.a2m.gz: %.selected.w0.5.mod %.a2m.gz
	hmmscore $*.realign -i $< -db $*.a2m.gz \
	    -adpstyle 5 -sw 2 -selectalign 8
	gzip -8f $*.realign.a2m

# Writes ${TARGET}.$*.conserved.rasmol and makes conserved_$* a symlink to it.
conserved_%: ${TARGET}.%.w0.5.key-residues
	${YEAST_SCRIPTS}/key-to-rasmol \
	    -set_name conserved_$* \
	    < $^ > ${TARGET}.$*.conserved.rasmol
	-ln -sf ${TARGET}.$*.conserved.rasmol $@

##########
# MUSCLE #
##########
# profile-profile alignment using Muscle:
${PRED}/${TARGET}-${PRED}-%-muscle.a2m.gz : ${TARGET}.%.a2m.gz
	${PCEM_SCRIPTS04}/muscle-profile-profile \
	    $^ ${PRED_NOSTRUCT}/${PRED}.$*.a2m.gz \
	    -out $@

# This section is for realignment using Bob Edgar's "muscle" program.
%.muscle.gz: %.a2m.gz
	gunzip -c $^ \
	    | muscle -maxhours 2.0 \
	    | gzip \
	    > $@

%.muscle.a2m.gz: %.muscle.gz
	${PCEM_SCRIPTS}/a2m_from_muscle -in $^ -out $@ -guide 1

#############
# ALL-ALIGN #
#############
# all-align.a2m.gz is an alignment created by merging all the
# pairwise alignments into a single multiple-alignment.
# This is useful for looking for consensus about alignments.
# It may also be useful (after thinning at 100%) as an input
# for undertaker.
all-align.a2m.gz: $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]*/*.a2m [1-9][0-9a-z][0-9a-z][0-9a-z]*/*.a2m.gz)
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $@ $^

all-align.pa: all-align.a2m.gz
	${BIN_SAM}/prettyalign $^ -m5 > $@

# A no-thin model built from all-align.a2m may be useful for
# finding consensus columns.
# The weight file and its log are named after the target (instead of the
# shared tmp.weight / tmp.log) so that concurrent builds of different
# models cannot overwrite each other's weights.
%.no-thin.mod: %.a2m.gz
	${PCL}/make-weights.pl $^ $@.weight ${MIXTURE} \
	    "EntropyWeight 0.7 10" 1.0 > $@.weight.log
	${BIN_SAM}/modelfromalign $*.no-thin -alignfile $^ \
	    -prior_library ${MIXTURE} \
	    -alignment_weights $@.weight
	-rm -f $@.weight $@.weight.log

%.no-thin.logo.eps: %.no-thin.mod
	${BIN_SAM}/makelogo $*.no-thin.logo -i $< \
	    -logo_start_num ${START_COL} \
	    -logo_rel_entropy 1 \
	    -logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* no thinning" \
	    -logo_caption_f ${TARG_AL}.dssp-ebghstl.seq \
	    -logo_under_file ${TARGET}.a2m \
	    -logo_savings_output $*.no-thin.saves

########################################
# Building a t2k alignment from a seed #
########################################
TARGET2K := ${PCEM_SCRIPTS2K}/target2k

# Locate the NR database: use the first prefix for which some ${NR}.* file
# exists, trying /scratch, then /var/tmp, then the shared /projects copy.
NR:=/scratch/data/nrp/nr
ifeq ($(wildcard ${NR}.*),)
NR:=/var/tmp/nrp/nr
endif
ifeq ($(wildcard ${NR}.*),)
NR:=/projects/compbio/data/nrp/nr
endif

ifndef ADPSTYLE
ADPSTYLE := 5
endif

ifndef BLAST_MAX
BLAST_MAX := 10000
endif

# With REDO_T2K set, ${NR} becomes a prerequisite of the t2k alignment.
ifdef REDO_T2K
T2K_DEPEND := ${NR}
else
T2K_DEPEND :=
endif

%.t2k.a2m.gz: %.a2m ${T2K_DEPEND}
	${TARGET2K} -out $*.t2k \
	    -final_adpstyle ${ADPSTYLE} \
	    -blast_max_report ${BLAST_MAX} \
	    -db ${NR} \
	    -seed $< -tmp_dir /var/tmp
	gzip -f $*.t2k.a2m

########################################
# Building a t04 alignment from a seed #
########################################
# With REDO_T04 set, the alignment depends on ALWAYS.
# NOTE(review): ALWAYS is not defined in this part of the file --
# presumably an always-out-of-date target declared elsewhere; confirm.
ifdef REDO_T04
T04_DEPEND := ALWAYS
else
T04_DEPEND=
endif

${TARGET}.t04.a2m.gz: ${TARGET}.a2m ${T04_DEPEND}
	echo "making T04 alignment"
	${PCEM_SCRIPTS04}/target04 \
	    -seed $< -out $@ \
	    -tmp /var/tmp -db ${NR} \
	    -final_align viterbi \
	    -thresh 0.0001 -thresh 0.0005 -thresh 0.002 -thresh 0.01
########################################
# TARGETS FOR REMOVING FILES TO REMAKE #
########################################
.PHONY: remove-top-reported-alignments remove-best-scores

remove-top-reported-alignments:
	-rm ${TARG_AL}.top_reported_alignments.rdb

remove-best-scores:
	-rm ${TARG_AL}.best-scores.*

##################
# SCORING DECOYS #
##################

#edit score-all.under to have the cost function desired.

# Undertaker script reading all the decoys in directory $*.
%/read-pdb.under: %
	-mkdir -p $*
	cd $*; ${UNDERTAKER_SCRIPTS}/make-read-decoys.csh > read-pdb.under
	-chgrp protein $@
	-chmod g+w $@

# Human-readable version of a score table; the previous one is kept as .old
decoys/%.pretty: decoys/%.rdb
	-mv -f $@ $@.old
	${CASP6_SCRIPTS}/prettyscore -terse -decpoint < $^ > $@

# Score every decoy with the hand-edited score-all.under, then sort the
# table by the "cost" column.  The sort goes through a scratch file named
# after the target (not a shared sort.tmp) so that different score tables
# can be sorted concurrently.
decoys/score-all.rdb: decoys/read-pdb.under score-all.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	nice -2 ${UNDERTAKER} < score-all.under
	${PCB}/sorttbl cost < $@ > $@.sort.tmp
	mv $@.sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

# Same, but the script is derived from the starter-directory template by
# substituting the target name and the cost-function name ($*).
decoys/score-all.%.rdb: %.costfcn decoys/read-pdb.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	sed -e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
	    < ${CASP6}/starter-directory/score-all.under \
	    | nice -2 ${UNDERTAKER}
	-rm -f $@.sort.tmp
	${PCB}/sorttbl cost < $@ > $@.sort.tmp
	mv -f $@.sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

# edit superimpose-best.under to pick which models to superimpose
best-models.pdb.gz: decoys/read-pdb.under superimpose-best.under
	nice -1 ${UNDERTAKER} < superimpose-best.under
	-gzip -f best-models.pdb

############################################
# FETCHING ROBETTA MODELS FROM CASP6 CACHE #
############################################

# Download model $* for this target; a reply containing 'DOES NOT EXIST'
# is deleted, anything else is compressed into the target.
decoys/robetta-model%.pdb.gz:
	wget -N \
	    'http://robetta.bakerlab.org/servlet/robetta.GetModel?m=$*&t=${TARGET}' \
	    -O ${@:.gz=}
	if grep 'DOES NOT EXIST' ${@:.gz=} ; then rm ${@:.gz=} ; else gzip -9f ${@:.gz=} ; fi

.PHONY: fetch_robetta
fetch_robetta: $(foreach m,1 2 3 4 5 6 7 8 9 10,decoys/robetta-model${m}.pdb.gz)

######################################
# USING ROSETTA TO REPACK SIDECHAINS #
######################################

# Which version of rosetta to use
ifndef ROSETTA
ROSETTA = /projects/compbio/usr/karplus/rosetta
endif

paths.txt: ${CASP6}/starter-directory/paths.txt
	cp -p $^ $@

# Disulfide handling: fixed list from DISULF_FILE if given, else detected.
ifdef DISULF_FILE
DISULF_ARGS= -fix_disulf ${DISULF_FILE} -norepack_disulf
else
DISULF_ARGS= -find_disulf -norepack_disulf
endif

%.repack.res: %.a2m
	${CASP6_SCRIPTS}/make-repack-res-file -start_col ${START_COL} < $^ > $@

%.dimer.repack.res: %.a2m
	${CASP6_SCRIPTS}/make-repack-res-file -multimer 2 -start_col ${START_COL} < $^ > $@

# score a file using Rosetta, producing an annotated .score.pdb file
# and adding to decoys/%.fasc
decoys/%.score.pdb: decoys/%.pdb paths.txt
	${ROSETTA} \
	    -s $< -read_all_chains \
	    -score -scorefile $* \
	    -decoystats \
	    -fa_output -fa_input \
	    ${DISULF_ARGS} \
	    -nstruct 1
	mv decoys/$*_0001.pdb decoys/$*.score.pdb

# repack side chains using Rosetta score mode
# BUGGY: Rosetta screws up prolines and disulfides.
decoys/%.repack-score.pdb.gz: decoys/%.pdb paths.txt
	${ROSETTA} aa XXXX A \
	    -s $< -read_all_chains \
	    -score -scorefile $* \
	    -fa_output -fa_input \
	    -ex1 -ex1aro -ex2 -ex34 \
	    -try_both_his_tautomers \
	    -use_input_sc -repack \
	    ${DISULF_ARGS} \
	    -nstruct 1
	mv decoys/aa$*_0001.pdb decoys/$*.repack-score.pdb
	gzip -9f decoys/$*.repack-score.pdb

# Shorthand for a common request---do an optimization run and repack
${TARGET}.do%:
	${MAKE} -k decoys/${TARGET}.try$*-opt2.pdb.gz \
	    decoys/${TARGET}.try$*-opt2.repack-nonPC.pdb.gz \
	    decoys/grep-best-rosetta \
	    decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty

# Shorthand for a common repacking request--just make "try12.repack"
%.repack:
	${MAKE} -k decoys/${TARGET}.$*-opt2.repack-nonPC.pdb.gz decoys/score-all.$*.rdb decoys/score-all.$*.pretty

# repack sidechains using Rosetta design mode, not changing
# CYS and PRO residues
# The input is copied to the fixed names XXXX.pdb / XXXXA.fasta that the
# "aa XXXX A" arguments refer to; Rosetta's output is logged to
# $*.repack.log and its result decoys/XXXX_0001.pdb renamed to the target.
decoys/%.repack-nonPC.pdb.gz: decoys/%.pdb paths.txt ${TARGET}.repack.res
	cp -f $< XXXX.pdb
	cp ${TARGET}.a2m XXXXA.fasta
	${ROSETTA} aa XXXX A \
	    -s XXXX.pdb \
	    -scorefile $* \
	    -read_all_chains \
	    -design -fixbb -resfile ${TARGET}.repack.res \
	    -fa_output -fa_input \
	    -ex1 -ex2 -ex34 \
	    -use_input_sc \
	    ${DISULF_ARGS} \
	    -nstruct 1 \
	    > $*.repack.log 2>&1
	-gzip -9f $*.repack.log
	rm XXXX.pdb XXXXA.fasta
	mv decoys/XXXX_0001.pdb decoys/$*.repack-nonPC.pdb
	-gzip -9f decoys/$*.repack-nonPC.pdb

# same as above, but starting with gzipped pdb file.
# (this variant does not capture Rosetta's output in a log file)
decoys/%.repack-nonPC.pdb.gz: decoys/%.pdb.gz paths.txt ${TARGET}.repack.res
	gunzip -c $< > XXXX.pdb
	cp ${TARGET}.a2m XXXXA.fasta
	${ROSETTA} aa XXXX A \
	    -s XXXX.pdb \
	    -scorefile $* \
	    -read_all_chains \
	    -design -fixbb -resfile ${TARGET}.repack.res \
	    -fa_output -fa_input \
	    -ex1 -ex2 -ex34 \
	    -use_input_sc \
	    ${DISULF_ARGS} \
	    -nstruct 1
	rm XXXX.pdb XXXXA.fasta
	mv decoys/XXXX_0001.pdb decoys/$*.repack-nonPC.pdb
	-gzip -9f decoys/$*.repack-nonPC.pdb

# repack sidechains using Rosetta design mode, not changing
# CYS and PRO residues
# Dimer variant: the fasta file holds the target sequence twice (the
# second copy without its '>' header line) and the dimer res file is used.
dimer%.repack-nonPC.pdb.gz: dimer%.pdb paths.txt ${TARGET}.dimer.repack.res
	cp -f $< XXXX.pdb
	cp ${TARGET}.a2m XXXXA.fasta
	grep -v '>' ${TARGET}.a2m >> XXXXA.fasta
	${ROSETTA} aa XXXX A \
	    -s XXXX.pdb \
	    -scorefile $* \
	    -read_all_chains \
	    -design -fixbb -resfile ${TARGET}.dimer.repack.res \
	    -fa_output -fa_input \
	    -ex1 -ex2 -ex34 \
	    -use_input_sc \
	    ${DISULF_ARGS} \
	    -nstruct 1 \
	    > dimer$*.repack.log 2>&1
	-gzip -9f dimer$*.repack.log
	rm XXXX.pdb XXXXA.fasta
	mv decoys/XXXX_0001.pdb dimer$*.repack-nonPC.pdb
	gzip -9f dimer$*.repack-nonPC.pdb

# The following full-atom relax does not seem to be working yet.
# Full-atom relax with Rosetta; the result decoys/aa$*_0001.pdb is renamed
# to the target.
decoys/%.relax.pdb: decoys/%.pdb paths.txt
	cp -f $< XXXX.pdb
	cp ${TARGET}.a2m XXXXA.fasta
	${ROSETTA} aa XXXX A \
	    -s $< -read_all_chains \
	    -relax -minimize -farlx \
	    -new_refold \
	    -scorefile $* \
	    -fa_output -fa_input \
	    -nstruct 1
	rm XXXX.pdb XXXXA.fasta
	mv decoys/aa$*_0001.pdb decoys/$*.relax.pdb

decoys/grep-best-rosetta: decoys
	${CASP6_SCRIPTS}/sort-by-rosetta

##########################
# MAKING CASP SUBMISSION #
##########################

# Optional -method argument for pdb2casp
ifdef METHOD_FILE
USE_CASP_METHOD := -method ${METHOD_FILE}
else
USE_CASP_METHOD :=
endif

# -parent argument for pdb2casp: the manually chosen hits, else "N/A"
ifdef MANUAL_TOP_HITS
USE_CASP_PARENT := -parent '${MANUAL_TOP_HITS}'
else
USE_CASP_PARENT := -parent "N/A"
endif

# NOTE(review): the recipe neither redirects into $@ nor mentions
# model$*.method -- presumably pdb2casp writes the model files itself and
# picks up the method text on its own; confirm.
model%.ts: best-models.pdb.gz model%.method
	gunzip -c $< \
	    | ${CASP6_SCRIPTS}/pdb2casp \
	    -target ${TARGET} -author ${CASP6_ID} \
	    ${USE_CASP_METHOD} ${USE_CASP_PARENT}

.PHONY: casp_models email

casp_models: $(foreach x,1 2 3 4 5, model${x}.ts)

# Mail one model to ${CASP6_SUBMIT}, then rename it to <name>-submitted.
model%.email: model%.ts
	mail -s 'SAM-T04 hand ${TARGET}' ${CASP6_SUBMIT} < $^
	-mv -f $^ $^-submitted
	echo mailing model $* done

email: $(foreach x,1 2 3 4 5, model${x}.email)

################################
# EVALUATING THE FINAL RESULTS #
################################

# Extract the real structure from the CASP6 tarball; it is renamed and
# compressed only if the extracted file is non-empty.
${TARGET}.real.pdb.gz: ${CASP6}/casp6-real-structures.tar.gz
	gunzip -c $^ | tar xf - ${TARGET}.pdb
	if test -s ${TARGET}.pdb ;\
	    then mv ${TARGET}.pdb ${TARGET}.real.pdb ;\
	    gzip -9f ${TARGET}.real.pdb ;\
	    fi

# The evaluate rules below all instantiate the starter-directory
# evaluate.under template with sed, run it through undertaker, and sort the
# resulting table.  Each sort goes through a scratch file named after the
# target (not a shared sort.tmp) so the rules cannot overwrite each other's
# intermediate output when run concurrently.

# Per-domain evaluation against ${TARGET}_<domain>.real.pdb
decoys/evaluate_%.rdb: decoys/read-pdb.under ${TARGET}_%.real.pdb.gz
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/_domain/_$*/ \
	    -e "s/# DO READ/ReadConformPDB ${TARGET}_$*.real.pdb/" \
	    < ${CASP6}/starter-directory/evaluate.under \
	    | nice -2 ${UNDERTAKER}
	-rm -f $@.sort.tmp
	${PCB}/sorttbl real_cost < $@ > $@.sort.tmp
	mv -f $@.sort.tmp $@

# Whole-target evaluation: with REAL_PDB set its value is substituted into
# the template; otherwise ${TARGET}.real.pdb is read.
ifdef REAL_PDB
decoys/evaluate.rdb: decoys/read-pdb.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/REAL_PDB/${REAL_PDB}/ \
	    -e "s/_domain//" \
	    -e "s/# DO READ/ReadConformPDBids/" \
	    < ${CASP6}/starter-directory/evaluate.under \
	    | nice -2 ${UNDERTAKER}
	-rm -f $@.sort.tmp
	${PCB}/sorttbl smooth_GDT < $@ > $@.sort.tmp
	mv -f $@.sort.tmp $@
else
decoys/evaluate.rdb: decoys/read-pdb.under ${TARGET}.real.pdb.gz
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ \
	    -e "s/_domain//" \
	    -e "s/# DO READ/ReadConformPDB ${TARGET}.real.pdb/" \
	    < ${CASP6}/starter-directory/evaluate.under \
	    | nice -2 ${UNDERTAKER}
	-rm -f $@.sort.tmp
	${PCB}/sorttbl smooth_GDT < $@ > $@.sort.tmp
	mv -f $@.sort.tmp $@
endif

# FINAL_COSTFCN was used for seeing how the costfcn compared with the
# real costs, but has been removed from evaluate.under
ifndef FINAL_COSTFCN
FINAL_COSTFCN:=try1
endif

# Evaluation using a named cost function ($*) in place of try1.
decoys/evaluate.%.rdb: %.costfcn decoys/read-pdb.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
	    -e s/_domain// \
	    -e s/REAL_PDB/${REAL_PDB}/ \
	    < ${CASP6}/starter-directory/evaluate.under \
	    | nice -2 ${UNDERTAKER}
	-rm -f $@.sort.tmp
	${PCB}/sorttbl real_cost < $@ > $@.sort.tmp
	mv -f $@.sort.tmp $@

#############################################
# GROMACS optimization
# Sat Jan 22 19:31:50 PST 2005 Kevin Karplus
#############################################

# Pipe a compressed model through run-gromacs with force field 0 or 4.
%.gromacs0.pdb.gz: %.pdb.gz
	gunzip -c $^ \
	    | ${CASP6_SCRIPTS}/run-gromacs -force_field 0 \
	    -nokeeptmp -tmp /tmp \
	    | gzip -9 \
	    > $@

%.gromacs4.pdb.gz: %.pdb.gz
	gunzip -c $^ \
	    | ${CASP6_SCRIPTS}/run-gromacs -force_field 4 \
	    -nokeeptmp -tmp /tmp \
	    | gzip -9 \
	    > $@

# Shorthand for a common request: do undertaker then gromacs
${TARGET}.gro%:
	${MAKE} -k decoys/${TARGET}.try$*-opt2.pdb.gz \
	    decoys/${TARGET}.try$*-opt2.gromacs0.pdb.gz \
	    decoys/${TARGET}.try$*-opt2.gromacs4.pdb.gz \
	    decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty

ifdef MONOMER_LENGTH
# Rule for unpacking a single chain into a homo-multimer:
%.unpack.pdb.gz: %.pdb.gz
	gunzip -c $^ \
	    | ${CASP6_SCRIPTS}/unpack-multimer -length ${MONOMER_LENGTH} \
	    | gzip > $@

# The multgro target is needed for multimers,
# to make sure that gromacs sees separate chains on its inputs,
# and to resep
# (the sentence above is truncated in the original source)
${TARGET}.multgro%:
	${MAKE} -k decoys/${TARGET}.try$*-opt2.pdb.gz \
	    decoys/${TARGET}.try$*-opt2.unpack.pdb.gz \
	    decoys/${TARGET}.try$*-opt2.unpack.gromacs0.unpack.pdb.gz \
	    decoys/${TARGET}.try$*-opt2.unpack.gromacs4.unpack.pdb.gz \
	    decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty
endif