# MACROS that need to be set:
#	TARGET := SET_IN_Makefile
#	START_COL		defaults to 1
#	PREFERRED_AL_METHOD	defaults to t06
#	MANUAL_TOP_HITS		which chains to make pairwise alignments for
#				in extra_alignments and which to make
#				read_alignments.under files for in read_alignments
#				(default empty)
#	if NO_SUMMARY is set, then don't do any of the output to the
#	summary.html file (useful for updating bits and pieces)

# what is first column for numbering alignments and mutual information
ifndef START_COL
START_COL := 1
endif

# export all variables to sub-makes
# export
# Tue Jun 13 14:41:47 PDT 2006 Kevin Karplus
#	This "export" was killing the makes with
#	"execvp: ...: Argument list too long"
#	error messages.
#	Commenting it out fixed the problems.

# (even if someone turns off export, still export these three)
export TARGET
export LCTARGET
export START_COL

# TNUM is the TARGET missing the first T.
# patsubst strips only a leading T; a plain subst would delete every T
# that occurs anywhere in the target name.
TNUM := ${patsubst T%,%,${TARGET}}

# Alignment method whose results are treated as the preferred ones.
ifndef PREFERRED_AL_METHOD
PREFERRED_AL_METHOD:=t06
endif

# Alignment method used by this invocation (recursive makes override it).
ifndef AL_METHOD
AL_METHOD:=${PREFERRED_AL_METHOD}
endif

# Common filename prefix for everything derived from one alignment method.
TARG_AL := ${TARGET}.${AL_METHOD}

ifndef PREFERRED_RR_EXT
PREFERRED_RR_EXT:=449a_45
endif

# TO DO:
#	Clean up (get rid of?) script make-alignments,
#	using standard programs to get hit list from *best-scores.rdb
#	into a list that can be run with $(foreach ...)
#	the way that extra_alignments now are.

# REDO_SEARCHES	if set, causes the template library to be
#		included in the dependencies of the searches.
#		(if not defined, then template library not in dependencies)
# REDO_T2K	if set, causes NR to be included in the dependencies
#		for the t2k iterative search, usually triggering rebuilding
#		the t2k.a2m.gz file.
# REDO_T04	if set, causes NR to be included in the dependencies
#		for the t04 iterative search, usually triggering rebuilding
#		the t04.a2m.gz file.
# REDO_MI	if set, add dependency on correlated_columns executable.
#		Useful if the definition of the mi.rdb files changes.
# macros that control the t2k iterative search
#	BLAST_MAX	default 10,000, set it smaller for long proteins with many
#			homologs that take a long time to run.
#	ADPSTYLE	default 5, set it to 1 for long proteins that cause hmmscore
#			to crash in the final alignment step of target2k

# macros that control compression
#	NOGZIP_PDB	if set causes undertaker.pdb file not to be gzipped.

# macros used in recursive makes for pairwise alignments:
#	MANUAL_TOP_HITS	which chains to make pairwise alignments for
#			provided manually
#	PRED		needs to be set to template chain ID in recursive makes for
#			pairwise alignments
#	PRED2		first two letters of PRED, now computed
#			automatically from PRED.
#	ALIGN_VITERBI	if set, causes only ADP=1 and not ADP=5 alignments to
#			be tried

# The following macros are set automatically when making pairwise alignments.
#	ALIGN_TYPE	local or global, set when generating pairwise alignments
#	ALIGN_NAME
#	ADP
#	TWO_TRACK_ADP	(used to set ADP for two-track alignments)
#	SW
#	FSSP
#	FSSP2
#	STRUCT_ALPH	(also used for generating secondary structure predictions)
#	BURIAL_ALPH	(used for 3-track HMMs)
#	AA_WEIGHT
#	STRUCT_WEIGHT
#	BURIAL_WEIGHT

# Process id reported by the shell that make spawns for this assignment
# ($$$$ reaches that shell as $$).  Evaluated once, when the file is read.
PID := ${shell echo $$$$}

# get the version of the compiler used on this machine
# ifndef GCC_VERSION
# GCC_VERSION := ${shell g++ --version}
# endif
# ifneq '${words ${GCC_VERSION}}' '1'
# GCC_VERSION := ${word 3,${GCC_VERSION}}
# endif

# Normally compute mutual information for all column pairs,
# but turn it off if NO_MUTUAL is set.
# (computation is now fast enough that this shouldn't be necessary)
# NOTE(review): both branches currently assign an empty value, so
# NO_MUTUAL has no effect on MUTUAL_DEPEND as written here.
ifndef NO_MUTUAL
MUTUAL_DEPEND :=
else
MUTUAL_DEPEND :=
endif

# ---- experiment directory layout ----
PCEP := /projects/compbio/experiments/protein-predict

YEAST := /projects/compbio/experiments/protein-predict/yeast
YEAST_SCRIPTS := ${YEAST}/scripts

CASP6 := /projects/compbio/experiments/protein-predict/casp6
CASP6_NETWORKS := ${CASP6}/networks
CASP6_SCRIPTS := ${CASP6}/scripts

CASP7 := ${PCEP}/casp7
STARTER := ${CASP7}/starter-directory
CASP7_SCRIPTS := ${CASP7}/scripts
CASP7_NETWORKS := ${CASP7}/networks

QA_DATA := ${STARTER}/qa_data
QA_SOURCE := ${CASP7}/${TARGET}

# AUTHOR CODE for CASP7 submissions of SAM-T06 hand prediction group
CASP7_ID := 5370-1100-4902
CASP_ID := ${CASP7_ID}

ifndef SAM_YEAR
SAM_YEAR := 06
endif

ifndef CASP_WEBSITE
CASP_WEBSITE := http://predictioncenter.org/casp7/targets/templates
endif

CASP_SUBMIT := submit@predictioncenter.org

# ---- machine identification (used to pick the binary directory) ----
ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif

ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif

# ---- shared libraries, binaries and scripts ----
PCL := /projects/compbio/lib
PCB := /projects/compbio/bin
PCBS := ${PCB}/scripts
EXTRACT := ${PCBS}/extract-from-fasta

# Architecture-specific binaries live in a subdirectory named after `uname -p`.
PCB_SUB := ${PCB}/${UNAME_P}
FRAGFINDER := ${PCB_SUB}/fragfinder
BIN_SAM := ${PCB_SUB}
BIN_PREDICT2ND := ${PCB_SUB}

# where the residue-residue contact prediction is done:
RR_BIN:= ${CASP7}/bin
RR_DATA:= ${CASP7}/data
RR_NETWORKS := ${CASP7_NETWORKS}

UNDERTAKER_SCRIPTS := /projects/compbio/experiments/undertaker/scripts
UNDERTAKER ?= /projects/compbio/programs/undertaker/undertaker
# Following path used for testing of modified undertaker source
# UNDERTAKER ?= /projects/compbiousr/hyjkim/Spring08/undertaker/undertaker
# Temporary addition to check the hbond and str4 costfcns
# UNDERTAKER := /cluster/home/thiltgen/undertaker

ifndef RASMOL
RASMOL:= ${PCB_SUB}/rasmol
endif

ifndef WORKDIR
WORKDIR := $(shell pwd)
endif

ifndef HOST
HOST := $(shell hostname)
endif

# NOTE(review): this replaces make's built-in MAKE variable with a fixed path.
MAKE := /usr/bin/gmake

PCEM := /projects/compbio/experiments/models.97
PCEM_SCRIPTS := ${PCEM}/scripts
PCEM_SCRIPTS2K := ${PCEM}/scripts2k
PCEM_SCRIPTS04 := ${PCEM}/scripts04
PCEM_INDEXES := /projects/compbio/experiments/models.97/indexes

# SAM scoring program and the a2m-to-HTML converter.
HMMSCORE := ${BIN_SAM}/hmmscore
A2M2HTML := ${PCBS}/a2m2html
# A2M2HTML := /projects/compbio/experiments/protein-predict/SAM_T02/scripts/a2m2html

# SCOP classification file handed to the annotation and best_scores scripts.
# SCOP := /projects/compbio/data/scop/dir.cla.scop.txt_casp6.gz
SCOP := /projects/compbio/data/scop/dir.cla.scop.txt.gz

# Annotation command line; note that it already carries the -scop option.
ANNOTATE :=${CASP7_SCRIPTS}/annotate_target_scores -scop ${SCOP}

MAIN_PRINTING_SCRIPT := ${CASP7_SCRIPTS}/handle_summary_html.pl
# Note about MAIN_PRINTING_SCRIPT:
# handle_summary_html.pl takes in an argument (when called on the command line)
# that tells it what action to perform.  For example, it used to be that there
# was an "add_summary" perl script.  Now, however, you call
#	handle_summary_html.pl add_summary
# to accomplish the same thing.  "add_summary" is the argument to the script.
# Additional arguments may also be included, depending on the action being performed.
# Check scripts/handle_summary_html.pl to see what actions can be performed.

# how many residues wide should each row of the logos be?
# Ideally, we'd like this to be computed from the sequence length,
#	with length<=200 yielding 50
#	200> README date >> README echo "Running on "${HOST} >> README date
# NOTE(review): the comment line above is damaged in this copy of the file.
# Text between "200" and "> README" is missing (it appears to have held the
# rest of this comment and the start of a rule that writes README).  It is
# kept verbatim so nothing further is lost; restore it from version control.

# Conventions used by the reporting rules below:
#   * Everything that writes to ${WORKDIR}/summary.html is wrapped in
#     "ifndef NO_SUMMARY", so setting NO_SUMMARY builds files without
#     touching the summary page.
#   * A leading "-" on a recipe line lets make continue if that command fails.

#create html results page
summary_create: ${TARGET}.a2m
ifndef NO_SUMMARY
	${MAIN_PRINTING_SCRIPT} create_summary_html \
		${TARGET} ${TARGET}.a2m> ${WORKDIR}/summary.html
endif

#start the inputs section
header_inputs:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
		Inputs >> ${WORKDIR}/summary.html
endif

#acknowledge sequence receipt
#add pointer to sequence file to the html results page
receipt_ack: ${TARGET}.a2m
	echo Received sequence ${TARGET}.a2m
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Submitted sequence(s)" \
		${TARGET}.a2m >> ${WORKDIR}/summary.html
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"README file" \
		README >> ${WORKDIR}/summary.html
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Provided documentation" \
		${TARGET}.doc.html >> ${WORKDIR}/summary.html
endif

pdb_blast: ${TARGET}.pdb.blast
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Quick blastp of non-redundant PDB" \
		$^ >> ${WORKDIR}/summary.html
endif

header_alignment:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
		'Multiple alignment' >> ${WORKDIR}/summary.html
endif

# One recursive make per alignment method.  The blank lines inside the
# define make every expansion start and end with a newline, so that each
# $(call ...) inside a $(foreach ...) becomes its own recipe line.
define build_multiple_op

	-${MAKE} -k AL_METHOD=${AL_METHOD} \
		build_multiple_alignment build_pretty build_mod w0.5_logo conserved_script 1.small_divider

endef

build_multiple: ${TARGET}.upper-only.a2m
	$(foreach AL_METHOD,${MA_METHODS}, $(call build_multiple_op))

build_multiple_alignment: ${TARG_AL}.a2m.gz
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"SAM_${AL_METHOD} multiple alignment in a2m format" \
		$^ >> ${WORKDIR}/summary.html
endif

build_pretty: ${TARG_AL}.pa.html
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"SAM_${AL_METHOD} multiple alignment in pretty html format" \
		$^ >> ${WORKDIR}/summary.html
endif

build_mod: ${TARG_AL}.w0.5.mod
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"SAM_${AL_METHOD} target hidden Markov model" \
		$^ >> ${WORKDIR}/summary.html
endif

w0.5_logo: ${TARG_AL}.w0.5-logo.eps ${TARG_AL}.w0.5-logo.pdf
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
		"SAM_${AL_METHOD} multiple alignment---sequence logo" \
		${TARG_AL}.w0.5-logo \
		eps pdf >> ${WORKDIR}/summary.html
endif

# NOTE(review): this is not a pattern rule, so $* in the caption below is
# not a pattern stem -- confirm what text was intended there.
conserved_script: conserved_${AL_METHOD}
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Script for $* highlighting in rasmol" \
		$^ >> ${WORKDIR}/summary.html
endif

#start the local structure section
# NOTE(review): the quoted section-head text looks like it lost markup in
# this copy (bare "\" continuation lines remain); tokens are kept as found.
header_local_structure:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
		'\
		Secondary Structure Prediction\
		\
		(Explanation of secondary-structure predictions) \
		' \
		>> ${WORKDIR}/summary.html
endif

# returns before and after make are important here
define do_secondary_op

	-${MAKE} -k AL_METHOD=${AL} STRUCT_ALPH=${STRUCT_ALPH} do_secondary do_secondary_logo

endef

# returns before and after make are important here
# NOTE(review): STRUCT_ALPH is passed twice on this command line; the
# duplicate is kept as found -- confirm whether another variable was meant.
define do_burial_op

	-${MAKE} -k AL_METHOD=${AL} \
		STRUCT_ALPH=${STRUCT_ALPH} STRUCT_ALPH=${STRUCT_ALPH} do_burial_or_secondary \
		${TARGET}.${AL}.${STRUCT_ALPH}.mod do_secondary_logo

endef

# Runs the per-alphabet predictions for every alignment method, then the
# merged dssp-ehl2 prediction, then the burial alphabets.
local_structure: $(foreach AL,${MA_METHODS},${TARGET}.${AL}-thin90.a2m.gz)
	$(foreach STRUCT_ALPH,${PURE_SECONDARY_ALPHABETS},\
	   $(foreach AL,${MA_METHODS},$(call do_secondary_op)) \
	   ${MAKE} 1.small_divider;)
	-${MAKE} -k MERGE_SECONDARY=1 STRUCT_ALPH=dssp-ehl2 \
		do_secondary do_secondary_logo mail_secondary 1.small_divider
	$(foreach STRUCT_ALPH,${BURIAL_ALPHABETS},\
	   $(foreach AL,${MA_METHODS},$(call do_burial_op))\
	   ${MAKE} 1.small_divider)

# Basename of the prediction files: without the alignment method when the
# merged prediction is requested, with it otherwise.
ifdef MERGE_SECONDARY
SECONDARY_TARGET := ${TARGET}.${STRUCT_ALPH}
else
SECONDARY_TARGET := ${TARG_AL}.${STRUCT_ALPH}
endif
# $(warning SECONDARY_TARGET = ${SECONDARY_TARGET})

do_burial_or_secondary: ${SECONDARY_TARGET}.rdb \
		${SECONDARY_TARGET}.seq \
		${SECONDARY_TARGET}-color.rasmol
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
		"${STRUCT_ALPH} structure prediction" \
		${SECONDARY_TARGET} \
		rdb seq \
		>> ${WORKDIR}/summary.html
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Script for ${SECONDARY_TARGET} coloring in rasmol" \
		${SECONDARY_TARGET}-color.rasmol >> ${WORKDIR}/summary.html
endif
ifeq (${AL_METHOD},${PREFERRED_AL_METHOD})
	-ln -sf ${SECONDARY_TARGET}-color.rasmol ${COLOR_SCRIPT_SHORT_NAME}
endif

do_secondary: do_burial_or_secondary \
		${SECONDARY_TARGET}.constraints

do_secondary_logo: ${SECONDARY_TARGET}.mod ${SECONDARY_TARGET}-logo.eps ${SECONDARY_TARGET}-logo.pdf
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
		"${STRUCT_ALPH}---sequence logo" \
		${SECONDARY_TARGET}-logo \
		eps pdf \
		>> ${WORKDIR}/summary.html
endif

# Default recipient for mailed predictions.
# ifndef takes a variable NAME.  Writing "ifndef ${EMAIL_ADDRESS}" would
# test a variable named after the address itself, which is never defined,
# and so would always overwrite an address supplied by the caller.
ifndef EMAIL_ADDRESS
EMAIL_ADDRESS := ${CASP_SUBMIT}
endif

ifndef EMAIL_SECONDARY_SUBJECT
EMAIL_SECONDARY_SUBJECT := SAM-${SAM_YEAR} hand ${TARGET}
endif

# Mail is only sent when EMAIL_SECONDARY is set; otherwise just report.
ifdef EMAIL_SECONDARY
mail_secondary: ${SECONDARY_TARGET}
	mail -s '${EMAIL_SECONDARY_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
	echo mailing $^ to ${EMAIL_ADDRESS} done
else
mail_secondary:
	echo no email address to mail ${SECONDARY_TARGET} to.
endif

# The stem is only a counter (1.small_divider, 2.small_divider, ...) that
# gives each request a distinct target name.
%.small_divider:
	date
ifndef NO_SUMMARY
# Prints a small divider into the file.
# NOTE(review): the divider markup inside the quotes is missing from this
# copy of the file; restore the original string from version control.
	echo '' >> ${WORKDIR}/summary.html
endif

#start the target model scores section
header_target_mod_scores:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
		"Target model scores" >> ${WORKDIR}/summary.html
endif

2track_target_mod_scores: \
		2track_t06_target_mod_scores \
		2.small_divider \
		2track_t04_target_mod_scores \
		2.small_divider \
		2track_t2k_target_mod_scores

# The stem ($*) is the alignment method.
2track_%_target_mod_scores:
	-$(foreach x,${SECONDARY_ALPHABETS},\
		${MAKE} -k STRUCT_ALPH=${x} AL_METHOD=$* \
		${TARGET}.$*-100-30-${x}.mlib \
		${TARGET}.$*-100-30-${x}.dist \
		$*-100-30-${x}-scores;)
	-${MAKE} -k STRUCT_ALPH=str2 BURIAL_ALPH=CB_burial_14_7 \
		AL_METHOD=$* \
		$*-100-40-40-str2+CB_burial_14_7-scores
	-${MAKE} -k STRUCT_ALPH=str2 BURIAL_ALPH=near-backbone-11 \
		AL_METHOD=$* \
		$*-80-60-80-str2+near-backbone-11-scores

${AL_METHOD}-%-scores: ${TARG_AL}-%-scores.rdb \
		${TARG_AL}-%-scores.html
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"amino acid/$* multi-track target model scores" \
		${TARG_AL}-$*-scores.html >> ${WORKDIR}/summary.html
endif

1track_target_mod_scores:
	-$(foreach d,${MA_METHODS}, \
		${MAKE} -k AL_METHOD=${d} 1track_target_mod_scores_${d};)

1track_target_mod_scores_${AL_METHOD}: ${TARG_AL}.w0.5.mlib \
		${TARG_AL}.w0.5.dist \
		${TARG_AL}-w0.5-scores.rdb \
		${TARG_AL}-w0.5-scores.html
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"amino acid single-track target model scores of PDB" \
		${TARG_AL}-w0.5-scores.html >> ${WORKDIR}/summary.html
endif

#start the template model scores section
header_template_mod_scores:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
		"Template model scores" >> ${WORKDIR}/summary.html
endif

template_mod_scores: $(foreach AL,${MA_METHODS}, \
		${TARGET}.${AL}-template-lib-scores.rdb \
		${TARGET}.${AL}-template-lib-scores.html)
ifndef NO_SUMMARY
	$(foreach AL,${MA_METHODS}, \
		${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Annotated ${AL} template model scores" \
		${TARGET}.${AL}-template-lib-scores.html >> ${WORKDIR}/summary.html;)
endif

#start the top hits section
header_top_hits:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
		"Top Hits" >> ${WORKDIR}/summary.html
endif

# Per-method best-scores files are made in recursive makes (AL_METHOD
# differs for each); the combined file is made afterwards.
top_hits:
	-$(foreach d,${MA_METHODS}, \
		${MAKE} -k AL_METHOD=${d} \
		${TARGET}.${d}.best-scores.rdb ${TARGET}.${d}.best-scores.html;)
	${MAKE} -k ${TARGET}.best-scores.rdb ${TARGET}.best-scores.html
ifndef NO_SUMMARY
	-$(foreach d,${MA_METHODS}, \
		${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Best scoring hits from $d HMMs" \
		${TARGET}.$d.best-scores.html >> ${WORKDIR}/summary.html;)
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Best scoring hits from combining ${MA_METHODS}" \
		${TARGET}.best-scores.html >> ${WORKDIR}/summary.html
endif

#start the top alignments section
header_top_alignments:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
		"Alignments for Top Hits" >> ${WORKDIR}/summary.html
endif

# NUM_ALIGN_TEMPLATES specifies the number of templates to include
# from each best-scores file when building pairwise alignments.
ifndef NUM_ALIGN_TEMPLATES
NUM_ALIGN_TEMPLATES := 40
endif

# Line count handed to "head" in build_top_alignments.  It is derived from
# NUM_ALIGN_TEMPLATES so that overriding that value alone still works; an
# explicit NUM_ALIGN_TEMPLATES_PLUS_TWO from the caller is respected.
# (The extra two lines are presumably the RDB header lines -- TODO confirm.)
ifndef NUM_ALIGN_TEMPLATES_PLUS_TWO
NUM_ALIGN_TEMPLATES_PLUS_TWO := ${shell expr ${NUM_ALIGN_TEMPLATES} + 2}
endif

# Truncate and sort every best-scores table, merge them on Sequence_ID,
# and feed the merged table to make-alignments.
# The merge chain is combined -> t2k -> t04 -> t06, so each per-method
# table is merged exactly once.
# NOTE(review): the merge steps name t2k, t04 and t06 explicitly and so
# assume MA_METHODS contains all three.
build_top_alignments: $(foreach A,${MA_METHODS},${TARGET}.${A}.best-scores.rdb ) \
		${TARGET}.best-scores.rdb
	${MAKE} -k ${TARGET}.mod
	$(foreach R,$^, \
		grep -v '^ ' < ${R} \
		| head -${NUM_ALIGN_TEMPLATES_PLUS_TWO} \
		> tmp-truncated-${R};)
	$(foreach R,$^, ${PCB}/sorttbl Sequence_ID < tmp-truncated-${R} > tmp-sorted-${R};)
	$(foreach R,$^, rm tmp-truncated-${R};)
	${PCB}/mergetbl Sequence_ID < tmp-sorted-${TARGET}.best-scores.rdb \
		tmp-sorted-${TARGET}.t2k.best-scores.rdb >tmp.merged.rdb
	${PCB}/mergetbl Sequence_ID < tmp.merged.rdb \
		tmp-sorted-${TARGET}.t04.best-scores.rdb >tmp.merged2.rdb
	${PCB}/mergetbl Sequence_ID < tmp.merged2.rdb \
		tmp-sorted-${TARGET}.t06.best-scores.rdb >tmp.merged3.rdb
	$(foreach R,$^, rm tmp-sorted-${R};)
ifdef ALIGN_VITERBI
	${YEAST_SCRIPTS}/make-alignments ${TARGET} VITERBI_ALIGN=1 < tmp.merged3.rdb
else
	${YEAST_SCRIPTS}/make-alignments ${TARGET} < tmp.merged3.rdb
endif
	rm tmp.merged.rdb tmp.merged2.rdb tmp.merged3.rdb

show_top_alignments: $(foreach A,${MA_METHODS},${TARGET}.${A}.top_reported_alignments.html \
			${TARGET}.${A}.top_reported_alignments.rdb ) \
		${TARGET}.top_reported_alignments.rdb \
		${TARGET}.top_reported_alignments.html
ifndef NO_SUMMARY
	-$(foreach A,${MA_METHODS}, ${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Top alignments from ${A} hits" \
		${TARGET}.${A}.top_reported_alignments.html >> ${WORKDIR}/summary.html ; )
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Top alignments from combined hits" \
		${TARGET}.top_reported_alignments.html >> ${WORKDIR}/summary.html
endif

# do recursive make to make sure that the wildcard in all-align.a2m.gz is up to date
all_align:
	-${MAKE} -k all-align.a2m.gz all-align.pa
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
		"multiple alignment of templates" \
		all-align \
		a2m.gz pa \
		>> ${WORKDIR}/summary.html
endif

undertaker_start_section:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
		"Undertaker (3d) files" >> ${WORKDIR}/summary.html
endif

%.undertaker-align.under: %.top_reported_alignments.rdb
	${CASP7_SCRIPTS}/make_undertaker_alignment_list < $^ > $@

undertaker_from_many: $(foreach A,${MA_METHODS},${TARGET}.${A}.undertaker-align.under )\
		${TARGET}.undertaker-align.under
ifndef NO_SUMMARY
	-$(foreach A,${MA_METHODS}, ${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Undertaker input for top ${A} alignments" \
		${TARGET}.${A}.undertaker-align.under >> ${WORKDIR}/summary.html ;)
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Undertaker input for top combined alignments" \
		${TARGET}.undertaker-align.under >> ${WORKDIR}/summary.html
endif

# Extension of the undertaker PDB output: gzipped unless NOGZIP_PDB is set.
ifdef NOGZIP_PDB
PDBEXT := pdb
else
PDBEXT := pdb.gz
endif

# NOTE(review): the quoted caption looks like it lost markup in this copy
# (bare "\" continuation lines remain); tokens are kept as found.
undertaker_show_alignment: ${TARGET}.undertaker-align.${PDBEXT}
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		'PDB file with \
		\
		model(s) for top alignments' \
		$^ >> ${WORKDIR}/summary.html
endif

#BUG: the .make_jpeg target causes the top_alignments.rdb file to be remade,
#	which in turn causes the undertaker-align.${PDBEXT} file to be remade.
undertaker_pictures: ${TARGET}.undertaker-align.make_jpeg
	${CASP7_SCRIPTS}/add_jpeg_views_html \
		-basename ${TARGET}.undertaker-align \
		-caption "Images of an (incomplete) model created by sidechain replacement on the backbone of the highest scoring template." \
		>> ${WORKDIR}/summary.html

frag_a2m:
	$(foreach AL_METHOD, ${MA_METHODS}, ${MAKE} AL_METHOD=${AL_METHOD} ${TARGET}.${AL_METHOD}.many.frag.gz;)
ifndef NO_SUMMARY
	$(foreach AL_METHOD, ${MA_METHODS}, \
		${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Fragment list for undertaker (from fragfinder)" \
		${TARGET}.${AL_METHOD}.many.frag.gz >> ${WORKDIR}/summary.html; \
	)
endif

# One recursive make per manually chosen template chain.  The blank lines
# inside the define make every expansion start and end with a newline, so
# that each $(call ...) in the $(foreach ...) becomes its own recipe line.
define extra_op

	-${MAKE} -k PRED=${x} \
		single-track-target-alignments \
		template-alignments \
		two-track-alignments \
		three-track-alignments

endef

extra_alignments:
	$(foreach x,${MANUAL_TOP_HITS}, $(call extra_op))

read_alignments: \
	$(foreach x,${MANUAL_TOP_HITS},${x}/read-alignments-noscwrl.under ${x}/read-alignments-scwrl.under)

undertaker_try1: try1.under try1.costfcn ${TARGET}.undertaker-align.sheets
	-${MAKE} ${TARGET}.do1
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"First attempted model using undertaker" \
		decoys/${TARGET}.try1-opt2.pdb.gz >> ${WORKDIR}/summary.html
endif

undertaker_try1_pictures: decoys/${TARGET}.try1-opt2.make_jpeg
	${CASP7_SCRIPTS}/add_jpeg_views_html \
		-basename decoys/${TARGET}.try1-opt2 \
		-caption "Images of complete model in decoys/${TARGET}.try1-opt2.pdb.gz" \
		>> ${WORKDIR}/summary.html

score_all: decoys/score-all.try1.rdb
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
		"Undertaker scores for decoys" \
		$^ >> ${WORKDIR}/summary.html
endif

# end html results page
summary_end:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} end_summary_html \
		>> ${WORKDIR}/summary.html
endif

#################
# web-interface #
#################

%.end_section:
	date
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} end_section_summary_html \
		>> ${WORKDIR}/summary.html
endif

###############################################
# common operations, applicable to many files #
###############################################

rm_empty:
	find . -empty -exec rm -f '{}' \; -print

%.padded.seq: %.seq ${TARGET}.a2m
	${PCEM_SCRIPTS}/add-inserts $^ > $@

# The thinning rules below all write through the fixed scratch name
# "unique-tmp", so two of them must not run at the same time in one
# directory.

#thin the alignment to 90% sequence identity for use with the neural nets
#(which were trained on thinned alignments)
%-thin90.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.90
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin62.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.62
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin50.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.50
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin40.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.40
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin35.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.35
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin30.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.30
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

#convert an a2m.gz alignment to a human-readable model
%.w0.5.mod: %.a2m.gz
	${PCBS}/w0.5 $^ $@.tmp
	-${BIN_SAM}/hmmconvert $*.w0.5 -model_file $@.tmp
	-rm -f $@.tmp

%-w1.0.mod: %.a2m.gz
	${PCBS}/w1.0 $^ $@

#compress a file
%.gz: %
	gzip -f $^

#make a pretty-aligned alignment from a compressed a2m alignment
%.pa: %.a2m.gz
	${BIN_SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

%.pa: %.a2m
	${BIN_SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

# Make a2m file with dots.
%.dotted-a2m: %.a2m.gz
	${BIN_SAM}/prettyalign $^ -f \
		| grep -v '^;' > $@

%.dotted-a2m: %.a2m
	${BIN_SAM}/prettyalign $^ -f \
		| grep -v '^;' > $@

# HTML rendering of an alignment.  The compressed variant unpacks into the
# fixed scratch file tmp.a2m first and removes it afterwards.
%.pa.html: %.a2m.gz
	gunzip -c $^ > tmp.a2m
	${A2M2HTML} -a2m_in tmp.a2m > $@
	rm tmp.a2m

%.pa.html: %.a2m
	${A2M2HTML} -a2m_in $^ > $@

##################################
# ANNOTATING A TARGET            #
##################################
# secondary structure prediction #
##################################

# PREDICT_2ND := /cse/faculty/karplus/dna/predict-2nd/predict-2nd
PREDICT_2ND := /projects/compbio/programs/predict-2nd/bin/i686/opt/predict-2nd

TEMPLATE_SEQS := ${PCEM_INDEXES}/${AL_METHOD}.x-seqs
MIXTURE := ${PCL}/recode3.20comp
TRANS_REG := ${PCL}/fssp-trained.regularizer

# Comma-separated database list for two-track scoring (sequence,structure).
TWOTRACKDBS := ${TEMPLATE_SEQS},${TEMPLATE_STRUCT}

# The template databases only become prerequisites when REDO_SEARCHES is set.
ifdef REDO_SEARCHES
TWOTRACK_DEPEND := ${TEMPLATE_SEQS} ${TEMPLATE_STRUCT}
else
TWOTRACK_DEPEND :=
endif

# $(warning PREDICT_NET is ${PREDICT_NET})

# Neural-net prediction; the rule only exists when PREDICT_NET is set.
# A command script is assembled line by line in tmp.script, piped into
# predict-2nd on stdin, and then removed.
ifdef PREDICT_NET
%.${STRUCT_ALPH} %.${STRUCT_ALPH}.rdb %.${STRUCT_ALPH}.seq : %-thin90.a2m.gz ${PREDICT_NET}
ifdef STRUCT_ALPH_FILE
	echo ReadAlphabet ${STRUCT_ALPH_FILE} > tmp.script
else
	echo > tmp.script
endif
	echo ReadNeuralNet ${PREDICT_NET} >> tmp.script
	echo ReadA2M $< >> tmp.script
	echo PrintPredictionFasta $*.${STRUCT_ALPH}.seq >> tmp.script
	echo PrintRDB $*.${STRUCT_ALPH}.rdb >> tmp.script
	${PREDICT_2ND} < tmp.script
	rm tmp.script
endif

%.${STRUCT_ALPH}.constraints: %.${STRUCT_ALPH}.rdb
	${CASP7_SCRIPTS}/constraints-from-rdb -start ${START_COL} < $^ > $@

%.${STRUCT_ALPH}.mod: %.${STRUCT_ALPH}.rdb
	${PCEM_SCRIPTS}/2nd-rdb-to-sam-model -alphabet ${SAM_STRUCT_ALPH} $^ $@

# Value passed to hmmscore's -Emax / -emax option by the scoring rules.
EMAX_FOR_HMMS := 90.0

# This target creates the mlib and dist file with no scores.
# Two-track (amino acid + ${STRUCT_ALPH}) calibration run.
# Track weights are 1.0 and 0.3, matching the "100-30" in the file name.
%-100-30-${STRUCT_ALPH}.dist %-100-30-${STRUCT_ALPH}.mlib: %.w0.5.mod \
		%.${STRUCT_ALPH}.mod \
		${TWOTRACK_DEPEND}
	${HMMSCORE} $*-100-30-${STRUCT_ALPH} \
		-verbose 0 \
		-calibrate 1 \
		-alphabet protein,${SAM_STRUCT_ALPH} \
		-trackmod $*.w0.5.mod,$*.${STRUCT_ALPH}.mod \
		-db ${TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 4 -Emax ${EMAX_FOR_HMMS}

# This target remakes the dist file without recalibrating the library.
# hmmscore runs under the scratch run name "dsspfoo"; its output file
# dsspfoo.1.<target> is then moved onto the real target name.
%-100-30-${STRUCT_ALPH}.dist: %-100-30-${STRUCT_ALPH}.mlib \
		${TWOTRACK_DEPEND}
	${HMMSCORE} dsspfoo \
		-verbose 0 \
		-modellibrary $< \
		-db ${TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax ${EMAX_FOR_HMMS}
	mv -f dsspfoo.1.$@ $@

# Annotate a .dist file.  The label given to the annotator is the target
# name with the "${TARGET}." prefix and the ".rdb" suffix removed.
%-scores.rdb: %.dist ${SCOP}
	${ANNOTATE} $(subst .rdb,,$(subst ${TARGET}.,,$@)) < $< > $@

# # The following awkward syntax is to make sure that the template-lib-scores.html
# # file is not incorrectly made without the initial pruning.
# # This is ugly and ridiculous---a better naming convention would fix
# # the problem more cleanly.
# # BUG: Mon Sep 18 16:25:33 PDT 2006 Kevin Karplus
# #	Not only is it ugly, but it doesn't work, probably because the
# #	naming convention changed or the Make.main file was reordered.
# #	It hasn't worked right since the beginning of the summer (it
# #	was ok for T0283, but not subsequent ones).
#
# # Replaced by generic conversion below Mon Sep 18 16:46:38 PDT 2006 Kevin Karplus
#
# ${TARGET}.t2k-%-scores.html: ${TARGET}.t2k-%-scores.rdb
#	${CASP7_SCRIPTS}/oneway_hits_rdb2html ${TARGET}.t2k.$*-scores < $^ > $@
# ${TARGET}.t04-%-scores.html: ${TARGET}.t04-%-scores.rdb
#	${CASP7_SCRIPTS}/oneway_hits_rdb2html ${TARGET}.t04.$*-scores < $^ > $@
# ${TARGET}.t06-%-scores.html: ${TARGET}.t06-%-scores.rdb
#	${CASP7_SCRIPTS}/oneway_hits_rdb2html ${TARGET}.t06.$*-scores < $^ > $@

# generic pattern
# Tables that contain the string X_CNT are cut to their first 500 lines and
# run through annotate_template_scores before conversion; all other tables
# are converted directly.  The whole if/else is a single shell command.
${TARGET}.%-scores.html: ${TARGET}.%-scores.rdb ${SCOP}
	if grep --silent 'X_CNT' $< ; then \
		head -n 500 < $< \
		| ${CASP7_SCRIPTS}/annotate_template_scores \
		| ${CASP7_SCRIPTS}/oneway_hits_rdb2html $*-scores \
		> $@ ; \
	else ${CASP7_SCRIPTS}/oneway_hits_rdb2html ${TARGET}.$*-scores < $< > $@ ;\
	fi

# three-track str2 + CB_BURIAL_14_7 angle stuff:
# Map the burial alphabet chosen by the caller to the alphabet name given
# to hmmscore and to the matching template database.
ifeq (${BURIAL_ALPH},CB_burial_14_7)
SAM_BURIAL_ALPH :=CB_BURIAL_14_7
TEMPLATE_BURIAL:=${PCEM_INDEXES}/${AL_METHOD}.CB-burial-14-7s
endif

ifeq (${BURIAL_ALPH},near-backbone-11)
SAM_BURIAL_ALPH := NEAR-BACKBONE-11
TEMPLATE_BURIAL:=${PCEM_INDEXES}/${AL_METHOD}.near-backbone-11s
endif

# Comma-separated database list for three-track scoring.
THREETRACKDBS := ${TEMPLATE_SEQS},${TEMPLATE_STRUCT},${TEMPLATE_BURIAL}

# The template databases only become prerequisites when REDO_SEARCHES is set.
ifdef REDO_SEARCHES
THREETRACK_DEPEND := ${TEMPLATE_SEQS} ${TEMPLATE_STRUCT} ${TEMPLATE_BURIAL}
else
THREETRACK_DEPEND :=
endif

# This target creates the mlib and dist file
# Track weights are 1.0, 0.4 and 0.4, matching "100-40-40" in the file name.
# NOTE(review): -alphabet is given ${STRUCT_ALPH} here, whereas the
# two-track rule passes ${SAM_STRUCT_ALPH} -- confirm this is intended.
%-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.dist \
%-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.mlib: \
		%.w0.5.mod %.${STRUCT_ALPH}.mod %.${BURIAL_ALPH}.mod \
		${THREETRACK_DEPEND}
	${HMMSCORE} $*-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH} \
		-verbose 0 \
		-calibrate 1 \
		-alphabet protein,${STRUCT_ALPH},${SAM_BURIAL_ALPH} \
		-trackmod $*.w0.5.mod,$*.${STRUCT_ALPH}.mod,$*.${BURIAL_ALPH}.mod \
		-db ${THREETRACKDBS} \
		-trackcoeff 1.0,0.4,0.4 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 4 -Emax ${EMAX_FOR_HMMS}

# This target creates the dist file if the mlib files already exists.
# hmmscore runs under the scratch run name "${BURIAL_ALPH}foo"; its output
# file is then moved onto the real target name.
%-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.dist: %-100-40-40-${STRUCT_ALPH}+${BURIAL_ALPH}.mlib \
		${THREETRACK_DEPEND}
	${HMMSCORE} ${BURIAL_ALPH}foo \
		-verbose 0 \
		-modellibrary $< \
		-db ${THREETRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax ${EMAX_FOR_HMMS}
	mv -f ${BURIAL_ALPH}foo.1.$@ $@

# This target creates the mlib and dist file
# Track weights are 0.8, 0.6 and 0.8, matching "80-60-80" in the file name.
# ${THREETRACK_DEPEND} is listed so that REDO_SEARCHES also reruns this
# search when the template databases change.  The recipe names its inputs
# explicitly, so the extra prerequisites do not alter the command line.
%-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH}.dist \
%-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH}.mlib: \
		%.w0.5.mod %.${STRUCT_ALPH}.mod %.${BURIAL_ALPH}.mod \
		${THREETRACK_DEPEND}
	${HMMSCORE} $*-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH} \
		-verbose 0 \
		-calibrate 1 \
		-alphabet protein,${STRUCT_ALPH},${SAM_BURIAL_ALPH} \
		-trackmod $*.w0.5.mod,$*.${STRUCT_ALPH}.mod,$*.${BURIAL_ALPH}.mod \
		-db ${THREETRACKDBS} \
		-trackcoeff 0.8,0.6,0.8 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 4 -Emax ${EMAX_FOR_HMMS}

# This target creates the dist file if the mlib files already exists.
%-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH}.dist: %-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH}.mlib \
		${THREETRACK_DEPEND}
	${HMMSCORE} ${BURIAL_ALPH}foo \
		-verbose 0 \
		-modellibrary $< \
		-db ${THREETRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax ${EMAX_FOR_HMMS}
	mv -f ${BURIAL_ALPH}foo.1.$@ $@

##########################
# TEMPLATE MODEL SCORING #
##########################

# The template model libraries only become prerequisites when
# REDO_SEARCHES is set.
ifdef REDO_SEARCHES
T2K_TEMPLATE_LIB_DEPEND := ${PCEM_INDEXES}/t2k-w0.5-db.mlib
T04_TEMPLATE_LIB_DEPEND := ${PCEM_INDEXES}/t04-w0.5-db.mlib
T06_TEMPLATE_LIB_DEPEND := ${PCEM_INDEXES}/t06-w0.5-db.mlib
else
T2K_TEMPLATE_LIB_DEPEND :=
T04_TEMPLATE_LIB_DEPEND :=
T06_TEMPLATE_LIB_DEPEND :=
endif

#template library scores
# Each rule scores guide.a2m.gz against one template model library, then
# filters hmmscore's .dist-rdb output: drop "#" lines, shorten model names,
# keep rows whose SEQID equals ${TARGET}, sort by EVALUE.  The intermediate
# .dist-rdb file is removed afterwards (failure to remove is ignored).
%.t2k-template-lib-scores.rdb: ${T2K_TEMPLATE_LIB_DEPEND} guide.a2m.gz
	${HMMSCORE} $*.t2k-template-lib \
		-verbose 0 \
		-modellibrary ${PCEM_INDEXES}/t2k-w0.5-db.mlib \
		-db_size ${LIBSIZE} \
		-db guide.a2m.gz -rdb 1 \
		-select_score 4 -emax ${EMAX_FOR_HMMS}
	grep -v '^[#]' < $*.t2k-template-lib.dist-rdb \
		| ${YEAST_SCRIPTS}/shorten_mod_names \
		| ${PCB}/row SEQID eq ${TARGET} \
		| ${PCB}/sorttbl EVALUE \
		> $@
	-rm $*.t2k-template-lib.dist-rdb

%.t04-template-lib-scores.rdb: ${T04_TEMPLATE_LIB_DEPEND} guide.a2m.gz
	${HMMSCORE} $*.t04-template-lib \
		-verbose 0 \
		-modellibrary ${PCEM_INDEXES}/t04-w0.5-db.mlib \
		-db_size ${LIBSIZE} \
		-db guide.a2m.gz -rdb 1 \
		-select_score 4 -emax ${EMAX_FOR_HMMS}
	grep -v '^[#]' < $*.t04-template-lib.dist-rdb \
		| ${YEAST_SCRIPTS}/shorten_mod_names \
		| ${PCB}/row SEQID eq ${TARGET} \
		| ${PCB}/sorttbl EVALUE \
		> $@
	-rm $*.t04-template-lib.dist-rdb

%.t06-template-lib-scores.rdb: ${T06_TEMPLATE_LIB_DEPEND} guide.a2m.gz
	${HMMSCORE} $*.t06-template-lib \
		-verbose 0 \
		-modellibrary ${PCEM_INDEXES}/t06-w0.5-db.mlib \
		-db_size ${LIBSIZE} \
		-db guide.a2m.gz -rdb 1 \
		-select_score 4 -emax ${EMAX_FOR_HMMS}
	grep -v '^[#]' < $*.t06-template-lib.dist-rdb \
		| ${YEAST_SCRIPTS}/shorten_mod_names \
		| ${PCB}/row SEQID eq ${TARGET} \
		| ${PCB}/sorttbl EVALUE \
		> $@
	-rm $*.t06-template-lib.dist-rdb

# Replaced by generic conversion Mon Sep 18 16:46:38 PDT 2006 Kevin Karplus
#
# ${TARGET}.%-template-lib-scores.html: ${TARGET}.%-template-lib-scores.rdb ${SCOP}
#	head -n 500 < $< \
#	| ${CASP7_SCRIPTS}/annotate_template_scores \
#	| ${CASP7_SCRIPTS}/oneway_hits_rdb2html $*-template-lib-scores \
#	> $@

##############################
# SINGLE-TRACK TARGET MODELS #
##############################
# single-track model pdb scoring

# Where to find the PDB sequences.  We now use the dunbrack-pdbaa set,
# which has identical sequences merged.
# PDB_DB := /projects/compbio/data/pdb/all-protein
PDB_DB := /projects/compbio/data/pdb/dunbrack-pdbaa

#calibrate the single track model
#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %.w0.5.dist is called
%.w0.5.mlib: %.w0.5.mod
	${HMMSCORE} $*.w0.5 \
		-verbose 0 \
		-calibrate 1 \
		-i $*.w0.5.mod \
		-db ${TEMPLATE_SEQS} \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*.w0.5.dist
#		-trackprior rsdb-comp2.32comp

# The PDB sequence database only becomes a prerequisite when
# REDO_SEARCHES is set.
ifdef REDO_SEARCHES
PDB_LIB_DEPEND := ${PDB_DB}
else
PDB_LIB_DEPEND :=
endif

# hmmscore runs under the scratch run name "w0.5foo"; its output file is
# then moved onto the real target name.
%.w0.5.dist: %.w0.5.mlib ${PDB_LIB_DEPEND}
	${HMMSCORE} w0.5foo \
		-verbose 0 \
		-modellibrary $< \
		-db ${PDB_DB} \
		-select_score 4 -Emax ${EMAX_FOR_HMMS}
	mv -f w0.5foo.1.$*.w0.5.mod.dist $@

%-w0.5-scores.rdb: %.w0.5.dist ${SCOP}
	${ANNOTATE} $*-w0.5 < $< > $@

%-w0.5-scores.html: %-w0.5-scores.rdb
	${CASP7_SCRIPTS}/oneway_hits_rdb2html $*-w0.5-scores < $^ > $@

#############
#   LOGOS   #
#############

# Strip blanks, dots and lower-case letters from every line that does not
# contain ">" (the sed address '/>/!' skips the lines that do).
${TARGET}.upper-only.a2m : ${TARGET}.a2m
	sed -e '/>/!s/[ .a-z]//g' <$^ > $@

# NOTE(review): modelfromalign is called without the ${BIN_SAM}/ prefix the
# other SAM programs in this file use, so it is looked up on PATH.
%.nothin.mod: %.a2m.gz
	modelfromalign $*.nothin -alignfile $^ \
		-insert ${TRANS_REG} \
		-prior_library ${MIXTURE} \
		-binary_output 1 \
		-aweight_method 1 -aweight_bits 0.5 -aweight_exponent 10

%.nothin.mod: %.frag.gz
	modelfromalign $*.nothin -alignfile $^ \
		-insert ${TRANS_REG} \
		-prior_library ${MIXTURE} \
		-binary_output 1 \
		-aweight_method 1 -aweight_bits 0.5 -aweight_exponent 10

# In all logo rules the model is the first prerequisite (passed as -i $<).
# The .seq file is passed as -logo_caption_f and the upper-case-only
# target sequence as -logo_under_file.
%.nothin-logo.eps %.nothin.saves: %.nothin.mod %.${STRUCT_ALPH}.seq ${TARGET}.upper-only.a2m
	${BIN_SAM}/makelogo $*.nothin-logo -i $< \
		-logo_start_num ${START_COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* nothin" \
		-logo_caption_f $*.${STRUCT_ALPH}.seq \
		-logo_under_file ${TARGET}.upper-only.a2m \
		-logo_savings_output $*.nothin.saves

%.w0.5-logo.eps %.w0.5.saves: %.w0.5.mod %.${STRUCT_ALPH}.seq ${TARGET}.upper-only.a2m
	${BIN_SAM}/makelogo $*.w0.5-logo -i $< \
		-logo_start_num ${START_COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* w0.5" \
		-logo_caption_f $*.${STRUCT_ALPH}.seq \
		-logo_under_file ${TARGET}.upper-only.a2m \
		-logo_savings_output $*.w0.5.saves

# The recipe reads $*.${STRUCT_ALPH}.seq as the caption file, so that file
# is declared as a prerequisite here, as it is in the two logo rules above.
%.${STRUCT_ALPH}-logo.eps: %.${STRUCT_ALPH}.mod %.${STRUCT_ALPH}.seq ${TARGET}.upper-only.a2m
	${BIN_SAM}/makelogo $*.${STRUCT_ALPH}-logo -i $< \
		-logo_start_num ${START_COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* ${STRUCT_ALPH}" \
		-logo_caption_f $*.${STRUCT_ALPH}.seq \
		-logo_under_file ${TARGET}.upper-only.a2m \
		-logo_color_file ${STRUCT_ALPH_COLOR_FILE}

%.${STRUCT_ALPH}-color.rasmol: %.${STRUCT_ALPH}.seq
	${YEAST_SCRIPTS}/rasmol_color_from_burial \
		-pdb ${TARGET}.blank.pdb.gz \
		-start_col ${START_COL} \
		-color ${STRUCT_RASMOL_COLOR} \
		< $^ > $@

###############
#  TOP HITS   #
###############

# define the threshold below which you want hits reported.
# If ANY of the methods reports a hit this good, it will be included
# in ${TARGET}.best_scores.rdb
ifndef BEST_EVALUE
BEST_EVALUE := 1.e-05
endif

# report at least this many hits, even if there are no good evalues.
ifndef NUM_BEST
NUM_BEST := 20
endif

# report at most this many hits, even if there are more good evalues
ifndef MAX_NUM_BEST
MAX_NUM_BEST := 200
endif

# Score-file "tracks" merged into a best-scores table, in the order they are
# handed to best_scores.  Each entry TRK names a file <stem>-TRK-scores.rdb.
best_score_tracks := \
    template-lib \
    w0.5 \
    100-30-dssp-ebghstl \
    100-30-stride-ebghtl \
    100-30-str2 \
    100-30-alpha \
    100-30-bys \
    100-30-o_notor2 \
    100-30-n_notor2 \
    100-30-o_sep \
    100-30-n_sep \
    100-30-CB_burial_14_7 \
    100-30-near-backbone-11 \
    100-40-40-str2+CB_burial_14_7 \
    80-60-80-str2+near-backbone-11 \
    100-30-dssp-ehl2

# find the best hits (include dupes) for the current alignment method only
%.${AL_METHOD}.best-scores.rdb: \
    $(foreach trk,${best_score_tracks},%.${AL_METHOD}-${trk}-scores.rdb)
	${CASP7_SCRIPTS}/best_scores \
	    -num ${NUM_BEST} -E ${BEST_EVALUE} -lib_size ${LIBSIZE} \
	    -maxnum ${MAX_NUM_BEST} \
	    -scop_file ${SCOP} \
	    $^ > $@

# find the best hits (include dupes) across every method in MA_METHODS
%.best-scores.rdb: \
    $(foreach AL,${MA_METHODS},\
      $(foreach trk,${best_score_tracks},%.${AL}-${trk}-scores.rdb))
	${CASP7_SCRIPTS}/best_scores \
	    -num ${NUM_BEST} -E ${BEST_EVALUE} -lib_size ${LIBSIZE} \
	    -maxnum ${MAX_NUM_BEST} \
	    -scop_file ${SCOP} \
	    $^ > $@

%.best-scores.html: %.best-scores.rdb
	${CASP7_SCRIPTS}/oneway_hits_rdb2html $*.best_hits < $^ > $@

###########################################
#               ALIGNMENTS                #
###########################################

# track models to be used in pairwise alignments
TRACKMOD_STRUCT :=${SECONDARY_TARGET}.mod
STRUCT_MLIB :=${TARG_AL}-100-30-${STRUCT_ALPH}.mod
SEED_PAIR := guide.a2m.gz,${TARG_AL}.${STRUCT_ALPH}.padded.seq

EBGHSTL_TRACKMOD := ${TARG_AL}.dssp-ebghstl.mod
EBGHTL_TRACKMOD := ${TARG_AL}.stride-ebghtl.mod
STR2_TRACKMOD := ${TARG_AL}.str2.mod

W05_MLIB := ${TARG_AL}.w0.5.mlib
EBGHSTL_MLIB := ${TARG_AL}-100-30-dssp-ebghstl.mlib
EBGHTL_MLIB := ${TARG_AL}-100-30-stride-ebghtl.mlib
STR2_MLIB := ${TARG_AL}-100-30-str2.mlib
ALPHA_MLIB := ${TARG_AL}-100-30-alpha.mlib
DSSP_EHL2_MLIB := ${TARG_AL}-100-30-dssp-ehl2.mlib

# settings of how many templates to predict vs. number of
# alignments to convert to CASP format must be done carefully
# need to add error checking so these numbers don't conflict
# with each other

# how many alignments to select from best templates
ifndef NUM_ALIGNMENTS
NUM_ALIGNMENTS := 250
endif

# top alignments we report for top_reported
ifndef NUM_TOP
NUM_TOP := 10
endif

# build an HMM from target sequence only to produce alignments
# similar to simple Smith-Waterman.  We observe that the
# T2K HMMs are so general that they may drift away from the
# original seed sequence
%.mod: %.a2m
	${BIN_SAM}/modelfromalign $* \
	    -alignfile $^ \
	    -insert /projects/compbio/lib/fssp-trained.regularizer \
	    -aweight_bits 0.8 \
	    -fimtrans -1 \
	    -fimstrength 1 \
	    -ins_jump_conf 1 \
	    -match_jump_conf 1 \
	    -del_jump_conf 1 \
	    -binary_output 1 \
	    -prior_library ${MIXTURE} \
	    -a2mdots 0 \
	    -a protein \
	    -sw 2 -jump_in_prob 0.2 -jump_out_prob 1 \
	    -aweight_method 1 \
	    -aweight_exponent 10

# Every pairwise alignment already present in the per-template directories.
ALIGNMENT_FILES = $(wildcard [1-9]*/*a2m*)

# get the ${NUM_TOP} best alignments
${TARGET}.top_reported_alignments.rdb: \
    ${TARGET}.best-scores.rdb ${ALIGNMENT_FILES}
	${CASP7_SCRIPTS}/pick_alignments -target ${TARGET} \
	    -max_align ${NUM_TOP} -scores_file $< \
	    > $@

# Same, restricted to alignments whose name matches the stem.
${TARGET}.%.top_reported_alignments.rdb: \
    ${TARGET}.%.best-scores.rdb ${ALIGNMENT_FILES}
	${CASP7_SCRIPTS}/pick_alignments -target ${TARGET} \
	    -max_align ${NUM_TOP} -scores_file $< \
	    -select_re $* \
	    > $@

# convert this to html and add to the summary page
%.top_reported_alignments.html: %.top_reported_alignments.rdb %.best-scores.rdb
	${CASP7_SCRIPTS}/casp_summary_report_html \
	    --align $*.top_reported_alignments.rdb \
	    --target ${TARGET} --make_al > $@

########################################
# CONTACT PREDICTION USING NEURAL NETS #
########################################

ifndef RR_EXT
RR_EXT:=${PREFERRED_RR_EXT}
endif

ifndef VALIDATE2RR
VALIDATE2RR := ${RR_BIN}/validate2rr
endif

RR_PROG ?= traincontactnn7

ifndef CORR_COLUMNS
CORR_COLUMNS := ${RR_BIN}/correlated-columns
endif

# needed for string substitutions
comma:= ,
empty:=
space:= ${empty} ${empty}

# CASP7 extension to RR_ARGS
RR_ARGS := -l 3 -predict ./ -id ${TARGET}

# Each block below configures one network, selected by RR_EXT.
# NOTE(review): this block hard-codes "thin50" in RR_ARGS while RR_DEPENDS
# uses ${THIN}; the two agree only while THIN stays 50.
ifeq (${RR_EXT},248_20)
RR_AL:=t04
RR_NN_NAME:= logsep.t04.5_ent.burNS_str2.miRvp_entR_pplR.n20.net
STAT_LIST:=entraw,mi,omesraw,pplraw_CB8_w
STATS:= $(subst ${comma},${empty},${STAT_LIST})
RR_FEATURE_COMMENT:= significance of mutual information, pairwise entropy, pairwise contact propensity, log(separation), predicted burial (near-backbone-11), predicted secondary structure(str2), amino acid profile, log(protein length)
THIN:=50
RR_ARGS:= -extra logsep \
    -entropy -W 5 -distribution .t04 \
    -L 1 -S .t04-CB-burial-14-7.rdb,.t04.str2.rdb -noSummary \
    -M 1 -C .t04-thin50.${STATS}.rdb.gz -Cstats mi,entraw,pplraw -Copts R,pRv \
    ${RR_ARGS}
RR_DEPENDS:=${TARGET}.t04.w0.5.saves \
    ${TARGET}.t04-CB-burial-14-7.rdb ${TARGET}.t04.str2.rdb \
    ${TARGET}.t04-thin${THIN}.${STATS}.rdb.gz
endif

ifeq (${RR_EXT},352_28)
RR_AL=t04
RR_NN_NAME:= logseploglen.5xt04_ent.3xnearNS_str2.miRvp_pplR.n28.net
STAT_LIST:=entraw,mi,omesraw,pplraw_CB8_w
STATS:= $(subst ${comma},${empty},${STAT_LIST})
RR_FEATURE_COMMENT:= significance of mutual information, pairwise contact propensity, log(separation), predicted burial (near-backbone-11), predicted secondary structure(str2), amino acid profile, log(protein length)
THIN:=50
RR_ARGS:= -extra logsep,loglen \
    -W 5 -distribution .t04 -entropy \
    -L 3 -S .t04.near-backbone-11.rdb,.t04.str2.rdb -noSummary \
    -M 1 -C .t04-thin${THIN}.${STATS}.rdb.gz -Cstats mi,pplraw -Copts R,pRv \
    ${RR_ARGS}
RR_DEPENDS:=${TARGET}.t04.w0.5.saves \
    ${TARGET}.t04.near-backbone-11.rdb ${TARGET}.t04.str2.rdb \
    ${TARGET}.t04-thin${THIN}.${STATS}.rdb.gz
endif

ifeq (${RR_EXT},449a_45)
RR_AL=t04
RR_NN_NAME:= logseploglen.5xt04_ent.5xnearNS_str2.miRpz_entR_pplR.n45.net
STAT_LIST:=entraw,mi,omesraw,pplraw_CB8_w
STATS:= $(subst ${comma},${empty},${STAT_LIST})
THIN:=50
RR_ARGS:= -extra logsep,loglen \
    -W 5 -distribution .t04 -entropy \
    -L 5 -S .t04.near-backbone-11.rdb,.t04.str2.rdb -noSummary \
    -M 1 -C .t04-thin${THIN}.${STATS}.rdb.gz -Cstats mi,ent,pplraw -Copts R,pRz \
    ${RR_ARGS}
RR_DEPENDS:=${TARGET}.t04.w0.5.saves \
    ${TARGET}.t04.near-backbone-11.rdb ${TARGET}.t04.str2.rdb \
    ${TARGET}.t04-thin${THIN}.${STATS}.rdb.gz
endif

# NOTE(review): RR_AL stays t04 here although every file used is a t2k one;
# confirm that is intended before relying on RR_AL-derived names.
ifeq (${RR_EXT},449a_45t2k)
RR_AL:=t04
RR_NN_NAME:= logseploglen.5xt04_ent.5xnearNS_str2.miRpz_entR_pplR.n45.net
STAT_LIST:=entraw,mi,omesraw,pplraw_CB8_w
STATS:= $(subst ${comma},${empty},${STAT_LIST})
THIN:=50
RR_ARGS:= -extra logsep,loglen \
    -W 5 -distribution .t2k -entropy \
    -L 5 -S .t2k.near-backbone-11.rdb,.t2k.str2.rdb -noSummary \
    -M 1 -C .t2k-thin${THIN}.${STATS}.rdb.gz -Cstats mi,ent,pplraw -Copts R,pRz \
    ${RR_ARGS}
RR_DEPENDS:=${TARGET}.t2k.w0.5.saves \
    ${TARGET}.t2k.near-backbone-11.rdb ${TARGET}.t2k.str2.rdb \
    ${TARGET}.t2k-thin${THIN}.${STATS}.rdb.gz
endif

# Start of new CASP8 rr predictions
ifeq (${RR_EXT},647_47)
RR_PROG := predictlocal
RR_AL=t04
RR_NN_NAME:= con.aa5str2_5near5nsep5.entmi_epplcc.th62.47.net
STAT_LIST:=ent,mi_eval,cc,chi2,ppl,pplw
STATS:= $(subst ${comma},${empty},${STAT_LIST})
THIN:=62
LENFRAC:=12
RR_ARGS:= -align t04 \
    -alphabets aa,str2,near,n_sep -windows 5,5,5,5 \
    -paired .${RR_AL}-thin${THIN}.${STATS}.rdb.gz,ent,pR,mi_eval,R,ppl,R,cc,R \
    -predict ${TARGET}
RR_DEPENDS:=${TARGET}.${RR_AL}.probs \
    ${TARGET}.${RR_AL}.near-backbone-11.rdb \
    ${TARGET}.${RR_AL}.str2.rdb \
    ${TARGET}.${RR_AL}.n_sep.rdb \
    ${TARGET}.${RR_AL}-thin${THIN}.${STATS}.rdb.gz
endif

ifeq (${RR_EXT},730_47)
RR_PROG := predictlocal
RR_AL:=t04
RR_NN_NAME:= con.aa7str2_7near3nsep5.47.data03.net
LENFRAC:=12
RR_ARGS:= -align t04 \
    -alphabets aa,str2,near,n_sep -windows 7,7,3,5 \
    -paired none \
    -predict ${TARGET}
RR_DEPENDS:=${TARGET}.${RR_AL}.probs \
    ${TARGET}.${RR_AL}.near-backbone-11.rdb \
    ${TARGET}.${RR_AL}.str2.rdb \
    ${TARGET}.${RR_AL}.n_sep.rdb
endif

# Two-stage predictor: consumes the .rr output of the 730_47 network.
ifeq (${RR_EXT},648_17.730_47)
RR_PROG := predictlocal
RR_AL=t04
RR_NN_NAME:= con.aa5str2_5near5nsep5.entmi_epplccrr.th62.17.730_47.net
RR_FEATURE_COMMENT:= significance of mutual information, pairwise contact propensity, log(separation), predicted burial (near-backbone-11), predicted secondary structure(str2), predicted H-bonds (n_sep), amino acid profile, log(protein length), log(rank of 730_47 prediction). Limited to pairs generated by 730_47
FIRST_STAGE:=730_47
FIRST_STAGE_EXT:=${FIRST_STAGE}.rr
STAT_LIST:=ent,mi_eval,cc,omes,ppl
STATS:= $(subst ${comma},${empty},${STAT_LIST})
THIN:=62
LENFRAC:=11
RR_ARGS:= -align t04 \
    -alphabets aa,str2,near,n_sep -windows 5,5,5,5 \
    -paired .${RR_AL}-thin${THIN}.${STATS}.rdb.gz,ent,pR,mi_eval,R,ppl,R,cc,R,rr,R \
    -predict ${TARGET}
RR_DEPENDS:=${TARGET}.${RR_AL}.probs \
    ${TARGET}.${RR_AL}.near-backbone-11.rdb \
    ${TARGET}.${RR_AL}.str2.rdb \
    ${TARGET}.${RR_AL}.n_sep.rdb \
    ${TARGET}.${FIRST_STAGE_EXT} \
    ${TARGET}.${RR_AL}-thin${THIN}.${STATS}.rdb.gz
endif

ifndef TRAIN_CONTACT_NN
TRAIN_CONTACT_NN := ${RR_BIN}/${RR_PROG}
endif

# STAT_LIST with commas turned into spaces, for use in $(foreach ...).
STAT_LIST2:= $(subst ${comma},${space},${STAT_LIST})
RR_NEURAL_NET:=${RR_NETWORKS}/${RR_NN_NAME}

CONTACT_PRED_TARGET := ${TARGET}.${RR_EXT}.rr

# setup for building first stage, if necessary
ifdef FIRST_STAGE
FIRST_STAGE_CMD := SetRRPairs ${TARGET}.${FIRST_STAGE_EXT}
$(warning First stage -----is ${FIRST_STAGE})

# if we have to, build the first stage!
first_stage:
	${MAKE} -k AL_METHOD=${RR_AL} RR_EXT=${FIRST_STAGE} ${TARGET}.${FIRST_STAGE_EXT} TARGET=${TARGET}
endif

# Append the contact-prediction section heading to the summary page.
rr_start_section:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    "Contact predictions, based on ${RR_FEATURE_COMMENT}" >> ${WORKDIR}/summary.html
endif

# The prediction requires using the 'traincontactnn7' program
# to build a list of inputs for a specified neural network
# to make predictions.  Those predictions are sorted by raw score
# and the sequence_length*2 best scoring predictions form
# the submitted RR predictions.
ifndef RR_FACTOR
RR_FACTOR := 0.05
endif

# Run the network and convert its raw output into the RR file.
# Only this one file is written by the recipe itself (stdout redirect).
${CONTACT_PRED_TARGET}: ${RR_DEPENDS} ${RR_NEURAL_NET} ${TRAIN_CONTACT_NN}
	${TRAIN_CONTACT_NN} -start ${START_COL} -i ${RR_NEURAL_NET} ${RR_ARGS} \
	    | ${VALIDATE2RR} --format raw --constraints --network ${RR_NN_NAME} --abbrv ${RR_EXT} \
		--cutoffs 0.3,0.5 \
		--author ${CASP_ID} \
		--factor ${RR_FACTOR} \
		--target ${TARGET} --start ${START_COL} \
	    > $@

# The .rasmol and .constraints files are by-products of the recipe above
# (presumably written by validate2rr -- confirm).  They used to share that
# recipe through a single three-target explicit rule, which Make treats as
# three independent rules: the pipeline could run up to three times, and
# concurrently under -j, each run rewriting the same .rr file.  They now
# simply depend on the .rr file; the recipe only reports a missing by-product
# and never fails, so the previous best-effort behaviour is kept.
${CONTACT_PRED_TARGET}.rasmol ${CONTACT_PRED_TARGET}.constraints: ${CONTACT_PRED_TARGET}
	@test -e $@ || echo "warning: $@ was not produced; remove $< and rebuild to regenerate it" >&2

# This rather awkward recursive make is to ensure that the process
# continues, even if the contact prediction fails.
contact_prediction: ALWAYS
	-${MAKE} -k RR_EXT=${RR_EXT} ${CONTACT_PRED_TARGET} ${CONTACT_PRED_TARGET}.rasmol ${CONTACT_PRED_TARGET}.constraints

# "<ext>.contact_prediction" runs contact_prediction with RR_EXT=<ext>.
%.contact_prediction:
	-${MAKE} RR_EXT=$* TARGET=${TARGET} contact_prediction

# This conditional avoids undesired actions when doing/testing new rr's
# comment out if we really want a different rr_ext (or change the preferred!!)
# ifeq(${RR_EXT},${PREFERRED_RR_EXT})
#
# rr rr.constraints : ${CONTACT_PRED_TARGET} ${CONTACT_PRED_TARGET}.rasmol ${CONTACT_PRED_TARGET}.constraints
#	ln -sf ${CONTACT_PRED_TARGET}.rasmol rr
#	ln -sf ${CONTACT_PRED_TARGET}.constraints rr.constraints

# Add links to the prediction files to the summary page (unless NO_SUMMARY).
add_contact_prediction: contact_prediction
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
	    "contact predictions" \
	    ${TARGET}.${RR_EXT} \
	    rr rr.constraints \
	    >> ${WORKDIR}/summary.html
endif

ifndef EMAIL_CONTACT_PRED_SUBJECT
EMAIL_CONTACT_PRED_SUBJECT := SAM-${SAM_YEAR} ${TARGET}
endif

# Mail the RR file to EMAIL_ADDRESS.
mail_contact_pred: ${CONTACT_PRED_TARGET}
	mail -s '${EMAIL_CONTACT_PRED_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
	echo mailing $^ to ${EMAIL_ADDRESS} done

# endif

#################################################
# CORRELATION STATISTICS FOR CONTACT PREDICTION #
#################################################
#
# computable strings   NOTE: these are based on the
# functions defined in CTStatistics and included files
#
mi_eval:=MutualInformation LogGamma
# Each variable below maps a short statistic key to the argument text that
# the %.${STATS}.rdb.gz recipe writes after "AddStatistic" (via ${${i}}).

# mutual-information family
mi      := MutualInformation LogGamma
miraw   := MutualInformation
mieraw  := MutualInformationEntropy
mie     := MutualInformationEntropy
mi_eval := MutualInformation LogGamma

# correlation coefficients
ccraw   := aaCorrCoefficient
cc1raw  := CorrCoefficient1
wccraw  := WeightedCorrCoefficient
ccx     := aaCorrCoefficient
cc      := CorrCoefficient1
wcc     := WeightedCorrCoefficient

# other single-word statistics
hgraw   := Hypergeometric
hg      := Hypergeometric
omesraw := OMES
omes    := OMES
entraw  := Entropy
ent     := Entropy
chi2    := Chi2

# contact propensities
# NOTE(review): the two near7 tables have no ${RR_DATA}/ prefix, unlike all
# the CB8 tables -- confirm whether that is intentional.
ppraw_near7_5 := Propensity dunbrack-2191-near7.5-sep9.residue_pairs
ppraw_near7   := Propensity dunbrack-2191-near7-sep9.residue_pairs
ppraw_CB8     := Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9.residue_pairs
pplraw_CB8    := PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9.residue_pairs
ppraw_CB8_w   := Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9-w.residue_pairs
ppraw_CB8_ww  := Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9-ww.residue_pairs
pplraw_CB8_w  := PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9-w.residue_pairs
pplraw_CB8_ww := PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9-ww.residue_pairs

# the newest 'keys' for AddStatistics
# (hg, omes, ent, mi_eval, cc and chi2 are defined once, in the groups above)
pp    := Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9.residue_pairs
ppl   := PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9.residue_pairs
ppw   := Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9-w.residue_pairs
ppww  := Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9-ww.residue_pairs
pplw  := PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9-w.residue_pairs
pplww := PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9-ww.residue_pairs

# only consider column pairs with at least ${MINPAIR} * num_sequences
# sequences that have both columns occupied
ifndef MINPAIR
MINPAIR := 0.7
endif

# only consider column pairs with at least ${MINNUMPAIR} sequences
# that have both columns occupied
ifndef MINNUMPAIR
MINNUMPAIR := 3
endif

# generate ${LENFRAC}*sequence length pairs of columns to feed to neural net
ifndef LENFRAC
LENFRAC := 10
endif

ifndef RR_AL
RR_AL := t04
endif

# Name fragment of the thinned alignment the statistics are computed from.
ifndef AL_THIN
AL_THIN := ${RR_AL}-thin${THIN}
endif
build_correlations : ${TARGET}.${AL_THIN}.${STATS}.rdb.gz

# Write a command script for ${CORR_COLUMNS}, run it, and gzip the table.
# One "AddStatistic" line is emitted per key in STAT_LIST2; ${${i}} looks up
# the variable named after the key.
%.${STATS}.rdb.gz : %.a2m.gz
	echo Building ${STATS} ${STAT_LIST2}
	echo SetAlphabet ExtAA > tmp-$*-${STATS}.corr_col
	echo ReadA2m $< >> tmp-$*-${STATS}.corr_col
	echo SetFractionOfLen ${LENFRAC} >> tmp-$*-${STATS}.corr_col
	echo SetMinNumPairs ${MINNUMPAIR} >> tmp-$*-${STATS}.corr_col
	echo SetMinPairs ${MINPAIR} >> tmp-$*-${STATS}.corr_col
	echo SetMinSep 7 >> tmp-$*-${STATS}.corr_col
	-$(foreach i,${STAT_LIST2},\
	    echo AddStatistic ${${i}} >> tmp-$*-${STATS}.corr_col ;)
	echo ${FIRST_STAGE_CMD} >> tmp-$*-${STATS}.corr_col
	echo MutualInfoAll ${@:.gz=} >> tmp-$*-${STATS}.corr_col
	${CORR_COLUMNS} < tmp-$*-${STATS}.corr_col
	rm -f tmp-$*-${STATS}.corr_col
	gzip -9f ${@:.gz=}

# The CASP8 predictor will likely use the *.probs
%.probs : %.a2m.gz
	echo Alphabet ExtAA > tmp-$*-probs.script
	echo ClipWeight 1.0 >> tmp-$*-probs.script
	echo PushReg /projects/compbio/lib/recode3.20comp >> tmp-$*-probs.script
	echo SequenceWeight HenikoffWeight 1.0 1.0 >> tmp-$*-probs.script
	echo ReadA2M $*.a2m.gz >> tmp-$*-probs.script
	echo PrintProbs $*.probs >> tmp-$*-probs.script
	echo quit >> tmp-$*-probs.script
	estimate-dist < tmp-$*-probs.script
	rm -f tmp-$*-probs.script

##############
# FRAGFINDER #
##############

FRAGFINDER_SEQS := ${PCEM_INDEXES}/calibration.x-seqs
FRAGFINDER_STR2 := ${PCEM_INDEXES}/calibration.str2s
FRAGFINDER_CB_BURIAL_14_7 := ${PCEM_INDEXES}/calibration.CB-burial-14-7s
FRAGFINDER_NEAR-BACKBONE-11 := ${PCEM_INDEXES}/calibration.near-backbone-11s

FRAGFINDER_STR2_TWOTRACKDBS := ${FRAGFINDER_SEQS},${FRAGFINDER_STR2}
FRAGFINDER_STR2+CB_BURIAL_14_7_THREETRACKDBS := ${FRAGFINDER_SEQS},${FRAGFINDER_STR2},${FRAGFINDER_CB_BURIAL_14_7}
FRAGFINDER_STR2+NEAR-BACKBONE-11_THREETRACKDBS := ${FRAGFINDER_SEQS},${FRAGFINDER_STR2},${FRAGFINDER_NEAR-BACKBONE-11}

# Two-track (amino acid + str2) fragment search.
%.frag.gz: %.w0.5.mod %.str2.mod guide.a2m.gz
	${FRAGFINDER} $* \
	    -a protein,STR2 \
	    -trackmod $*.w0.5.mod,$*.str2.mod \
	    -track_coeff 1.0,0.3 \
	    -db ${FRAGFINDER_STR2_TWOTRACKDBS} \
	    -firstsequence guide.a2m.gz,- \
	    -fraglen 9 -numpermatch 6
	gzip -9f $*.frag
	gzip -9f $*.fstat

# Three-track (amino acid + str2 + near-backbone-11) fragment search.
%.many.frag.gz: %.w0.5.mod %.str2.mod %.near-backbone-11.mod guide.a2m.gz
	${FRAGFINDER} $*.many \
	    -alphabet protein,str2,near-backbone-11 \
	    -trackmod $*.w0.5.mod,$*.str2.mod,$*.near-backbone-11.mod \
	    -trackcoeff 0.8,0.6,0.8 \
	    -db ${FRAGFINDER_STR2+NEAR-BACKBONE-11_THREETRACKDBS} \
	    -firstsequence guide.a2m.gz,-,- \
	    -fraglen 9 -numpermatch 30
	gzip -9f $*.many.frag
	gzip -9f $*.many.fstat

##############
# UNDERTAKER #
##############

# will need to create a "decoys" directory before any full 3D building
# with undertaker can be done.
# NOTE(review): "Template.atoms" is taken here as a second file for gzip;
# confirm against the unmangled original.
decoys/${TARGET}.%-opt2.pdb.gz: %.under %.costfcn
	-mkdir -p decoys
	nice -5 ${UNDERTAKER} < $*.under > $*.log 2>&1
	-gzip -f decoys/${TARGET}.$**pdb
	-gzip -9f $*.log Template.atoms

# List every decoy PDB file as a ReadConformPDB command.
read-decoys.under: decoys
	echo "InfilePrefix decoys/"> $@
	ls decoys/*${TARGET}*pdb* \
	    | sed 's;decoys/;ReadConformPDB ;' \
	    >> $@
	echo "InfilePrefix" >> $@
	-chgrp protein $@
	-chmod g+w $@

# Instantiate the show-align.under template for this stem and run it.
%.undertaker-align.${PDBEXT} %.undertaker-align.sheets: %.undertaker-align.under
	cat ${STARTER}/show-align.under \
	    | sed s/XXX0000.t2k/$*/g \
	    |sed s/XXX0000/${TARGET}/g \
	    | sed s/START_COL/${START_COL}/g \
	    | nice -2 ${UNDERTAKER} >& show-align.log
	gzip -f show-align.log
ifndef NOGZIP_PDB
	gzip -f $*.undertaker-align.pdb
endif

CONVERT_200_OPTIONS := -resize 200x200 -quality 85 -frame 1x1 -mattecolor '\#000000'
CONVERT_500_OPTIONS := -resize 500x500 -quality 75 -frame 1x1 -mattecolor '\#000000'

%.make_jpeg:
	${MAKE} -k $*.${PDBEXT} \
	    $*.view1_200.jpg $*.view2_200.jpg $*.view3_200.jpg \
	    $*.view1_500.jpg $*.view2_500.jpg $*.view3_500.jpg

# Render three views with rasmol, then convert each to two JPEG sizes.
# The intermediate files have fixed names (tmp1.eps ...), so two of these
# recipes must not run in the same directory at once.
%.view1_200.jpg %.view2_200.jpg %.view3_200.jpg \
%.view1_500.jpg %.view2_500.jpg %.view3_500.jpg : %.${PDBEXT} ${STARTER}/make-eps.rasmol
	${RASMOL} -nodisplay $*.${PDBEXT} < ${STARTER}/make-eps.rasmol
	convert ${CONVERT_200_OPTIONS} tmp1.eps $*.view1_200.jpg
	convert ${CONVERT_200_OPTIONS} tmp2.eps $*.view2_200.jpg
	convert ${CONVERT_200_OPTIONS} tmp3.eps $*.view3_200.jpg
	convert ${CONVERT_500_OPTIONS} tmp1.eps $*.view1_500.jpg
	convert ${CONVERT_500_OPTIONS} tmp2.eps $*.view2_500.jpg
	convert ${CONVERT_500_OPTIONS} tmp3.eps $*.view3_500.jpg
	-rm -f tmp*eps

%/read-alignments-noscwrl.under: %
	cd $*; \
	${UNDERTAKER_SCRIPTS}/make-read-fragments.csh \
	    > read-alignments-noscwrl.under

%/read-alignments-scwrl.under: %
	cd $*; \
	${UNDERTAKER_SCRIPTS}/make-read-fragments.csh SCWRL \
	    > read-alignments-scwrl.under

# for close homology modeling, may want to pick out best scores using
# single sequences
# The key is given as "-k 4": the obsolete zero-based "+3" form is rejected
# by current POSIX/GNU sort, which treats it as a file name.
sw-best: [1-9]*
	echo 'foreach x ([0-9]*)' > tmp.script
	echo 'grep -h "$$x " $$x/*SW*dist' >> tmp.script
	echo 'end' >> tmp.script
	chmod +x tmp.script
	csh tmp.script \
	    | sort -n -k 4 \
	    | uniq \
	    > $@
	rm tmp.script

guide.a2m.gz: ${TARGET}.upper-only.a2m
	${CASP7_SCRIPTS}/extract-guide < $^ \
	    | gzip > $@

# Everything from here on applies only in the recursive makes that set PRED
# (the template chain ID).  Both conditionals are closed further below.
ifdef PRED
# first two characters of PRED
PRED2 := $(shell echo ${PRED} | sed 's/\(..\).*/\1/')
ifdef PRED2

PRED_NOSTRUCT := ${PCEM}/pdb/${PRED2}/${PRED}/nostruct-align
PRED_INFO := ${PCEM}/pdb/${PRED2}/${PRED}/info

# Map the alignment type onto the hmmscore -sw setting.
ifdef ALIGN_TYPE
ifeq (${ALIGN_TYPE},local)
SW=2
endif
ifeq (${ALIGN_TYPE},global)
SW=0
endif
ifeq (${ALIGN_TYPE},simplesw)
SW=2
endif
endif

# MASTER selects which side supplies the model: target, template or fssp.
ifeq (${MASTER},target)
ifeq (${ALIGN_TYPE},local)
ALIGN_MODEL=${TARG_AL}.w0.5.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif
ifeq (${ALIGN_TYPE},global)
ALIGN_MODEL=${TARG_AL}.w0.5.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif
ifeq (${ALIGN_TYPE},simplesw)
ALIGN_MODEL=${TARGET}.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${ALIGN_TYPE}-adpstyle${ADP}
endif
endif

ifeq (${MASTER},template)
ALIGN_MODEL := ${PRED_NOSTRUCT}/${PRED}.${AL_METHOD}-w0.5.mod
ALIGN_NAME := ${PRED}/${PRED}-${TARGET}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif

ifeq (${MASTER},fssp)
FSSP_STRUCT := ${PCEM}/pdb/${FSSP2}/${FSSP}/struct-align
ALIGN_MODEL := ${FSSP_STRUCT}/${FSSP}.fssp.w0.5.mod
ALIGN_NAME := ${PRED}/${FSSP}-${TARGET}-fssp-${ALIGN_TYPE}-adpstyle${ADP}
endif

ifdef ALIGN_NAME
single-track-alignment: ${ALIGN_NAME}.a2m
	echo $^ made.

${ALIGN_NAME}.a2m: ${ALIGN_MODEL} ${PRED}/${PRED}.seq guide.a2m.gz
	${HMMSCORE} ${ALIGN_NAME} \
	    -verbose 0 \
	    -alphabet protein -i $< -db guide.a2m.gz \
	    -db ${PRED}/${PRED}.seq \
	    -db_size ${LIBSIZE} \
	    -simple_threshold 10000 \
	    -sw ${SW} -dpstyle 0 -subtract_null 4 \
	    -adpstyle ${ADP} \
	    -select_align 8
	gzip -f ${ALIGN_NAME}.dist
endif

# Canned recipes expanded inside $(foreach al,...$(foreach at,...)) below.
# The blank line before each "endef" is significant: it ends the expansion
# with a newline so that every call becomes its own command line.
define single-track-op
-${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \
    PRED=${PRED} PRED2=${PRED2} \
    ADP=1 MASTER=target single-track-alignment

endef

define single-track-viterbi-op
-${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \
    PRED=${PRED} PRED2=${PRED2} \
    ADP=5 MASTER=target single-track-alignment

endef

# Fetch the template sequence (from the info directory if it is there,
# otherwise out of PDB_DB), then make every single-track alignment.
# The ADP=5 set is skipped when ALIGN_VITERBI is defined.
single-track-target-alignments:
	-mkdir -p ${PRED}
	test -e ${PRED}/${PRED}.seq -o '!' -e ${PRED_INFO}/${PRED}.stride-mixed.seq \
	    || cp -p ${PRED_INFO}/${PRED}.stride-mixed.seq ${PRED}/${PRED}.seq
	test -e ${PRED}/${PRED}.seq \
	    || ${YEAST_SCRIPTS}/extract-one-seq ${PRED} < ${PDB_DB} > ${PRED}/${PRED}.seq \
	    || { echo removing rm ${PRED}/${PRED}.seq; rm ${PRED}/${PRED}.seq ;}
	$(foreach al,${MA_METHODS},\
	    $(foreach at,simplesw local global, $(call single-track-op)))
ifndef ALIGN_VITERBI
	$(foreach al,${MA_METHODS},\
	    $(foreach at,simplesw local global, $(call single-track-viterbi-op)))
endif

# Template-as-master alignments are only attempted when PRED is listed in
# the id index of the alignment method.
define template_align_op
-grep '${PRED}' ${PCEM_INDEXES}/${al}.ids \
    && ( \
    ${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \
	PRED=${PRED} PRED2=${PRED2} ADP=1 MASTER=template single-track-alignment \
    )

endef

define template_viterbi_op
-grep '${PRED}' ${PCEM_INDEXES}/${al}.ids \
    && (${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \
	PRED=${PRED} PRED2=${PRED2} ADP=5 MASTER=template single-track-alignment)

endef

template-alignments:
	-mkdir -p ${PRED}
	$(foreach al,${MA_METHODS},\
	    $(foreach at,local global, $(call template_align_op)))
ifndef ALIGN_VITERBI
	$(foreach al,${MA_METHODS},\
	    $(foreach at,local global, $(call template_viterbi_op)))
endif
# Thu May 18 15:58:46 PDT 2006 Kevin Karplus
# muscle alignments sometimes take a long time and don't seem very
# good, so I've commented them out
#	-$(foreach al,${MA_METHODS}, \
#	    grep '${PRED}' ${PCEM_INDEXES}/${al}.ids \
#	    && ${MAKE} -k ${PRED}/${TARGET}-${PRED}-${al}-muscle.a2m.gz \
#		PRED=${PRED} PRED2=${PRED2} ;)

########################################
# FSSP-based alignments
########################################
ifdef FSSP
ifdef FSSP2
fssp-template-alignments:
	-mkdir -p ${PRED}
	-${MAKE} -k ALIGN_TYPE=local \
	    PRED=${PRED} PRED2=${PRED2} ADP=5 \
	    MASTER=fssp FSSP=${FSSP} FSSP2=${FSSP2} single-track-alignment
	-${MAKE} -k ALIGN_TYPE=global \
	    PRED=${PRED} PRED2=${PRED2} ADP=5 \
	    MASTER=fssp FSSP=${FSSP} FSSP2=${FSSP2} single-track-alignment
endif
endif

########################################
# Two-track target alignments
########################################

ifndef STRUCT_WEIGHT
STRUCT_WEIGHT=0.3
endif

# (this conditional is closed further below)
ifdef STRUCT_ALPH

# copy local structure alphabet name to INFO_ALPH, renaming as needed to
# match sequence names in info directories
INFO_ALPH := ${STRUCT_ALPH}
ifeq (${STRUCT_ALPH},stride-ebghtl)
INFO_ALPH := 2d
endif
ifeq (${STRUCT_ALPH},dssp-ebghstl)
INFO_ALPH := dssp
endif
ifeq (${STRUCT_ALPH},dssp-ehl2)
INFO_ALPH := dssp
endif
ifeq (${STRUCT_ALPH},CB_burial_12_7)
INFO_ALPH := CB-burial-12-7
endif
ifeq (${STRUCT_ALPH},CB_burial_14_7)
INFO_ALPH := CB-burial-14-7
endif

ifndef AA_WEIGHT
AA_WEIGHT := 1.0
endif

PRED_SEED_PAIR := ${PRED_INFO}/${PRED}.stride-mixed.seq,${PRED_INFO}/${PRED}.stride-mixed.${INFO_ALPH}

TWO_ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-${STRUCT_ALPH}-${AA_WEIGHT}+${STRUCT_WEIGHT}-adpstyle${ADP}

two-track-alignment: ${TWO_ALIGN_NAME}.a2m
	echo $^ made.
# Align the template to the target with a two-track HMM: the amino-acid
# model plus one local-structure track, weighted by AA_WEIGHT/STRUCT_WEIGHT.
${TWO_ALIGN_NAME}.a2m: ${TARG_AL}.w0.5.mod ${TRACKMOD_STRUCT} \
    ${SECONDARY_TARGET}.padded.seq
	-mkdir -p ${PRED}
	${HMMSCORE} ${TWO_ALIGN_NAME} \
	    -verbose 0 \
	    -alphabet protein,${SAM_STRUCT_ALPH} \
	    -trackmod ${TARG_AL}.w0.5.mod,${TRACKMOD_STRUCT} \
	    -trackcoeff ${AA_WEIGHT},${STRUCT_WEIGHT} \
	    -db ${SEED_PAIR} \
	    -db ${PRED_SEED_PAIR} \
	    -db_size ${LIBSIZE} \
	    -simple_threshold 10000 \
	    -sw ${SW} -dpstyle 0 -subtract_null 4 \
	    -adpstyle ${ADP} \
	    -select_align 8
	gzip -f ${TWO_ALIGN_NAME}.dist
endif

# adpstyle used for the two-track alignments: 1 when ALIGN_VITERBI is set,
# otherwise 5.
ifdef ALIGN_VITERBI
TWO_TRACK_ADP=1
else
TWO_TRACK_ADP=5
endif

# TO DO:
#	REDUCE number of two-track alignments tried, but
#	be sure to include some of the ones that worked well in
#	alignment tests.
#	(Actually, reducing the number may be a bad idea---we may need more diversity.)
#	ADD a SAM profile-profile alignment.

# Canned recipe: one recursive make per (method, alignment type, alphabet),
# taken from the foreach variables al, l and x.  The blank line before
# "endef" is significant: it ends each expansion with a newline so that every
# call becomes its own command line.
define two_track_op
-${MAKE} -k \
    AL_METHOD=${al} AA_WEIGHT=1.0 \
    ALIGN_TYPE=${l} ADP=${TWO_TRACK_ADP} \
    PRED=${PRED} PRED2=${PRED2} \
    MASTER=target STRUCT_ALPH=${x} \
    two-track-alignment

endef

two-track-alignments:
	-mkdir -p ${PRED}
	$(foreach al,${MA_METHODS},$(foreach l,local global,$(foreach x,${SECONDARY_ALPHABETS},\
	    $(call two_track_op))))

########################################
# Three-track target alignments
########################################

ifndef BURIAL_WEIGHT
BURIAL_WEIGHT=0.4
endif

ifndef BURIAL_ALPH
BURIAL_ALPH := CB_burial_14_7
endif

# Burial alphabet name as spelled in the info-directory file names.
BURIAL_INFO_ALPH := ${BURIAL_ALPH}
ifeq (${BURIAL_ALPH},CB_burial_14_7)
BURIAL_INFO_ALPH := CB-burial-14-7
endif

THREE_ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-${STRUCT_ALPH}+${BURIAL_ALPH}-${AA_WEIGHT}+${STRUCT_WEIGHT}+${BURIAL_WEIGHT}-adpstyle${ADP}

three-track-alignment: ${THREE_ALIGN_NAME}.a2m
	echo $^ made.
# Three-track alignment: amino acid + local structure + burial tracks.
${THREE_ALIGN_NAME}.a2m: ${TARG_AL}.w0.5.mod \
    ${SECONDARY_TARGET}.mod \
    ${TARG_AL}.${BURIAL_ALPH}.mod \
    ${SECONDARY_TARGET}.padded.seq \
    ${TARG_AL}.${BURIAL_ALPH}.padded.seq
	-mkdir -p ${PRED}
	${HMMSCORE} ${THREE_ALIGN_NAME} \
	    -verbose 0 \
	    -alphabet protein,${SAM_STRUCT_ALPH},${BURIAL_ALPH} \
	    -trackmod ${TARG_AL}.w0.5.mod,${SECONDARY_TARGET}.mod,${TARG_AL}.${BURIAL_ALPH}.mod \
	    -trackcoeff ${AA_WEIGHT},${STRUCT_WEIGHT},${BURIAL_WEIGHT} \
	    -db ${TARGET}.a2m,${SECONDARY_TARGET}.padded.seq,${TARG_AL}.${BURIAL_ALPH}.padded.seq \
	    -db ${PRED_INFO}/${PRED}.stride-mixed.seq,${PRED_INFO}/${PRED}.stride-mixed.${INFO_ALPH},${PRED_INFO}/${PRED}.stride-mixed.${BURIAL_INFO_ALPH} \
	    -db_size ${LIBSIZE} \
	    -simple_threshold 10000 \
	    -sw ${SW} -dpstyle 0 -subtract_null 4 \
	    -adpstyle ${ADP} \
	    -select_align 8
	gzip -f ${THREE_ALIGN_NAME}.dist

ifdef ALIGN_VITERBI
THREE_TRACK_ADP=1
else
THREE_TRACK_ADP=5
endif

# Canned recipe.  Arguments: $(1) structure alphabet, $(2) burial alphabet,
# $(3) AA weight, $(4) structure weight, $(5) burial weight, $(6) alignment
# type; the method comes from the foreach variable al.  The blank line before
# "endef" is significant: it ends each expansion with a newline so that every
# call becomes its own command line.
define three_track_op
-${MAKE} -k ALIGN_TYPE=$(6) ADP=${THREE_TRACK_ADP} \
    AL_METHOD=${al} \
    PRED=${PRED} PRED2=${PRED2} \
    MASTER=target STRUCT_ALPH=$(1) BURIAL_ALPH=$(2) \
    AA_WEIGHT=$(3) STRUCT_WEIGHT=$(4) BURIAL_WEIGHT=$(5) \
    three-track-alignment

endef

three-track-alignments:
	-mkdir -p ${PRED}
	$(foreach al,${MA_METHODS},$(call three_track_op,str2,CB_burial_14_7,1.0,0.4,0.4,local))
	$(foreach al,${MA_METHODS},$(call three_track_op,str2,CB_burial_14_7,1.0,0.4,0.4,global))
	$(foreach al,${MA_METHODS},$(call three_track_op,str2,near-backbone-11,0.8,0.6,0.8,local))
	$(foreach al,${MA_METHODS},$(call three_track_op,str2,near-backbone-11,0.8,0.6,0.8,global))

endif #if PRED2
endif #if PRED

#######################################
# tree building (not done by default) #
#######################################

# programs in non-standard places
PHYTREE := /projects/compbio/usr/karplus/src/phytree/phytree
DG := /projects/compbio/usr/karplus/src/phytree/dg
DTREE := /projects/compbio/usr/karplus/src/phytree/dtree

# The alignment is unpacked for phytree and re-compressed afterwards.
%_sorted.ids %.tree %_sorted.a2m.gz %.phytrace: %.a2m.gz
	-gunzip -f $*.a2m.gz
	${PHYTREE} -f -o -i -r flat $* $*.a2m ${MIXTURE}
	-gzip -f $*.a2m
	-gzip -f $*_sorted.a2m
	-rm $*.phytrace $*.tree_weight

%tree.ps: %tree
	${DG} $^

%tree-unroot.ps: %tree
	${DTREE} $^

# extract the ids stripping off the muldomain-added section.
%.bare-ids: %.a2m.gz
	gunzip -c $^ \
	    | ${PCBS}/ids-from-fasta -nodom \
	    >$@

# WARNING: DISTILL is not installed on SoE Linux computers,
# but ps2pdf produces very verbose pdf files.
# Using the "pdf-logos" in Makefile will make all the PDF files
# on ${DISTILL_HOST}.
%.pdf: %.eps
	ssh ${DISTILL_HOST} 'cd ${WORKDIR}; distill $^' < /dev/null

#################################
# KEY RESIDUES AND CONSERVATION #
#################################

# This section is for realignment using key residues and selecting
# sequences that have those key residues.

${AL_METHOD}-selected: \
    ${TARGET}.${AL_METHOD}.w0.5.key-residues \
    ${TARGET}.${AL_METHOD}.selected.a2m.gz

${AL_METHOD}-realign: \
    ${TARGET}.${AL_METHOD}.w0.5.key-residues \
    ${TARGET}.${AL_METHOD}.selected.a2m.gz \
    ${TARGET}.${AL_METHOD}.realign.a2m.gz

${AL_METHOD}-realign.w0.5: \
    ${TARGET}.${AL_METHOD}.w0.5.key-residues \
    ${TARGET}.${AL_METHOD}.realign.a2m.gz \
    ${TARGET}.${AL_METHOD}.realign.w0.5.mod \
    ${TARGET}.${AL_METHOD}.realign.w0.5-logo.eps \
    ${TARGET}.${AL_METHOD}.realign.w0.5-logo.pdf

ifndef KEY_MIN_SAVINGS
KEY_MIN_SAVINGS := 1.5
endif

ifndef KEY_MIN_FREQ
KEY_MIN_FREQ := 0.04
endif

%.key-residues: %.saves
	${PCEM_SCRIPTS2K}/pick-key-residues \
	    -minbits ${KEY_MIN_SAVINGS} \
	    -minfreq ${KEY_MIN_FREQ} \
	    -first_residue ${START_COL} \
	    <$^ >$@

%.selected.a2m.gz: %.a2m.gz %.w0.5.key-residues
	gunzip -c $< \
	    | ${PCEM_SCRIPTS2K}/select-by-key-residues \
		-first_residue ${START_COL} \
		-residues $*.w0.5.key-residues \
	    | gzip \
	    >$@

# NOTE(review): this recipe calls a bare "hmmscore" from PATH, not
# ${HMMSCORE} as the alignment rules do -- confirm that is intended.
%.realign.a2m.gz: %.selected.w0.5.mod %.a2m.gz
	hmmscore $*.realign -i $< -db $*.a2m.gz \
	    -verbose 0 \
	    -adpstyle 5 -sw 2 -selectalign 8
	gzip -8f $*.realign.a2m

conserved_%: ${TARGET}.%.w0.5.key-residues
	${YEAST_SCRIPTS}/key-to-rasmol \
	    -set_name conserved_$* \
	    < $^ > ${TARGET}.$*.conserved.rasmol
	-ln -sf ${TARGET}.$*.conserved.rasmol $@

##########
# MUSCLE #
##########

# profile-profile alignment using Muscle:
${PRED}/${TARGET}-${PRED}-%-muscle.a2m.gz : ${TARGET}.%.a2m.gz
	${PCEM_SCRIPTS04}/muscle-profile-profile \
	    -in $^ \
	    -in ${PRED_NOSTRUCT}/${PRED}.$*.a2m.gz \
	    -tmp /tmp \
	    -out $@

# This section is for realignment using Bob Edgar's "muscle" program.
%.muscle.gz: %.a2m.gz
	gunzip -c $^ \
	    | muscle -maxhours 2.0 \
	    | gzip \
	    > $@

%.muscle.a2m.gz: %.muscle.gz
	${PCEM_SCRIPTS}/a2m_from_muscle -in $^ -out $@ -guide 1

#############
# ALL-ALIGN #
#############

# all-align.a2m.gz is an alignment created by merging all the
# pairwise alignments into a single multiple-alignment.
# This is useful for looking for consensus about alignments.
# It may also be useful (after thinning at 100%) as an input
# for undertaker.
#
# Each all-align* rule first makes the per-directory merge in every template
# directory (the sub-makes are chained with ";"), then merges those results.

all-align.a2m.gz: $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]*)
	$(foreach dir,$^, ${MAKE} ${dir}/merged-a2m;)
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $@ */merged-a2m

%/merged-a2m: %
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $*/merged-a2m \
	    $(wildcard $*/*.a2m $*/*.a2m.gz)

# This apparently is a repeat of the dependency a few lines above
# all-align.a2m.gz: $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]*)
#	$(foreach dir,$^, ${MAKE} ${dir}/merged-a2m;)
#	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $@ */merged-a2m

all-align-global.a2m.gz: $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]*)
	$(foreach dir,$^, ${MAKE} ${dir}/merged-global-a2m;)
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $@ */merged-global-a2m

%/merged-global-a2m: %
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $*/merged-global-a2m \
	    $(wildcard $*/*global*.a2m $*/*global*.a2m.gz)

all-align-local.a2m.gz: $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]*)
	$(foreach dir,$^, ${MAKE} ${dir}/merged-local-a2m;)
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $@ */merged-local-a2m

%/merged-local-a2m: %
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $*/merged-local-a2m \
	    $(wildcard $*/*local*.a2m $*/*local*.a2m.gz)

all-align-good.a2m.gz: $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]*)
	$(foreach dir,$^, ${MAKE} ${dir}/merged-good-a2m;)
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $@ */merged-good-a2m

%/merged-good-a2m: %
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $*/merged-good-a2m \
	    $(wildcard $*/*-t06-local-str2+near-backbone*a2m $*/*-t06-local-str2+near-backbone*a2m.gz)

all-align-good-all.a2m.gz: $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]*)
	$(foreach dir,$^, ${MAKE} ${dir}/merged-good-all-a2m;)
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $@ */merged-good-all-a2m

%/merged-good-all-a2m: %
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $*/merged-good-all-a2m \
	    $(wildcard $*/*local-str2+near-backbone*a2m $*/*local-str2+near-backbone*a2m.gz)

all-align.pa: all-align.a2m.gz
	${BIN_SAM}/prettyalign $^ -m5 > $@

# A no-thin model built from all-align.a2m may be useful for
# finding consensus columns.
# The weight and log files have fixed names (tmp.weight, tmp.log).
%.no-thin.mod: %.a2m.gz
	${PCL}/make-weights.pl $^ tmp.weight ${MIXTURE} \
	    "EntropyWeight 0.7 10" 1.0 > tmp.log
	${BIN_SAM}/modelfromalign $*.no-thin -alignfile $^ \
	    -prior_library ${MIXTURE} \
	    -alignment_weights tmp.weight
	-rm -f tmp.weight tmp.log

%.no-thin.logo.eps: %.no-thin.mod
	${BIN_SAM}/makelogo $*.no-thin.logo -i $< \
	    -logo_start_num ${START_COL} \
	    -logo_rel_entropy 1 \
	    -logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* no thinning" \
	    -logo_caption_f ${TARG_AL}.dssp-ebghstl.seq \
	    -logo_under_file ${TARGET}.a2m \
	    -logo_savings_output $*.no-thin.saves

########################################
# Building a t2k alignment from a seed #
########################################

TARGET2K := ${PCEM_SCRIPTS2K}/target2k

# Use the first NR location that has files matching ${NR}.*; the last
# location is the unconditional fallback.
NR:=/scratch/data/nrp/nr
ifeq ($(wildcard ${NR}.*),)
NR:=/var/tmp/nrp/nr
endif
ifeq ($(wildcard ${NR}.*),)
NR=/projects/compbio/data/nrp/nr
endif

ifndef ADPSTYLE
ADPSTYLE := 5
endif

ifndef BLAST_MAX
BLAST_MAX := 10000
endif

ifdef REDO_T2K
T2K_DEPEND := ${NR}
else
T2K_DEPEND :=
endif

%.t2k.a2m.gz: %.a2m ${T2K_DEPEND}
	${TARGET2K} -out $*.t2k \
	    -final_adpstyle ${ADPSTYLE} \
	    -blast_max_report ${BLAST_MAX} \
	    -db ${NR} \
	    -seed $< -tmp_dir /var/tmp
	gzip -f $*.t2k.a2m

########################################
# Building a t04 alignment from a seed #
########################################

ifdef REDO_T04
T04_DEPEND := ${NR}
else
T04_DEPEND=
endif

${TARGET}.t04.a2m.gz: ${TARGET}.a2m ${T04_DEPEND}
	echo "making T04 alignment"
	${PCEM_SCRIPTS04}/target04 \
	    -seed $< -out $@ \
	    -tmp /var/tmp -db ${NR} \
	    -final_align viterbi \
	    -thresh 0.0001 -thresh 0.0005 -thresh 0.002 -thresh 0.01

########################################
# Building a t05 alignment from a seed #
########################################

# Unlike t04/t06, REDO_T05 forces the rebuild through the ALWAYS target.
ifdef REDO_T05
T05_DEPEND := ALWAYS
else
T05_DEPEND=
endif

${TARGET}.t05.a2m.gz: ${TARGET}.a2m ${T05_DEPEND}
	echo "making T05 alignment"
	${PCEM_SCRIPTS04}/target05 \
	    -seed $< -out $@ \
	    -tmp /var/tmp -db ${NR}

########################################
# Building a t06 alignment from a seed #
########################################

ifdef REDO_T06
T06_DEPEND := ${NR}
else
T06_DEPEND=
endif

${TARGET}.t06.a2m.gz: ${TARGET}.a2m ${T06_DEPEND}
	echo "making T06 alignment"
	${PCEM_SCRIPTS04}/target06 \
	    -seed $< -out $@ \
	    -tmp /var/tmp -db ${NR}

########################################
# TARGETS FOR REMOVING FILES TO REMAKE #
########################################

remove-top-reported-alignments:
	-rm ${TARG_AL}.top_reported_alignments.rdb

remove-best-scores:
	-rm ${TARG_AL}.best-scores.*

##################
# SCORING DECOYS #
##################

# List every *.pdb* file of a directory as a ReadConformPDB command.
# "shopt -s nullglob" is a bash builtin, so these recipes need a bash SHELL.
%/read-pdb.under: %
	-mkdir -p $*
	-rm $@
	cd $*; shopt -s nullglob ; for x in *.pdb* ; do echo ReadConformPDB $$x >> read-pdb.under ; done
	-chgrp protein $@
	-chmod g+w $@

%/read-pdb+proteinshop.under: %
	-mkdir -p $*
	-rm $@
	cd $*; shopt -s nullglob ; for x in *.pdb* ; do echo ReadConformPDB $$x >> ${subst $*/,,$@} ; done
	cd $*; shopt -s nullglob ; for x in ProteinShop/*.pdb* ; do \
	    echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \
	    y=$${x%.pdb} ; \
z=$${y#ProteinShop/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ done -chgrp protein $@ -chmod g+w $@ %/read-pdb+servers.under: % -mkdir -p $* -rm $@ cd $*; shopt -s nullglob ; for x in ../*.ts-submitted* ; do echo ReadConformPDB $$x >> ${subst $*/,,$@} ; done cd $*; shopt -s nullglob ; for x in *.pdb* ; do echo ReadConformPDB $$x >> ${subst $*/,,$@} ; done cd $*; shopt -s nullglob ; for x in ../*mer/decoys/*.pdb* ; \ do echo ReadConformPDB $$x chain A >> ${subst $*/,,$@} ; \ y=$${x#../} ;\ z=$${y/decoys} ;\ a=$${z/${TARGET}.} ;\ b=$${a%.gz} ;\ c=$${b%.pdb} ;\ echo NameConform $$c >> ${subst $*/,,$@} ; \ done cd $*; shopt -s nullglob ; for x in servers/*.pdb.gz ; do \ echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \ y=$${x%.pdb.gz} ; \ z=$${y#servers/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ echo SCWRLConform >> ${subst $*/,,$@} ; \ echo NameConform $$z-scwrl >> ${subst $*/,,$@} ; \ done -chgrp protein $@ -chmod g+w $@ %/scwrl-predictions.under: %/predictions -mkdir -p $* -rm $@ cd $*; shopt -s nullglob ; for x in predictions/* ; do \ echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \ y=$${x%.pdb.gz} ; \ z=$${y#predictions/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ echo SCWRLConform >> ${subst $*/,,$@} ; \ echo NameConform $$z-scwrl >> ${subst $*/,,$@} ; \ echo PrintConformPDB predictions-scwrl/$$z-scwrl \ >> ${subst $*/,,$@} ; \ done -chgrp protein $@ -chmod g+w $@ %/scwrl-servers.under: %/servers -mkdir -p $* -rm $@ cd $*; shopt -s nullglob ; for x in servers/* ; do \ echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \ y=$${x%.pdb.gz} ; \ z=$${y#servers/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ echo SCWRLConform >> ${subst $*/,,$@} ; \ echo NameConform $$z-scwrl >> ${subst $*/,,$@} ; \ echo PrintConformPDB servers-scwrl/$$z-scwrl \ >> ${subst $*/,,$@} ; \ done -chgrp protein $@ -chmod g+w $@ %/predictions-scwrl: %/predictions %/scwrl-predictions.under -mkdir -p $@ -chgrp protein $@ -chmod g+w $@ umask 002; \ sed -e s/XXX0000/${TARGET}/ -e 
s/START_COL/${START_COL}/ \ -e s/_domain// \ -e s/read-pdb/scwrl-predictions/ \ -e s/REAL_PDB/${REAL_PDB}/ \ < ${STARTER}/read-decoys.under \ | nice -2 ${UNDERTAKER} %/servers-scwrl: %/servers %/scwrl-servers.under -mkdir -p $@ -chgrp protein $@ -chmod g+w $@ umask 002; \ sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \ -e s/_domain// \ -e s/read-pdb/scwrl-servers/ \ -e s/REAL_PDB/${REAL_PDB}/ \ < ${STARTER}/read-decoys.under \ | nice -2 ${UNDERTAKER} %/read-predictions.under: %/predictions %/predictions-scwrl -mkdir -p $* -rm $@ cd $*; shopt -s nullglob ; for x in predictions/*; do \ echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \ y=$${x%.pdb.gz} ; \ z=$${y#predictions/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ done cd $*; shopt -s nullglob ; for x in predictions-scwrl/*; do \ echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \ y=$${x%.pdb.gz} ; \ z=$${y#predictions-scwrl/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ done -chgrp protein $@ -chmod g+w $@ %/read-servers.under: %/servers -mkdir -p $* -rm $@ cd $*; shopt -s nullglob ; for x in servers/*; do \ echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \ y=$${x%.pdb.gz} ; \ z=$${y#servers/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ done -chgrp protein $@ -chmod g+w $@ %/read-servers-scwrl.under: %/servers-scwrl -mkdir -p $* -rm $@ cd $*; shopt -s nullglob ; for x in servers-scwrl/*; do \ echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \ y=$${x%.pdb.gz} ; \ z=$${y#servers-scwrl/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ done -chgrp protein $@ -chmod g+w $@ %/read-predictions-scwrl.under: %/predictions-scwrl -mkdir -p $* -rm $@ cd $*; shopt -s nullglob ; for x in predictions-scwrl/*; do \ echo ReadConformPDB $$x >> ${subst $*/,,$@} ; \ y=$${x%.pdb.gz} ; \ z=$${y#predictions-scwrl/} ; \ echo NameConform $$z >> ${subst $*/,,$@} ; \ done -chgrp protein $@ -chmod g+w $@ decoys/%.pretty: decoys/%.rdb -mv -f $@ $@.old ${CASP7_SCRIPTS}/prettyscore -terse -targpfx -decpoint < $^ > $@ 
# Score the decoys listed in decoys/read-pdb.under with cost function
# <stem>.costfcn.  The score-all.under template is instantiated with sed
# (START_COL, XXX0000 -> ${TARGET}, try1 -> stem) and piped into undertaker.
# The recipe never writes $@ itself before sorting it, so the undertaker
# script presumably creates it -- confirm against ${STARTER}/score-all.under.
# Afterwards $@ is re-sorted on its "cost" column and the clash/break
# reports are compressed.  A previous $@ is kept as $@.old.
# NOTE(review): sort.tmp and decoys/all.{clashes,breaks} are fixed names
# shared by the three score-all rules, so they must not run concurrently.
decoys/score-all.%.rdb: %.costfcn decoys/read-pdb.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	cat ${STARTER}/score-all.under \
		| sed s/START_COL/${START_COL}/g \
		| sed -e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${PCB}/sorttbl cost < $@ > sort.tmp
	mv -f sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

# Same as above, but the template is additionally rewritten to read
# read-pdb+proteinshop.under and to name its output score-all+proteinshop.
decoys/score-all+proteinshop.%.rdb: %.costfcn decoys/read-pdb+proteinshop.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	cat ${STARTER}/score-all.under \
		| sed -e s/START_COL/${START_COL}/g \
			-e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
			-e s/read-pdb/read-pdb+proteinshop/ \
			-e s/score-all/score-all+proteinshop/ \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${PCB}/sorttbl cost < $@ > sort.tmp
	mv -f sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

# Same as above, using read-pdb+servers.under / score-all+servers.
decoys/score-all+servers.%.rdb: %.costfcn decoys/read-pdb+servers.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	cat ${STARTER}/score-all.under \
		| sed -e s/START_COL/${START_COL}/g \
			-e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
			-e s/read-pdb/read-pdb+servers/ \
			-e s/score-all/score-all+servers/ \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${PCB}/sorttbl cost < $@ > sort.tmp
	mv -f sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

# Build the superimposed best models and their pictures, but only when
# superimpose-best.under already exists in the working directory.
# Uses ${MAKE} (not a literal "make") so that -n, -k and the jobserver
# are passed on to the sub-make.
.PHONY: manual_models
manual_models:
	if test -e superimpose-best.under; then ${MAKE} best-models.pdb.gz best-models.pictures; fi

# Instantiate the starter template for this target.
superimpose-best.under: ${STARTER}/superimpose-best.under
	cat $^ \
		| sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/g \
		> $@

# edit superimpose-best.under to pick which models to superimpose
best-models.pdb.gz: superimpose-best.under
	nice -1 ${UNDERTAKER} < superimpose-best.under
	-gzip -f best-models.pdb

# Appends the HTML produced by add_jpeg_views_html to summary.html.
# Failure of the script is ignored (leading "-").
best-models.pictures: best-models.pdb.gz best-models.make_jpeg
	-${CASP7_SCRIPTS}/add_jpeg_views_html \
		-basename ${TARGET} \
		-rootname best-models \
		-explain manual \
		>> ${WORKDIR}/summary.html
############################################
# FETCHING ROBETTA MODELS FROM CASP7 CACHE #
############################################

# Download model number <stem> for ${TARGET} into the uncompressed name
# (${@:.gz=} is $@ without the trailing .gz).  If the downloaded file
# contains 'DOES NOT EXIST' it is removed, otherwise it is gzipped.
decoys/robetta-model%.pdb.gz:
	wget -N \
		'http://robetta.bakerlab.org/servlet/robetta.GetModel?m=$*&t=${TARGET}' \
		-O ${@:.gz=}
	if grep 'DOES NOT EXIST' ${@:.gz=} ; then rm ${@:.gz=} ; else gzip -9f ${@:.gz=} ; fi

.PHONY: fetch_robetta
fetch_robetta: $(foreach m,1 2 3 4 5 6 7 8 9 10,decoys/robetta-model${m}.pdb.gz)

###############################################
# FETCHING TARBALLS OF ALL SERVER PREDICTIONS #
###############################################

.PHONY: fetch_tarball
fetch_tarball: decoys/${TARGET}.3D.srv.tar.gz

decoys/${TARGET}.3D.srv.tar.gz:
	wget -N \
		'http://www2.predictioncenter.org/tarballs/${TARGET}.3D.srv.tar.gz' \
		-O decoys/${TARGET}.3D.srv.tar.gz

# The unpacking should probably be done on silo, as native file I/O is
# *so much* faster than I/O over the network.
#
# Replaces decoys/servers with the contents of the tarball: the archive
# is extracted in the current directory, the ${TNUM}/*TS[1-5] files get
# a .pdb suffix and are gzipped, and ${TNUM} is moved to decoys/servers.
# "cd ... &&" is deliberate: if the cd fails, gzip must not run on the
# *.pdb files of the working directory.
.PHONY: unpack_tarball
unpack_tarball: decoys/${TARGET}.3D.srv.tar.gz
	-rm -rf decoys/servers
	tar -x --gunzip -f $^
	for x in ${TNUM}/*TS[1-5] ; do mv $$x $$x.pdb; done
	cd ${TNUM} && gzip -9f *.pdb
	-mv -f ${TNUM} decoys/servers

######################################
# USING ROSETTA TO REPACK SIDECHAINS #
######################################

# Which version of rosetta to use
ifndef ROSETTA
# ROSETTA = /projects/compbio/usr/karplus/rosetta
ROSETTA = ${PCB_SUB}/rosetta
endif

paths.txt: ${PCB_SUB}/paths.txt
	cp -p $^ $@

# Disulfide handling passed to every Rosetta call below: use the given
# file when DISULF_FILE is set, otherwise let Rosetta find disulfides.
ifdef DISULF_FILE
DISULF_ARGS= -fix_disulf ${DISULF_FILE} -norepack_disulf
else
DISULF_ARGS= -find_disulf -norepack_disulf
endif

%.repack.res: %.a2m
	${CASP7_SCRIPTS}/make-repack-res-file -start_col ${START_COL} < $^ > $@

%.dimer.repack.res: %.a2m
	${CASP7_SCRIPTS}/make-repack-res-file -multimer 2 -start_col ${START_COL} < $^ > $@

# score a file using Rosetta, producing an annotated .score.pdb file
# and adding to decoys/%.fasc
decoys/%.score.pdb: decoys/%.pdb paths.txt
	${ROSETTA} \
		-s $< -read_all_chains \
		-score -scorefile $* \
		-decoystats \
		-fa_output -fa_input \
		${DISULF_ARGS} \
		-nstruct 1
	mv decoys/$*_0001.pdb decoys/$*.score.pdb

RR_CONSTRAINTS:= ${CONTACT_PRED_TARGET}.constraints

# Instantiate the starter cost function for this target.  When the
# ${RR_CONSTRAINTS} file is not readable, lines mentioning it are
# dropped from the template.
# NOTE(review): "[[ ... ]]" needs bash; SHELL is not set in this part of
# the file -- confirm it is bash.
try1.costfcn:
	if [[ -r ${RR_CONSTRAINTS} ]] ; \
	then sed s/XXX0000/${TARGET}/g < ${STARTER}/try1.costfcn > $@; \
	else grep -v ${RR_CONSTRAINTS} < ${STARTER}/try1.costfcn | sed s/XXX0000/${TARGET}/g > $@; \
	fi

unconstrained.costfcn:
	if [[ -r ${RR_CONSTRAINTS} ]] ; \
	then sed s/XXX0000/${TARGET}/g < ${STARTER}/unconstrained.costfcn > $@; \
	else grep -v ${RR_CONSTRAINTS} < ${STARTER}/unconstrained.costfcn | sed s/XXX0000/${TARGET}/g > $@; \
	fi

# Generic undertaker script: copy the starter template, substituting the
# target name and the first column number.
%.under: ${STARTER}/%.under ${TARGET}.upper-only.a2m
	cat < $< \
		| sed s/XXX0000/${TARGET}/g \
		| sed s/START_COL/${START_COL}/g \
		> $@

# Explicit rule for try1.under: instead of listing
# ${TARGET}.upper-only.a2m as a prerequisite it is built by a sub-make
# run with -k.  ${MAKE} is used so make's flags reach the sub-make.
try1.under:
	${MAKE} -k ${TARGET}.upper-only.a2m
	cat < ${STARTER}/try1.under \
		| sed s/XXX0000/${TARGET}/g \
		| sed s/START_COL/${START_COL}/g \
		> $@

# Shorthand for a common request---do an optimization run and repack
${TARGET}.do%:
	${MAKE} -k decoys/${TARGET}.try$*-opt2.pdb.gz \
		decoys/${TARGET}.try$*-opt2.repack-nonPC.pdb.gz \
		decoys/${TARGET}.try$*-opt2.gromacs0.pdb.gz \
		decoys/${TARGET}.try$*-opt2.gromacs0.repack-nonPC.pdb.gz \
		decoys/grep-best-rosetta \
		decoys/read-pdb.under \
		decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty
	-gzip -9f decoys/${TARGET}.try$**.pdb

# same as do%, but for multimer targets (unpacks before calling gromacs)
${TARGET}.mult%:
	${MAKE} -k decoys/${TARGET}.try$*-opt2.pdb.gz \
		decoys/${TARGET}.try$*-opt2.repack-nonPC.pdb.gz \
		decoys/${TARGET}.try$*-opt2.unpack.pdb.gz \
		decoys/${TARGET}.try$*-opt2.unpack.gromacs0.pdb.gz \
		decoys/${TARGET}.try$*-opt2.unpack.gromacs0.repack-nonPC.pdb.gz \
		decoys/grep-best-rosetta \
		decoys/read-pdb.under \
		decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty
	-gzip -9f decoys/${TARGET}.try$**.pdb

# Shorthand for a common repacking request--just make "try12.repack"
%.repack:
	${MAKE} -k decoys/${TARGET}.$*-opt2.repack-nonPC.pdb.gz decoys/score-all.$*.rdb decoys/score-all.$*.pretty

# repack sidechains using Rosetta design mode, not changing
# CYS and PRO residues
# Variant 1: the decoy is an uncompressed decoys/<stem>.pdb.
# The decoy and the guide sequence are staged under the fixed scratch
# names XXXX.pdb / XXXXA.fasta, Rosetta is run in fixed-backbone design
# mode with ${TARGET}.repack.res, its output goes to <stem>.repack.log,
# and decoys/XXXX_0001.pdb is renamed and gzipped to become $@.
# ${@:.gz=} is $@ without its .gz suffix, i.e. decoys/<stem>.repack-nonPC.pdb.
# NOTE(review): the scratch names are shared by all repack rules, so
# these recipes must not run in parallel.
decoys/%.repack-nonPC.pdb.gz: decoys/%.pdb paths.txt \
		${TARGET}.repack.res guide.a2m.gz
	cp -f $< XXXX.pdb
	gunzip -c guide.a2m.gz > XXXXA.fasta
	${ROSETTA} aa XXXX A \
		-s ./XXXX.pdb \
		-scorefile $* \
		-read_all_chains \
		-design -fixbb -resfile ${TARGET}.repack.res \
		-fa_output -fa_input \
		-ex1 -ex2 -ex34 \
		-use_input_sc \
		${DISULF_ARGS} \
		-nstruct 1 \
		> $*.repack.log 2>&1
	-gzip -9f $*.repack.log
	rm XXXX.pdb XXXXA.fasta
	mv decoys/XXXX_0001.pdb ${@:.gz=}
	-gzip -9f ${@:.gz=}

# Variant 2: the decoy is already gzipped (decoys/<stem>.pdb.gz), so it
# is unpacked into the scratch name instead of copied.  Rosetta's output
# is not captured in a log file here.
decoys/%.repack-nonPC.pdb.gz: decoys/%.pdb.gz paths.txt \
		${TARGET}.repack.res guide.a2m.gz
	gunzip -c $< > XXXX.pdb
	gunzip -c guide.a2m.gz > XXXXA.fasta
	${ROSETTA} aa XXXX A \
		-s ./XXXX.pdb \
		-scorefile $* \
		-read_all_chains \
		-design -fixbb -resfile ${TARGET}.repack.res \
		-fa_output -fa_input \
		-ex1 -ex2 -ex34 \
		-use_input_sc \
		${DISULF_ARGS} \
		-nstruct 1
	rm XXXX.pdb XXXXA.fasta
	mv decoys/XXXX_0001.pdb ${@:.gz=}
	-gzip -9f ${@:.gz=}

# Dimer variant: same fixed-backbone repack, but with the dimer resfile,
# and with the sequence lines of ${TARGET}.a2m (header lines removed by
# grep -v '>') appended to the guide sequence in XXXXA.fasta.
# The final gzip is not error-tolerant in this rule.
dimer%.repack-nonPC.pdb.gz: dimer%.pdb paths.txt \
		${TARGET}.dimer.repack.res guide.a2m.gz
	cp -f $< XXXX.pdb
	gunzip -c guide.a2m.gz > XXXXA.fasta
	grep -v '>' ${TARGET}.a2m >> XXXXA.fasta
	${ROSETTA} aa XXXX A \
		-s XXXX.pdb \
		-scorefile $* \
		-read_all_chains \
		-design -fixbb -resfile ${TARGET}.dimer.repack.res \
		-fa_output -fa_input \
		-ex1 -ex2 -ex34 \
		-use_input_sc \
		${DISULF_ARGS} \
		-nstruct 1 \
		> dimer$*.repack.log 2>&1
	-gzip -9f dimer$*.repack.log
	rm XXXX.pdb XXXXA.fasta
	mv decoys/XXXX_0001.pdb dimer$*.repack-nonPC.pdb
	gzip -9f dimer$*.repack-nonPC.pdb

# The following full-atom relax does not seem to be working yet.
decoys/%.relax.pdb: decoys/%.pdb paths.txt guide.a2m.gz
	cp -f $< XXXX.pdb
	gunzip -c guide.a2m.gz > XXXXA.fasta
	${ROSETTA} aa XXXX A \
		-s $< -read_all_chains \
		-relax -minimize -farlx \
		-new_refold \
		-scorefile $* \
		-fa_output -fa_input \
		-nstruct 1
	rm XXXX.pdb XXXXA.fasta
	mv decoys/aa$*_0001.pdb decoys/$*.relax.pdb

decoys/grep-best-rosetta: decoys
	${CASP7_SCRIPTS}/sort-by-rosetta

##########################
# MAKING CASP SUBMISSION #
##########################

ifndef CASP_ID
CASP_ID := SAM_${SAM_YEAR}
endif

######################################
# DSSP_EHL2 merged prediction stuff: #
######################################

# Combine the four per-alphabet predictions of one alignment method.
%.${AL_METHOD}.dssp-ehl2.rdb: %.${AL_METHOD}.dssp-ebghstl.rdb \
		%.${AL_METHOD}.stride-ebghtl.rdb \
		%.${AL_METHOD}.str2.rdb \
		%.${AL_METHOD}.alpha.rdb
	${CASP7_SCRIPTS}/RDBCombine $^ -a ${CASP_ID} > $@

# Combine the same four predictions over every method in MA_METHODS.
${TARGET}.dssp-ehl2.rdb: $(foreach AL_METHOD, ${MA_METHODS}, \
		${TARGET}.${AL_METHOD}.dssp-ebghstl.rdb \
		${TARGET}.${AL_METHOD}.stride-ebghtl.rdb \
		${TARGET}.${AL_METHOD}.str2.rdb \
		${TARGET}.${AL_METHOD}.alpha.rdb)
	${CASP7_SCRIPTS}/RDBCombine $^ -a ${CASP_ID} > $@

%.dssp-ehl2.seq: %.dssp-ehl2.rdb
	${CASP7_SCRIPTS}/seq-from-rdb < $^ > $@

# CASP formatted prediction---may also be useful for EVA or LiveBench
${TARGET}.dssp-ehl2: ${TARGET}.dssp-ehl2.rdb
	${CASP7_SCRIPTS}/rdb2casp $^ ${CASP_ID} > $@

ifndef METHOD_FILE
METHOD_FILE := ${TARGET}.method
endif

ifdef MANUAL_TOP_HITS
USE_CASP_PARENT := -parent '${MANUAL_TOP_HITS}'
else
USE_CASP_PARENT := -parent "N/A"
endif

%.method: ${STARTER}/generic-method-file
	cp $^ $@

# The recipe does not redirect into $@; pdb2casp presumably writes the
# model*.ts files itself -- confirm against the script.
model%.ts: best-models.pdb.gz ${METHOD_FILE}
	gunzip -c $< \
		| ${CASP7_SCRIPTS}/pdb2casp \
			-target ${TARGET} -author ${CASP_ID} \
			-method ${METHOD_FILE} \
			${USE_CASP_PARENT}

.PHONY: casp_models
casp_models: $(foreach x,1 2 3 4 5, model${x}.ts)

ifndef EMAIL_3D_SUBJECT
EMAIL_3D_SUBJECT := SAM-${SAM_YEAR} hand ${TARGET}
endif

# Mail one model, then rename it to <file>-submitted.
model%.email: model%.ts
	mail -s '${EMAIL_3D_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
	-mv -f $^ $^-submitted
	echo mailing model $* to ${EMAIL_ADDRESS} done

.PHONY: email
email: $(foreach x,1 2 3 4 5, model${x}.email)

#####################
# Mailing multimers #
#####################

# $(1) is a command that writes a PDB model to stdout, $(2) the model
# number.  The pdb2casp output model1.ts is rewritten into dimer$(2).ts
# (TARGET line tagged OLIGOMER, MODEL renumbered) and then removed.
define multimer_to_ts
$(1) | \
	${CASP7_SCRIPTS}/pdb2casp \
		-target ${TARGET} -author ${CASP_ID} \
		-method ${METHOD_FILE} \
		-noerase_chain_ids \
		${USE_CASP_PARENT}
sed -e 's/TARGET ${TARGET}/TARGET ${TARGET} OLIGOMER/' \
	-e 's/^MODEL 1/MODEL $(2)/' \
	< model1.ts > dimer$(2).ts
rm model1.ts
endef

# takes an ordinary gzipped model that resulted from a try...run
# (must have MODEL records)
define modelfullname_to_ts
${MAKE} $(subst pdb,unpack.pdb,$(1))
$(call multimer_to_ts, gunzip -c $(subst pdb,unpack.pdb,$(1)),$(2))
endef

# takes an ordinary gzipped model that resulted from a try...run
# (must have MODEL records)
define model_to_ts
$(call modelfullname_to_ts,decoys/${TARGET}.$(1).pdb.gz,$(2))
endef

# takes an unpacked (separate chains) multimer that
# lacks a model record and converts it to ts format
define modelless_to_ts
echo 'MODEL 1' > $(1)-tmp
gunzip -c $(1).gz >> $(1)-tmp
$(call multimer_to_ts,cat $(1)-tmp,$(2))
rm $(1)-tmp
endef

# In the Makefile, you need to have targets for each of the dimer.ts models:
# dimer1.ts:
#	$(call model_to_ts,try5-opt2,1)

# submake, so that you don't need all five targets
.PHONY: dimer_models
dimer_models:
	${MAKE} -k $(foreach x,1 2 3 4 5, dimer${x}.ts)

dimer%.email: dimer%.ts
	mail -s '${EMAIL_3D_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
	-mv -f $^ $^-submitted
	echo mailing dimer $* to ${EMAIL_ADDRESS} done

.PHONY: email_dimers
email_dimers: $(foreach x,1 2 3 4 5, dimer${x}.email)

###############################
# Refinement targets for CASP #
###############################

# Refinement target name: a leading T0 of ${TARGET} becomes tr.
REFINE := ${TARGET:T0%=tr%}

.PHONY: fetch_refinement
fetch_refinement: decoys/${REFINE}.pdb.gz

# A wget failure is ignored here; the following mv then fails if
# nothing was downloaded.
decoys/${REFINE}.pdb.gz:
	-wget -N \
		${CASP_WEBSITE}/${REFINE}.pdb.txt
	mv ${REFINE}.pdb.txt ${@:.gz=}
	gzip -9f ${@:.gz=}

.PHONY: refine-check
refine-check: decoys/${REFINE}.pdb.gz \
		decoys/${REFINE}.repack-nonPC.pdb.gz \
		decoys/${REFINE}.gromacs0.pdb.gz \
		decoys/${REFINE}.gromacs0.repack-nonPC.pdb.gz \
		decoys/score-all.unconstrained.pretty

# edit superimpose-refine.under to pick which models to superimpose
refine-models.pdb.gz: superimpose-refine.under
	nice -1 ${UNDERTAKER} < $^
	-gzip -f refine-models.pdb

ifndef REFINE_METHOD_FILE
REFINE_METHOD_FILE := ${TARGET}.refine_method
endif

.PHONY: casp_refines
casp_refines: refine-models.pdb.gz
	gunzip -c $< \
		| ${CASP7_SCRIPTS}/pdb2casp \
			-prefix refine. \
			-target ${TARGET:T0%=TR%} -author ${CASP_ID} \
			-method ${REFINE_METHOD_FILE} \
			-parent "N/A"

ifndef EMAIL_REFINE_SUBJECT
EMAIL_REFINE_SUBJECT := SAM-${SAM_YEAR} hand ${TARGET}
endif

refine%.email: refine.model%.ts
	mail -s '${EMAIL_REFINE_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
	-mv -f $^ $^-submitted
	echo mailing refine $* to ${EMAIL_ADDRESS} done

.PHONY: email_refines
email_refines: $(foreach x,1 2 3 4 5, refine${x}.email)

################################
# EVALUATING THE FINAL RESULTS #
################################

# General shape of the evaluation rules below: a starter .under template
# is instantiated with sed and piped into undertaker; the recipe never
# writes $@ before sorting it, so the undertaker script presumably
# creates it -- confirm against the templates.  $@ is then re-sorted
# (and in some rules joined with a real-cost table) through temporary
# files.  $(PID), where used, is the process id captured at the top of
# this file and only serves to make temporary names unique.

best-evalue:
	${CASP7_SCRIPTS}/best_evalue.pl ${TARGET}.best-scores.rdb > $@

# Extract ${TARGET}.pdb from the CASP6 tarball; only when a non-empty
# file was extracted is it renamed and gzipped into $@.
${TARGET}.real.pdb.gz: ${CASP6}/casp6-real-structures.tar.gz
	gunzip -c $^ | tar xf - ${TARGET}.pdb
	if test -s ${TARGET}.pdb ;\
	then mv ${TARGET}.pdb ${TARGET}.real.pdb ;\
	gzip -9f ${TARGET}.real.pdb ;\
	fi

decoys/evaluate_%.rdb: decoys/read-pdb+servers.under ${TARGET}_%.real.pdb.gz
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e s/_domain/_$*/ \
		-e s/read-pdb/read-pdb+servers/ \
		-e 's/ReadConformPDBids/# ReadConformPDBids/' \
		-e 's/# ReadConformPDB /ReadConformPDB /' \
		-e 's/include COSTFCN.costfcn//' \
		-e s/.COSTFCN// \
		< ${STARTER}/evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${PCB}/sorttbl real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

decoys/evaluate_%.unconstrained.rdb: decoys/read-pdb+servers.under ${TARGET}_%.real.pdb.gz
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e s/_domain/_$*/ \
		-e s/read-pdb/read-pdb+servers/ \
		-e 's/ReadConformPDBids/# ReadConformPDBids/' \
		-e 's/# ReadConformPDB /ReadConformPDB /' \
		-e s/COSTFCN/unconstrained/ \
		< ${STARTER}/evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${PCB}/sorttbl real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

decoys/evaluate.rdb: decoys/read-pdb+servers.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e s/_domain// \
		-e s/read-pdb/read-pdb+servers/ \
		-e 's/include COSTFCN.costfcn//' \
		-e s/.COSTFCN// \
		< ${STARTER}/evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${PCB}/sorttbl real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

evaluate.%.under: ${STARTER}/evaluate.under Makefile
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/COSTFCN/$*/ \
		-e s/_domain// \
		-e s/read-pdb/read-pdb+servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< $< > $@

decoys/servers.rosetta.rdb: decoys/read-servers.under
	find decoys/servers -type f -print \
		| xargs ${CASP7_SCRIPTS}/rosetta_score.pl -v > $@
	${PCB}/sorttbl name < $@ > sort.tmp
	mv -v sort.tmp $@

decoys/predictions.rosetta.rdb: decoys/read-predictions.under
	find decoys/predictions decoys/predictions-scwrl -type f -print \
		| xargs ${CASP7_SCRIPTS}/rosetta_score.pl -v > $@
	${PCB}/sorttbl name < $@ > sort.tmp
	mv -v sort.tmp $@

decoys/predictions.real-cost.rdb: decoys/read-predictions.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's/include COSTFCN.costfcn//g' \
		-e s/evaluate.COSTFCN.rdb/real-cost.rdb/g \
		-e s/_domain// \
		-e s/read-pdb/read-predictions/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/predictions.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${PCB}/sorttbl name < $@ > sort.tmp
	mv -f sort.tmp $@

decoys/predictions.evaluate.%.rdb: %.costfcn decoys/read-predictions.under decoys/predictions.real-cost.rdb
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/SetCost/SetRealCost\\nSetCost/g \
		-e s/COSTFCN/$*/g \
		-e s/_domain// \
		-e s/read-pdb/read-predictions/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/predictions.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f join.$*.pred.$(PID).tmp sort.$*.pred.$(PID).tmp
	${PCB}/sorttbl name < $@ > sort.$*.pred.$(PID).tmp
	(${CASP7_SCRIPTS}/unique_rdb_headers.pl name decoys/predictions.real-cost.rdb < sort.$*.pred.$(PID).tmp) | ${PCB}/jointbl name decoys/predictions.real-cost.rdb > join.$*.pred.$(PID).tmp
	${PCB}/sorttbl real_cost < join.$*.pred.$(PID).tmp > sort.$*.pred.$(PID).tmp
	rm -f join.$*.pred.$(PID).tmp
	mv -f sort.$*.pred.$(PID).tmp $@

decoys/similarity.predictions.evaluate.%.rdb: decoys/predictions.evaluate.%.rdb decoys/predictions.similarity.rdb
	${PCB}/sorttbl name < $< > sort.$*.eval.$(PID).tmp
	${PCB}/jointbl name decoys/predictions.similarity.rdb \
		< sort.$*.eval.$(PID).tmp > $@
	rm -f sort.$*.eval.$(PID).tmp

decoys/similarity.servers.evaluate.%.rdb: decoys/servers.evaluate.%.rdb decoys/servers.similarity.rdb
	${PCB}/sorttbl name < $< > sort.$*.eval.$(PID).tmp
	${PCB}/jointbl name decoys/servers.similarity.rdb \
		< sort.$*.eval.$(PID).tmp > $@
	rm -f sort.$*.eval.$(PID).tmp

decoys/rosetta.predictions.evaluate.%.rdb: decoys/predictions.evaluate.%.rdb decoys/predictions.rosetta.rdb
	${PCB}/sorttbl name < $< > sort.$*.eval.$(PID).tmp
	${PCB}/jointbl name decoys/predictions.rosetta.rdb \
		< sort.$*.eval.$(PID).tmp > $@
	rm -f sort.$*.eval.$(PID).tmp

# Copy of a table without the lines that start with Zhang-Server_.
decoys/nozhang.%.rdb: decoys/%.rdb
	grep -v ^Zhang-Server_ $< > $@

decoys/servers.real-cost.rdb: decoys/read-servers.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's/include COSTFCN.costfcn//g' \
		-e s/evaluate.COSTFCN.rdb/real-cost.rdb/g \
		-e s/_domain// \
		-e s/read-pdb/read-servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/servers.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.real.$(PID).tmp
	${PCB}/sorttbl name < $@ > sort.real.$(PID).tmp
	mv -f sort.real.$(PID).tmp $@

decoys/predictions-scwrl.real-cost.rdb: decoys/read-predictions-scwrl.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's/include COSTFCN.costfcn//g' \
		-e s/servers.evaluate.COSTFCN.rdb/predictions-scwrl.real-cost.rdb/g \
		-e s/_domain// \
		-e s/read-pdb/read-predictions-scwrl/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/servers.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.real.$(PID).tmp
	${PCB}/sorttbl name < $@ > sort.real.$(PID).tmp
	mv -f sort.real.$(PID).tmp $@

decoys/servers-scwrl.real-cost.rdb: decoys/read-servers-scwrl.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's/include COSTFCN.costfcn//g' \
		-e s/servers.evaluate.COSTFCN.rdb/servers-scwrl.real-cost.rdb/g \
		-e s/_domain// \
		-e s/read-pdb/read-servers-scwrl/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/servers.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.real.$(PID).tmp
	${PCB}/sorttbl name < $@ > sort.real.$(PID).tmp
	mv -f sort.real.$(PID).tmp $@

# The three similarity rules below work on an uncompressed scratch copy
# of decoys/servers under /tmp and remove it afterwards.  sorttbl is
# called through ${PCB}/ like everywhere else in this file instead of
# being looked up on PATH.
decoys/predictions.similarity.rdb:
	-rm -f $@
	cp -r decoys/servers /tmp/$(TARGET)-$(PID)-servers
	gunzip /tmp/$(TARGET)-$(PID)-servers/*.gz
	$(CASP7_SCRIPTS)/medianSimilarity.pl -f /tmp/$(TARGET)-$(PID)-servers decoys/predictions | ${PCB}/sorttbl name > $@
	-rm -rf /tmp/$(TARGET)-$(PID)-servers

decoys/servers.similarity.rdb:
	-rm -f $@
	cp -r decoys/servers /tmp/$(TARGET)-$(PID)-servers
	gunzip /tmp/$(TARGET)-$(PID)-servers/*.gz
	$(CASP7_SCRIPTS)/medianSimilarity.pl -cf /tmp/$(TARGET)-$(PID)-servers /tmp/$(TARGET)-$(PID)-servers | ${PCB}/sorttbl name > $@
	-rm -rf /tmp/$(TARGET)-$(PID)-servers

# Similarity of every server model to the 50 models selected by
# rdb_best.pl from the reweighted evaluation table.  The weight file
# (lev or hev) is chosen by the exit status of best_evalue_lt.pl.
decoys/servers.sim50.rdb: decoys/similarity.servers.evaluate.everything.rdb
	-rm -f $@
	cp -r decoys/servers /tmp/$(TARGET)-$(PID)-servers
	gunzip /tmp/$(TARGET)-$(PID)-servers/*.gz
	mkdir /tmp/$(TARGET)-$(PID)-servers-top50
	if $(CASP7_SCRIPTS)/best_evalue_lt.pl $(QA_SOURCE)/$(TARGET).best-scores.rdb 0.0069435; then wt=lev; else wt=hev; fi; \
	for m in `($(CASP7_SCRIPTS)/reweight_rdb.pl $(QA_DATA)/align+under+sim-$$wt.weights < $<) | $(CASP7_SCRIPTS)/rdb_best.pl 50`; do cp decoys/servers/$$m* /tmp/$(TARGET)-$(PID)-servers-top50/; done
	gunzip /tmp/$(TARGET)-$(PID)-servers-top50/*.gz
	$(CASP7_SCRIPTS)/medianSimilarity.pl -cf /tmp/$(TARGET)-$(PID)-servers-top50 /tmp/$(TARGET)-$(PID)-servers | ${PCB}/sorttbl name > $@
	-rm -rf /tmp/$(TARGET)-$(PID)-servers /tmp/$(TARGET)-$(PID)-servers-top50

decoys/sim50.servers.evaluate.%.rdb: decoys/servers.evaluate.%.rdb decoys/servers.sim50.rdb
	${PCB}/sorttbl name < $< > sort.$*.eval.$(PID).tmp
	${PCB}/jointbl name decoys/servers.sim50.rdb \
		< sort.$*.eval.$(PID).tmp > $@
	rm -f sort.$*.eval.$(PID).tmp

decoys/servers.evaluate.%.rdb: %.costfcn decoys/read-servers.under decoys/servers.real-cost.rdb
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/SetCost/SetRealCost\\nSetCost/g \
		-e s/COSTFCN/$*/g \
		-e s/_domain// \
		-e s/read-pdb/read-servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/servers.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f join.$*.serv.$(PID).tmp sort.$*.serv.$(PID).tmp
	${PCB}/sorttbl name < $@ > sort.$*.serv.$(PID).tmp
	(${CASP7_SCRIPTS}/unique_rdb_headers.pl name decoys/servers.real-cost.rdb < sort.$*.serv.$(PID).tmp) | ${PCB}/jointbl name decoys/servers.real-cost.rdb > join.$*.serv.$(PID).tmp
	${PCB}/sorttbl real_cost < join.$*.serv.$(PID).tmp > sort.$*.serv.$(PID).tmp
	rm -f join.$*.serv.$(PID).tmp
	mv -f sort.$*.serv.$(PID).tmp $@

decoys/servers-scwrl.evaluate.%.rdb: %.costfcn decoys/read-servers-scwrl.under decoys/servers-scwrl.real-cost.rdb
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/servers.evaluate.COSTFCN/servers-scwrl.evaluate.$*/ \
		-e s/SetCost/SetRealCost\\nSetCost/g \
		-e s/COSTFCN/$*/g \
		-e s/_domain// \
		-e s/read-pdb/read-servers-scwrl/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/servers.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f join.$*.serv.$(PID).tmp sort.$*.serv.$(PID).tmp
	${PCB}/sorttbl name < $@ > sort.$*.serv.$(PID).tmp
	(${CASP7_SCRIPTS}/unique_rdb_headers.pl name decoys/servers-scwrl.real-cost.rdb < sort.$*.serv.$(PID).tmp) | ${PCB}/jointbl name decoys/servers-scwrl.real-cost.rdb > join.$*.serv.$(PID).tmp
	${PCB}/sorttbl real_cost < join.$*.serv.$(PID).tmp > sort.$*.serv.$(PID).tmp
	rm -f join.$*.serv.$(PID).tmp
	mv -f sort.$*.serv.$(PID).tmp $@

decoys/predictions-scwrl.evaluate.%.rdb: %.costfcn decoys/read-predictions-scwrl.under decoys/predictions-scwrl.real-cost.rdb
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/servers.evaluate.COSTFCN/predictions-scwrl.evaluate.$*/ \
		-e s/SetCost/SetRealCost\\nSetCost/g \
		-e s/COSTFCN/$*/g \
		-e s/_domain// \
		-e s/read-pdb/read-predictions-scwrl/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/servers.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f join.$*.serv.$(PID).tmp sort.$*.serv.$(PID).tmp
	${PCB}/sorttbl name < $@ > sort.$*.serv.$(PID).tmp
	(${CASP7_SCRIPTS}/unique_rdb_headers.pl name decoys/predictions-scwrl.real-cost.rdb < sort.$*.serv.$(PID).tmp) | ${PCB}/jointbl name decoys/predictions-scwrl.real-cost.rdb > join.$*.serv.$(PID).tmp
	${PCB}/sorttbl real_cost < join.$*.serv.$(PID).tmp > sort.$*.serv.$(PID).tmp
	rm -f join.$*.serv.$(PID).tmp
	mv -f sort.$*.serv.$(PID).tmp $@

mqa2.%.under: %.costfcn
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/COSTFCN/$*/g \
		-e s/_domain// \
		-e s/read-pdb/read-servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/servers.mqa2.under \
		> $@

decoys/servers.mqa2.%.rdb: %.costfcn decoys/read-servers.under decoys/servers.real-cost.rdb
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/COSTFCN/$*/g \
		-e s/_domain// \
		-e s/read-pdb/read-servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/servers.mqa2.under \
		| nice -2 ${UNDERTAKER}
	-rm -f join.$*.serv.$(PID).tmp sort.$*.serv.$(PID).tmp
	${PCB}/sorttbl name < $@ > sort.$*.serv.$(PID).tmp
	(${CASP7_SCRIPTS}/unique_rdb_headers.pl name decoys/servers.real-cost.rdb < sort.$*.serv.$(PID).tmp) | ${PCB}/jointbl name decoys/servers.real-cost.rdb > join.$*.serv.$(PID).tmp
	${PCB}/sorttbl real_cost < join.$*.serv.$(PID).tmp > sort.$*.serv.$(PID).tmp
	rm -f join.$*.serv.$(PID).tmp
	mv -f sort.$*.serv.$(PID).tmp $@

decoys/initial.evaluate.%.rdb: %.costfcn
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/COSTFCN/$*/ \
		-e s/_domain// \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/initial.evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.$*.eval.$(PID).tmp
	${PCB}/sorttbl real_cost < $@ > sort.$*.eval.$(PID).tmp
	mv -f sort.$*.eval.$(PID).tmp $@

decoys/evaluate.%.rdb: %.costfcn decoys/read-pdb+servers.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/COSTFCN/$*/ \
		-e s/_domain// \
		-e s/read-pdb/read-pdb+servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER}/evaluate.under \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.$*.eval.$(PID).tmp
	${PCB}/sorttbl real_cost < $@ > sort.$*.eval.$(PID).tmp
	mv -f sort.$*.eval.$(PID).tmp $@

QA_PREDICTDIR := .
MQAO_ID := 3724-8702-5528
MQAU_ID := 7072-3475-1278
MQAC_ID := 2165-1648-9790
SORTTBL = ${PCB}/sorttbl

.PHONY: qa-all
qa-all: SAM-T08-MQAO.qa1 SAM-T08-MQAU.qa1 SAM-T08-MQAC.qa1

# Each .qa1 file is the evaluation table reweighted, sorted on "cost"
# and converted by rdb2qa1.pl.  MQAU and MQAC pick the "lev" or "hev"
# weight file by comparing the contents of the first prerequisite
# (best-evalue) with a fixed threshold in perl.
# The MQAO recipe ends on "> $@": a trailing "; \" would splice the
# line following the recipe into the shell command.
SAM-T08-MQAO.qa1: ${QA_PREDICTDIR}/best-evalue decoys/similarity.servers.evaluate.everything.rdb
	cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP7_SCRIPTS}/reweight_rdb.pl ${QA_DATA}/align.weights \
		| ${SORTTBL} cost \
		| ${CASP7_SCRIPTS}/rdb2qa1.pl -- \
			${MQAO_ID} ${TARGET} 0.03836759 0.90146034 \
		> $@

SAM-T08-MQAU.qa1: ${QA_PREDICTDIR}/best-evalue decoys/similarity.servers.evaluate.everything.rdb
	if perl -e "exit not "`cat $<`" < 0.31687" ; then \
		cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP7_SCRIPTS}/reweight_rdb.pl \
			${QA_DATA}/align+under-lev.weights \
		| ${SORTTBL} cost \
		| ${CASP7_SCRIPTS}/rdb2qa1.pl -- \
			${MQAU_ID} ${TARGET} 0.01746253 -0.04650613 \
		> $@; \
	else \
		cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP7_SCRIPTS}/reweight_rdb.pl \
			${QA_DATA}/align+under-hev.weights \
		| ${SORTTBL} cost \
		| ${CASP7_SCRIPTS}/rdb2qa1.pl \
			-- ${MQAU_ID} ${TARGET} 0.03445373 2.09727401 \
		> $@; \
	fi

SAM-T08-MQAC.qa1: ${QA_PREDICTDIR}/best-evalue decoys/similarity.servers.evaluate.everything.rdb
	if perl -e "exit not "`cat $<`" < 6.9768e-15" ; then \
		cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP7_SCRIPTS}/reweight_rdb.pl \
			${QA_DATA}/align+under+sim-lev.weights \
		| ${SORTTBL} cost \
		| ${CASP7_SCRIPTS}/rdb2qa1.pl -- \
			${MQAC_ID} ${TARGET} 0.04554128 2.34628144 \
		> $@; \
	else \
		cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP7_SCRIPTS}/reweight_rdb.pl \
			${QA_DATA}/align+under+sim-hev.weights \
		| ${SORTTBL} cost \
		| ${CASP7_SCRIPTS}/rdb2qa1.pl -- \
			${MQAC_ID} ${TARGET} 0.04055935 1.79455986 \
		> $@; \
	fi

#############################################
# GROMACS optimization
# Sat Jan 22 19:31:50 PST 2005 Kevin Karplus
#############################################

%.gromacs0.pdb.gz: %.pdb.gz
	gunzip -c $^ \
		| ${CASP7_SCRIPTS}/run-gromacs -force_field 0 \
			-nokeeptmp -tmp /tmp \
		| gzip -9 \
		> $@

%.gromacs4.pdb.gz: %.pdb.gz
	gunzip -c $^ \
		| ${CASP7_SCRIPTS}/run-gromacs -force_field 4 \
			-nokeeptmp -tmp /tmp \
		| gzip -9 \
		> $@

# Shorthand for a common request: do undertaker then gromacs
${TARGET}.gro%:
	${MAKE} -k decoys/${TARGET}.try$*-opt2.pdb.gz \
		decoys/${TARGET}.try$*-opt2.gromacs0.pdb.gz \
		decoys/${TARGET}.try$*-opt2.gromacs4.pdb.gz \
		decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty

# Without MONOMER_LENGTH the multimer rules only print a message.
# NOTE(review): the echo succeeds, so make treats the target as made
# although no file was written.
ifndef MONOMER_LENGTH
%.unpack.pdb.gz:
	echo "can't make $@ without specification of MONOMER_LENGTH"
${TARGET}.multgro%:
	echo "can't make $@ without specification of MONOMER_LENGTH"
endif

ifdef MONOMER_LENGTH
# Rule for unpacking a single chain into a homo-multimer:
%.unpack.pdb.gz: %.pdb.gz
	gunzip -c $^ \
		| ${CASP7_SCRIPTS}/unpack-multimer -length ${MONOMER_LENGTH} \
		| gzip > $@

# The multgro target is needed for multimers,
# to make sure that gromacs sees separate chains on its inputs,
# and to unpack the chains again after gromacs has run
# (the .gromacsN.unpack targets below).
${TARGET}.multgro%:
	${MAKE} -k decoys/${TARGET}.try$*-opt2.pdb.gz \
		decoys/${TARGET}.try$*-opt2.unpack.pdb.gz \
		decoys/${TARGET}.try$*-opt2.unpack.gromacs0.unpack.pdb.gz \
		decoys/${TARGET}.try$*-opt2.unpack.gromacs4.unpack.pdb.gz \
		decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty
endif
############################################################
#
#	target for selecting among very close templates
#
############################################################

# For selecting among close templates, we need to look at the simple
# Smith-Waterman scores, not the HMM-based scores, as the HMM may
# have drifted away a bit from the original target.
# This method just uses blastp on the dunbrack-pdbaa set.
${TARGET}.pdb.blast : ${TARGET}.a2m
	${PCB_SUB}/blastall -p blastp -d ${PDB_DB} -i $< \
		-e 100 -I -m 9 -o $@

####################################################
#
#	target for generating constraints from alignments
#	Added for testing MQA methods, not used in casp7
#
####################################################

CASP8 := /projects/compbio/experiments/protein-predict/casp8
CASP8_STARTER := ${CASP8}/starter-directory
CASP8_STARTER_SCRIPTS := ${CASP8_STARTER}/scripts

# Build the undertaker script from the best-scores table.
# NOTE(review): ${TARGET}.best-scores.rdb is read here but is not listed
# as a prerequisite, so an existing all-templates.under is never
# refreshed when the scores change; delete it by hand to force a rebuild.
all-templates.under:
	${CASP8_STARTER_SCRIPTS}/make_all_templates_under \
		--target ${TARGET} \
		< ${TARGET}.best-scores.rdb \
		> $@

# Run undertaker on the instantiated optimize.under script; stdout and
# stderr both go to log_optimize-constraints.log.  The *.constraints
# files themselves are presumably written by undertaker as directed by
# that script (the recipe never mentions $@).
# "> file 2>&1" replaces the csh/bash-only ">& file", which a strictly
# POSIX /bin/sh rejects.
# NOTE(review): this is six independent rules sharing one recipe, so
# "make -j" may run the recipe several times concurrently.
log_align.constraints log_align_bonus.constraints \
log_rejected.constraints log_rejected_bonus.constraints \
log_noncontact.constraints log_noncontact_bonus.constraints \
: all-templates.under
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		< ${CASP8_STARTER}/optimize.under \
	| ${UNDERTAKER} > log_optimize-constraints.log 2>&1

# Save the instantiated optimize.under script for inspection.
log_align.under:
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		< ${CASP8_STARTER}/optimize.under \
		> $@

# Same as the log_* rule above, except that the first "log_" on each
# script line is removed before the script is run, and the output goes
# to optimize-constraints.log.
align.constraints align_bonus.constraints \
rejected.constraints rejected_bonus.constraints \
noncontact.constraints noncontact_bonus.constraints \
: all-templates.under
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/log_// \
		< ${CASP8_STARTER}/optimize.under \
	| ${UNDERTAKER} > optimize-constraints.log 2>&1

# clean up after prediction is over:
# remove editor backups, old tables and Template.atoms* files,
# gzip the logs and PDB files, then run fixmode on the directory.
clean: ALWAYS REMAKE
	echo REMOVING
	find . -name '*~' -exec rm -f '{}' \; -print
	find . -name '.*~' -exec rm -f '{}' \; -print
	find . -name '#*' -exec rm -f '{}' \; -print
	find . -name '*.pretty.old' -exec rm -f '{}' \; -print
	find . -name '*.rdb.old' -exec rm -f '{}' \; -print
	find . -name 'Template.atoms*' -exec rm -f '{}' \; -print
	echo GZIPPING
	find . -name '*.log' -exec gzip -9f '{}' \; -print
	find . -name '*.pdb' -exec gzip -9f '{}' \; -print
	${PCB}/fixmode .

# fake targets
# Anything that lists ALWAYS or REMAKE as a prerequisite is rebuilt on
# every run.  They are declared with .PHONY (".PSEUDO" has no special
# meaning to GNU make), so this keeps working even if a file called
# ALWAYS, REMAKE or clean appears in the directory.
.PHONY: ALWAYS REMAKE clean
ALWAYS REMAKE:

# End of Make.main

# 2007/10/07 - GGS
# added logic for rr neural net 647_47.
# Uses a different program (predictlocal)
# so changes were made to pass an RR_PROG parameter.
# Changes to the RR_ARGS to handle the call to the predictor.
# Added target for building *.probs files.