# MACROS that need to be set:
#   TARGET := SET_IN_Makefile
#       When you are calling this Make.main file, you have to set
#       TARGET := my_sequence .  Then you include Make.main
#       i.e., do not define TARGET in THIS file, define it in the caller.

SAM_YEAR := T08
#   SAM_YEAR    change this to change the SAM year version.  Note that
#       SAM_YEAR is HARD-CODED in the paths of many scripts, so you
#       will have to root out every instance of this in order
#       to change the year.  This is an attempt to make less
#       search-and-replace-ing required in the future.
#
#   SITE_TYPE   should be set to "test" if this is the test site
#       and unset if this is the live site
#   START_COL   defaults to 1
#   PREFERRED_AL_METHOD defaults to t06
#   MANUAL_TOP_HITS which chains to make pairwise alignments for
#       in extra_alignments and which to make
#       read_alignments.under files for in read_alignments
#       (default empty)
#   if NO_SUMMARY is set, then don't do any of the output to the
#       summary.html file (useful for updating bits and pieces)
#
# Automatic-variable reminder:
#   $@ : is the name of the file to be made.
#   $? : is the names of the changed dependents.
#   $< : the name of the related file that caused the action.
#   $* : the prefix shared by target and dependent files.

# "What is the first column for numbering alignments and mutual information?"
START_COL ?= 1

# export all variables to sub-makes
# export
#   Tue Jun 13 14:41:47 PDT 2006 Kevin Karplus
#   This "export" was killing the makes with
#       "execvp: ...: Argument list too long"
#   error messages.
#   Commenting it out fixed the problems.
# (even if someone turns off export, still export these macros)
export TARGET
export START_COL

# Determine the CASP domains: one word (D1, D2, ...) per file in the current
# directory whose name matches *_D?.pdb and contains T...._D? .
DOMAINS := $(shell perl -e '/T...._(D.)/ && print "$$1\n" foreach <*_D?.pdb>')
$(warning DOMAINS are '${DOMAINS}')

# Prevent automatic deletion of intermediates, by providing
# .SECONDARY with no pre-requisites
.SECONDARY:

# TNUM is the TARGET missing the first T.
# patsubst removes only a leading T; the previous $(subst T,,...) removed
# every T anywhere in the name.
TNUM := $(patsubst T%,%,${TARGET})

# Alignment method: AL_METHOD is normally overridden on the command line of
# the recursive makes; it falls back to the preferred method.
PREFERRED_AL_METHOD ?= t06
AL_METHOD ?= ${PREFERRED_AL_METHOD}
TARG_AL := ${TARGET}.${AL_METHOD}

# TO DO:
#   Clean up (get rid of?) script make-alignments,
#   using standard programs to get hit list from *best-scores.rdb
#   into a list that can be run with $(foreach ...)
#   the way that extra_alignments now are.

# REDO_SEARCHES if set, causes the template library to be
#       included in the dependencies of the searches.
#       (if not defined, then template library not in dependencies)
# REDO_T2K  if set, causes NR to be included in the dependencies
#       for the t2k iterative search, usually triggering rebuilding
#       the t2k.a2m.gz file.
# REDO_T04  if set, causes NR to be included in the dependencies
#       for the t04 iterative search, usually triggering rebuilding
#       the t04.a2m.gz file.
# REDO_T06  if set, causes NR to be included in the dependencies
#       for the t06 iterative search, usually triggering rebuilding
#       the t06.a2m.gz file.
# REDO_MI   if set, add dependency on correlated_columns executable.
# REDO_ALL  set all the above redo macros
ifdef REDO_ALL
REDO_SEARCHES:=1
REDO_T2K:=1
REDO_T04:=1
REDO_T06:=1
REDO_MI:=1
endif

# macros that control the t2k iterative search
# BLAST_MAX default 10,000, set it smaller for long proteins with many
#       homologs that take a long time to run.
# ADPSTYLE  default 5, set it to 1 for long proteins that cause hmmscore
#       to crash in the final alignment step of target2k

# macros that control compression
# NOGZIP_PDB    if set causes undertaker.pdb file not to be gzipped.
# macros used in recursive makes for pairwise alignments:
# MANUAL_TOP_HITS   which chains to make pairwise alignments for
#           provided manually
# PRED      needs to be set to template chain ID in recursive makes for
#           pairwise alignments
# PRED2     first two letters of PRED, now computed
#           automatically from PRED.
# ALIGN_VITERBI if set, causes only ADP=1 and not ADP=5 alignments to
#           be tried
#
# The following macros are set automatically when making pairwise alignments.
# ALIGN_TYPE    local or global, set when generating pairwise alignments
# ALIGN_NAME
# ADP
# TWO_TRACK_ADP (used to set ADP for two-track alignments)
# SW
# FSSP
# FSSP2
# STRUCT_ALPH   (also used for generating secondary structure predictions)
# BURIAL_ALPH   (used for 3-track HMMs)
#   for 2-track alignment
# AA_ALIGN_WEIGHT
# TR1_ALIGN_WEIGHT
#   for 3-track alignment
# AA_ALIGN_WEIGHT
# STRUCT_WEIGHT
# BURIAL_WEIGHT

# Process id of a shell spawned while this makefile is parsed, alone and
# combined with the host name.  The two $(shell) calls below are separate
# processes, so PID and the number inside UNIQ are not the same value.
PID := $(shell echo $$$$)
UNIQ := $(shell echo $$$$)-$(shell hostname)

PCEP := /projects/compbio/experiments/protein-predict
PCPR := /projects/compbio/programs

# Location of the NR database: the first of these prefixes for which some
# file ${NR}.* exists; the /projects copy is the unconditional last resort.
NR ?= /scratch/data/nrp/nr
ifeq ($(wildcard ${NR}.*),)
NR := /var/tmp/nrp/nr
endif
ifeq ($(wildcard ${NR}.*),)
NR = /projects/compbio/data/nrp/nr
endif

#
# AUTHOR CODE for CASP8 submissions of hand prediction group
# CODE FOR SAM-T08-human (all bogus for CASP9---no hand submission)
CASP_ID := 4008-1775-0004
MQAO_ID := 3724-8702-5528
MQAU_ID := 7072-3475-1278
MQAC_ID := 2165-1648-9790

CASP_WEBSITE ?= http://predictioncenter.org
CASP_WEBSITE_TARGET ?= ${CASP_WEBSITE}/casp9/target.cgi
CASP_WEBSITE_SERVERS_DOWNLOAD ?= ${CASP_WEBSITE}/download_area/CASP9/server_predictions
CASP_WEBSITE_PREDICTIONS_DOWNLOAD ?= ${CASP_WEBSITE}/download_area/CASP9/predictions
CASP_SUBMIT ?= submit@predictioncenter.org

UNAME_M ?= $(shell uname -m)
UNAME_P ?= $(shell uname -p)

PCL := /projects/compbio/lib
PCB := /projects/compbio/bin
PCBS := ${PCB}/scripts

# Per-architecture binary directory.  PCB_SUB2 (the i686 directory) is only
# defined when the primary directory is the x86_64 one.
PCB_SUB := ${PCB}/${UNAME_P}
ifeq (${PCB_SUB},/projects/compbio/bin/x86_64)
PCB_SUB2 := /projects/compbio/bin/i686
endif
SAM_YEAR := T08
SITE_TYPE := test
SAM_MAIN := ${PCEP}/SAM_${SAM_YEAR}${SITE_TYPE}
BIN_FREEZE := ${SAM_MAIN}/bin_freeze
BIN_SAM := ${BIN_FREEZE}/sam
FREEZE_SCRIPTS := ${BIN_FREEZE}/scripts

CASP9 := /projects/compbio/experiments/protein-predict/casp9
STARTER := ${CASP9}/starter-directory
STARTER_NETWORKS := ${STARTER}/networks
STARTER_SCRIPTS := ${STARTER}/scripts
CASP9_NETWORKS := ${STARTER}/networks
CASP9_SCRIPTS := ${STARTER}/scripts
QA_DATA := ${STARTER}/qa_data
CASP_QA_GROUPS := $(shell cat $(CASP9)/qa_groups.txt)

PCEM := /projects/compbio/experiments/models.97
PCEM_SCRIPTS := ${PCEM}/scripts
PCEM_SCRIPTS2K := ${PCEM}/scripts2k
PCEM_SCRIPTS04 := ${PCEM}/scripts04
PCEM_INDEXES := /projects/compbio/experiments/models.97/indexes

EXTRACT := ${PCBS}/extract-from-fasta
SORTTBL := ${PCB}/sorttbl
JOINTBL := ${PCB}/jointbl
ROW := ${PCB}/row
MERGETBL := ${PCB}/mergetbl

# Sat Apr 12 09:23:48 PDT 2008 Kevin Karplus
#   temporarily use frozen copy in server, until Richard
#   installs new version.
# BIN_SAM := ${PCB_SUB}

FRAGFINDER := ${BIN_SAM}/fragfinder

UNDERTAKER_SCRIPTS := /projects/compbio/experiments/undertaker/scripts

# Which undertaker to use: the copy under ${PCPR} if present, otherwise the
# per-architecture copies.  The ${PCB_SUB} candidate used to be written as a
# second "UNDERTAKER ?=" line, which could never take effect because the
# first "?=" had already defined the variable; it is now an existence-checked
# fallback like the other tools below.  A value given on the make command
# line still wins over all of these.
UNDERTAKER ?= ${PCPR}/undertaker/undertaker
ifeq ($(wildcard ${UNDERTAKER}*),)
UNDERTAKER := ${PCB_SUB}/undertaker
endif
ifeq ($(wildcard ${UNDERTAKER}*),)
UNDERTAKER := ${PCB_SUB2}/undertaker
endif

# For each tool below: use the ${PCB_SUB} copy unless nothing matching it
# exists, in which case fall back to ${PCB_SUB2}.
# NOTE(review): PCB_SUB2 is only defined on x86_64 in the visible part of
# this file; on other hosts the fallback expands to "/<tool>" -- confirm.
RASMOL ?= ${PCB_SUB}/rasmol
ifeq ($(wildcard ${RASMOL}*),)
RASMOL:= ${PCB_SUB2}/rasmol
endif

ESTIMATE_DIST ?= ${PCB_SUB}/estimate-dist
ifeq ($(wildcard ${ESTIMATE_DIST}*),)
ESTIMATE_DIST:= ${PCB_SUB2}/estimate-dist
endif

FIXMODE ?= ${PCB}/fixmode
ADD_INSERTS ?= ${PCEM_SCRIPTS}/add-inserts
W0.5 ?= ${PCBS}/w0.5

TARGET2K ?= ${PCEM_SCRIPTS2K}/target2k
TARGET04 ?= ${PCEM_SCRIPTS04}/target04
TARGET06 ?= ${PCEM_SCRIPTS04}/target06

# Which version of rosetta to use
ROSETTA ?= ${PCB_SUB}/rosetta
ifeq ($(wildcard ${ROSETTA}*),)
ROSETTA:=${PCB_SUB2}/rosetta
endif

WORKDIR ?= $(shell pwd)
HOST ?= $(shell hostname)
$(warning job running on ${HOST})

# CLUSTER_HEAD is set only on hosts whose name contains "kilokluster" or
# "bmecluster"; rules elsewhere in this file run some commands through
# "ssh ${CLUSTER_HEAD}" when it is non-empty.
ifneq "$(findstring kilokluster,${HOST})" ""
CLUSTER_HEAD ?= moai-10
else
ifneq "$(findstring bmecluster,${HOST})" ""
CLUSTER_HEAD ?= bmecluster
endif
endif

MAKE := /usr/bin/gmake

HMMSCORE := ${BIN_SAM}/hmmscore
A2M2HTML := ${PCBS}/a2m2html
SCOP := /projects/compbio/data/scop/dir.cla.scop.txt.gz
ANNOTATE :=${STARTER_SCRIPTS}/annotate_target_scores -scop ${SCOP}

MAIN_PRINTING_SCRIPT := ${STARTER_SCRIPTS}/handle_summary_html.pl
# Note about MAIN_PRINTING_SCRIPT:
# handle_summary_html.pl takes in an argument (when called on the command line)
# that tells it what action to perform. For example, it used to be that there
# was an "add_summary" perl script. Now, however, you call handle_summary_html.pl add_summary
# to accomplish the same thing. "add_summary" is the argument to the script.
# Additional arguments may also be included, depending on the action being performed.
# Check scripts/handle_summary_html.pl to see what actions can be performed.

# Number of alignment columns reported by checkseq for ${TARGET}.a2m.
# Deliberately left as a deferred ("?=") definition: it is re-evaluated each
# time it is expanded.
LENGTH ?= $(shell checkseq foo -db ${TARGET}.a2m \
    | awk '/AlignColumns/ {print $$2}')
$(warning LENGTH= ${LENGTH})

# how many residues wide should each row of the logos be?
# Ideally, we'd like this to be computed from the sequence length,
#   with length<=200 yielding 50
#   200
# NOTE(review): everything between the "200" above and the "> README" below
# is missing from the copy of this file under review (it looks as if the text
# between a "<" and a ">" was stripped).  It must be restored from the
# pristine file.  The lines that follow are the surviving tail of a recipe
# whose rule header was lost; they are kept as comments because they cannot
# run without that header.
#   ... > README
#   date >> README
#   echo "Running on "${HOST} >> README
#   date

# Every rule in this section appends to ${WORKDIR}/summary.html through
# ${MAIN_PRINTING_SCRIPT} unless NO_SUMMARY is set.  The leading "-" on those
# calls keeps a failed summary update from stopping the build.

#create html results page
summary_create: ${TARGET}.a2m
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} create_summary_html \
	    ${TARGET} ${TARGET}.a2m > ${WORKDIR}/summary.html
endif

#start the inputs section
header_inputs:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    Inputs >> ${WORKDIR}/summary.html
endif

#acknowledge sequence receipt
#add pointer to sequence file to the html results page
receipt_ack: ${TARGET}.a2m
	echo Received sequence ${TARGET}.a2m
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Submitted sequence(s)" \
	    ${TARGET}.a2m >> ${WORKDIR}/summary.html
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "README file" \
	    README >> ${WORKDIR}/summary.html
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Provided documentation" \
	    ${TARGET}.doc.html >> ${WORKDIR}/summary.html
endif

pdb_blast: ${TARGET}.pdb_blast.txt
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Quick blastp of non-redundant PDB" \
	    $^ >> ${WORKDIR}/summary.html
endif

header_alignment:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    'Multiple alignment' >> ${WORKDIR}/summary.html
endif

# One recursive make per alignment method.  The blank lines inside the define
# put each expansion on its own recipe line when it is used in $(foreach).
# NOTE(review): blank lines were not recoverable from the reviewed copy;
# they are restored here by analogy with do_secondary_op below -- confirm.
define build_multiple_op

-${MAKE} -k AL_METHOD=${AL_METHOD} \
    build_multiple_alignment build_pretty build_mod w0.5_logo \
    consensus conserved_script 1.small_divider

endef

build_multiple: ${TARGET}.upper-only.a2m
	$(foreach AL_METHOD,${MA_METHODS}, $(call build_multiple_op))

build_multiple_alignment: ${TARG_AL}.a2m.gz
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "SAM_${AL_METHOD} multiple alignment in a2m format" \
	    $^ >> ${WORKDIR}/summary.html
endif

build_pretty: ${TARG_AL}.pa.html
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "SAM_${AL_METHOD} multiple alignment in pretty html format" \
	    $^ >> ${WORKDIR}/summary.html
endif

build_mod: ${TARG_AL}.w0.5.mod
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "SAM_${AL_METHOD} target hidden Markov model" \
	    $^ >> ${WORKDIR}/summary.html
endif

w0.5_logo: ${TARG_AL}.w0.5-logo.eps ${TARG_AL}.w0.5-logo.pdf
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
	    "SAM_${AL_METHOD} multiple alignment---sequence logo" \
	    ${TARG_AL}.w0.5-logo \
	    eps pdf >> ${WORKDIR}/summary.html
endif

# NOTE(review): the trailing "eps pdf" arguments look copied from w0.5_logo;
# whether add_summary_html ignores them is not visible here -- confirm.
consensus: ${TARG_AL}.w0.5.maxp
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "SAM_${AL_METHOD} consensus sequence" \
	    ${TARG_AL}.w0.5.maxp \
	    eps pdf >> ${WORKDIR}/summary.html
endif

# The caption names the alignment method explicitly: this is not a pattern
# rule, so the $* that used to appear in the caption expanded to nothing.
conserved_script: conserved_${AL_METHOD}
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Script for ${AL_METHOD} highlighting in rasmol" \
	    $^ >> ${WORKDIR}/summary.html
endif

#start the local structure section
# NOTE(review): the caption below appears to have lost HTML markup (a link
# around the parenthesised text?) in the reviewed copy -- restore it.
header_local_structure:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    ' Secondary Structure Prediction (Explanation of secondary-structure predictions) ' \
	    >> ${WORKDIR}/summary.html
endif

# returns before and after make are important here
define do_secondary_op

-${MAKE} -k AL_METHOD=${AL} STRUCT_ALPH=${STRUCT_ALPH} do_secondary do_secondary_logo

endef

# returns before and after make are important here
# (STRUCT_ALPH used to be passed twice with the same value; once is enough.
# NOTE(review): if the second copy was meant to be BURIAL_ALPH=..., restore
# it under that name.)
define do_burial_op

-${MAKE} -k AL_METHOD=${AL} \
    STRUCT_ALPH=${STRUCT_ALPH} do_burial_or_secondary \
    ${TARGET}.${AL}.${STRUCT_ALPH}.mod do_secondary_logo

endef

# Predict local structure: every pure secondary alphabet with every alignment
# method, then the merged dssp-ehl2 prediction, then every burial alphabet.
local_structure: $(foreach AL,${MA_METHODS},${TARGET}.${AL}-thin90.a2m.gz)
	$(foreach STRUCT_ALPH,${PURE_SECONDARY_ALPHABETS}, \
	    $(foreach AL,${MA_METHODS},$(call do_secondary_op)) \
	    ${MAKE} 1.small_divider;)
	-${MAKE} -k MERGE_SECONDARY=1 STRUCT_ALPH=dssp-ehl2 \
	    do_secondary do_secondary_logo mail_secondary 1.small_divider
	$(foreach STRUCT_ALPH,${BURIAL_ALPHABETS}, \
	    $(foreach AL,${MA_METHODS},$(call do_burial_op)) \
	    ${MAKE} 1.small_divider)

# Base name of the prediction files: method-independent when MERGE_SECONDARY
# is set, per alignment method otherwise.
ifdef MERGE_SECONDARY
SECONDARY_TARGET := ${TARGET}.${STRUCT_ALPH}
else
SECONDARY_TARGET := ${TARG_AL}.${STRUCT_ALPH}
endif
# $(warning SECONDARY_TARGET = ${SECONDARY_TARGET})

do_burial_or_secondary: ${SECONDARY_TARGET}.rdb \
	    ${SECONDARY_TARGET}.seq \
	    ${SECONDARY_TARGET}-color.rasmol
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
	    "${STRUCT_ALPH} structure prediction" \
	    ${SECONDARY_TARGET} \
	    rdb seq \
	    >> ${WORKDIR}/summary.html
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Script for ${SECONDARY_TARGET} coloring in rasmol" \
	    ${SECONDARY_TARGET}-color.rasmol >> ${WORKDIR}/summary.html
endif
ifeq (${AL_METHOD},${PREFERRED_AL_METHOD})
	-ln -sf ${SECONDARY_TARGET}-color.rasmol ${COLOR_SCRIPT_SHORT_NAME}
endif

do_secondary: do_burial_or_secondary \
	    ${SECONDARY_TARGET}.constraints

do_secondary_logo: ${SECONDARY_TARGET}.mod ${SECONDARY_TARGET}-logo.eps ${SECONDARY_TARGET}-logo.pdf
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
	    "${STRUCT_ALPH}---sequence logo" \
	    ${SECONDARY_TARGET}-logo \
	    eps pdf \
	    >> ${WORKDIR}/summary.html
endif

# CASP e-mail submission
EMAIL_ADDRESS ?= ${CASP_SUBMIT}
EMAIL_SECONDARY_SUBJECT ?= SAM-${SAM_YEAR} hand ${TARGET}

# Mails the prediction only when EMAIL_SECONDARY is set; the mail command is
# run on ${CLUSTER_HEAD} through ssh when CLUSTER_HEAD is non-empty.
mail_secondary: ${SECONDARY_TARGET}
ifdef EMAIL_SECONDARY
ifneq (${CLUSTER_HEAD},)
	ssh ${CLUSTER_HEAD} "cd ${WORKDIR}; mail -s '${EMAIL_SECONDARY_SUBJECT} $^' ${EMAIL_ADDRESS} < $^"
else
	mail -s '${EMAIL_SECONDARY_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
endif
	echo mailing $^ to ${EMAIL_ADDRESS} done
else
	echo No request to mail ${SECONDARY_TARGET}
endif

%.small_divider:
	date
ifndef NO_SUMMARY
# Prints a small divider into the file.
# NOTE(review): the divider markup inside the quotes was lost in the reviewed
# copy (only the two quote characters survive); restore the original string.
	echo '' >> ${WORKDIR}/summary.html
endif

#start the target model scores section
header_target_mod_scores:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    "Target model scores" >> ${WORKDIR}/summary.html
endif

2track_target_mod_scores: \
	    2track_t06_target_mod_scores \
	    2.small_divider \
	    2track_t04_target_mod_scores \
	    3.small_divider \
	    2track_t2k_target_mod_scores

# $* is the alignment method taken from the target name.
2track_%_target_mod_scores:
	-$(foreach x,${SECONDARY_ALPHABETS}, \
	    ${MAKE} -k TARGET=${TARGET} STRUCT_ALPH=${x} AL_METHOD=$* \
	    2track-calibrate;)
	-${MAKE} -k STRUCT_ALPH=str2 BURIAL_ALPH=near-backbone-11 \
	    AL_METHOD=$* \
	    $*-80-60-80-str2+near-backbone-11-scores

${AL_METHOD}-%-scores: ${TARG_AL}-%-scores.rdb \
	    ${TARG_AL}-%-scores.html
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "amino acid/$* multi-track target model scores" \
	    ${TARG_AL}-$*-scores.html >> ${WORKDIR}/summary.html
endif

1track_target_mod_scores:
	-$(foreach d,${MA_METHODS}, \
	    ${MAKE} -k AL_METHOD=${d} 1track_target_mod_scores_${d};)

1track_target_mod_scores_${AL_METHOD}: ${TARG_AL}.w0.5.mlib \
	    ${TARG_AL}.w0.5.dist \
	    ${TARG_AL}-w0.5-scores.rdb \
	    ${TARG_AL}-w0.5-scores.html
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "amino acid single-track target model scores of PDB" \
	    ${TARG_AL}-w0.5-scores.html >> ${WORKDIR}/summary.html
endif

#start the template model scores section
header_template_mod_scores:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    "Template model scores" >> ${WORKDIR}/summary.html
endif

template_mod_scores: $(foreach AL,${MA_METHODS}, ${AL}_template_mod_scores)

# The summary call carries the same "-" prefix as every other summary call in
# this file, so a failed summary update no longer fails this target.
%_template_mod_scores: ${TARGET}.%-template-lib-scores.rdb \
	    ${TARGET}.%-template-lib-scores.html
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Annotated $* template model scores" \
	    ${TARGET}.$*-template-lib-scores.html >> ${WORKDIR}/summary.html
endif

#start the top hits section
header_top_hits:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    "Top Hits" >> ${WORKDIR}/summary.html
endif

# One best-scores table per alignment method plus the combined table.
ALL_BEST := $(foreach A,${MA_METHODS},${TARGET}.${A}.best-scores.rdb ) \
	    ${TARGET}.best-scores.rdb

top_hits:
	-$(foreach d,${MA_METHODS}, \
	    ${MAKE} -k AL_METHOD=${d} \
	    ${TARGET}.${d}.best-scores.rdb ${TARGET}.${d}.best-scores.html;)
	${MAKE} -k ${TARGET}.best-scores.rdb ${TARGET}.best-scores.html
ifndef NO_SUMMARY
	-$(foreach d,${MA_METHODS}, \
	    ${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Best scoring hits from $d HMMs" \
	    ${TARGET}.$d.best-scores.html >> ${WORKDIR}/summary.html;)
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Best scoring hits from combining ${MA_METHODS}" \
	    ${TARGET}.best-scores.html >> ${WORKDIR}/summary.html
endif

#start the top alignments section
header_top_alignments:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    "Alignments for Top Hits" >> ${WORKDIR}/summary.html
endif

# NUM_ALIGN_TEMPLATES specifies the number of templates to include
# from each best-scores file when building pairwise alignments.
NUM_ALIGN_TEMPLATES ?= 25
NUM_ALIGN_TEMPLATES_PLUS_TWO ?= $(shell perl -e 'print ${NUM_ALIGN_TEMPLATES} + 2')

# Truncate and sort each best-scores table, merge them on Sequence_ID and
# feed the merged table to make-alignments.
# NOTE(review): the merge steps name the t2k, t04 and t06 tables explicitly
# rather than following MA_METHODS, and the tmp-* scratch names are shared by
# every run in the directory.
build_top_alignments:
	${MAKE} -k ${TARGET}.mod
	$(foreach R,${ALL_BEST}, \
	    grep -v '^ ' < ${R} \
	    | head -n ${NUM_ALIGN_TEMPLATES_PLUS_TWO} \
	    > tmp-truncated-${R};)
	$(foreach R,${ALL_BEST}, ${SORTTBL} Sequence_ID < tmp-truncated-${R} > tmp-sorted-${R};)
	$(foreach R,${ALL_BEST}, rm tmp-truncated-${R};)
	${MERGETBL} Sequence_ID < tmp-sorted-${TARGET}.best-scores.rdb \
	    tmp-sorted-${TARGET}.t2k.best-scores.rdb >tmp.merged.rdb
	${MERGETBL} Sequence_ID < tmp.merged.rdb \
	    tmp-sorted-${TARGET}.t04.best-scores.rdb >tmp.merged2.rdb
	${MERGETBL} Sequence_ID < tmp.merged2.rdb \
	    tmp-sorted-${TARGET}.t06.best-scores.rdb >tmp.merged3.rdb
	$(foreach R,${ALL_BEST}, rm tmp-sorted-${R};)
ifdef ALIGN_VITERBI
	${STARTER_SCRIPTS}/make-alignments ${TARGET} VITERBI_ALIGN=1 < tmp.merged3.rdb
else
	${STARTER_SCRIPTS}/make-alignments ${TARGET} < tmp.merged3.rdb
endif
	-rm tmp.merged.rdb tmp.merged2.rdb tmp.merged3.rdb

show_top_alignments: $(foreach A,${MA_METHODS},${TARGET}.${A}.top_reported_alignments.html \
	    ${TARGET}.${A}.top_reported_alignments.rdb ) \
	    ${TARGET}.top_reported_alignments.rdb \
	    ${TARGET}.top_reported_alignments.html
ifndef NO_SUMMARY
	-$(foreach A,${MA_METHODS}, ${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Top alignments from ${A} hits" \
	    ${TARGET}.${A}.top_reported_alignments.html >> ${WORKDIR}/summary.html ; )
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Top alignments from combined hits" \
	    ${TARGET}.top_reported_alignments.html >> ${WORKDIR}/summary.html
endif

# do recursive make to make sure that the wildcard in all-align.a2m.gz is up to date
all_align:
	-${MAKE} -k all-align.a2m.gz all-align.pa
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \
	    "multiple alignment of templates" \
	    all-align \
	    a2m.gz pa \
	    >> ${WORKDIR}/summary.html
endif

undertaker_start_section:
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \
	    "Undertaker (3d) files" >> ${WORKDIR}/summary.html
endif

%.undertaker-align.under: %.top_reported_alignments.rdb
	${STARTER_SCRIPTS}/make_undertaker_alignment_list < $^ > $@

undertaker_from_many: $(foreach A,${MA_METHODS},${TARGET}.${A}.undertaker-align.under ) \
	    ${TARGET}.undertaker-align.under
ifndef NO_SUMMARY
	-$(foreach A,${MA_METHODS}, ${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Undertaker input for top ${A} alignments" \
	    ${TARGET}.${A}.undertaker-align.under >> ${WORKDIR}/summary.html ;)
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Undertaker input for top combined alignments" \
	    ${TARGET}.undertaker-align.under >> ${WORKDIR}/summary.html
endif

ifdef NOGZIP_PDB
PDBEXT := pdb
else
PDBEXT := pdb.gz
endif

# NOTE(review): the caption below appears to have lost markup on its two
# continuation lines in the reviewed copy -- restore it.
undertaker_show_alignment: ${TARGET}.undertaker-align.${PDBEXT}
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    'PDB file with \
	    \
	    model(s) for top alignments' \
	    $^ >> ${WORKDIR}/summary.html
endif

#BUG: the .make_jpeg target causes the top_alignments.rdb file to be remade,
#   which in turn causes the undertaker-align.${PDBEXT} file to be remade.
# The write to summary.html is skipped when NO_SUMMARY is set, like every
# other summary write in this file; the .make_jpeg prerequisite is still built.
undertaker_pictures: ${TARGET}.undertaker-align.make_jpeg
ifndef NO_SUMMARY
	${STARTER_SCRIPTS}/add_jpeg_views_html \
	    -basename ${TARGET}.undertaker-align \
	    -caption "Images of an (incomplete) model created by sidechain replacement on the backbone of the highest scoring template." \
	    -color_script ehl2 \
	    >> ${WORKDIR}/summary.html
endif

frag_a2m:
	$(foreach AL_METHOD, ${MA_METHODS}, ${MAKE} AL_METHOD=${AL_METHOD} ${TARGET}.${AL_METHOD}.many.frag.gz;)
ifndef NO_SUMMARY
	$(foreach AL_METHOD, ${MA_METHODS}, \
	    ${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Fragment list for undertaker (from fragfinder)" \
	    ${TARGET}.${AL_METHOD}.many.frag.gz >> ${WORKDIR}/summary.html; \
	    )
endif

# One recursive make per manually chosen template chain (PRED).
# NOTE(review): blank lines restored by analogy with do_secondary_op so that
# several MANUAL_TOP_HITS expand to separate recipe lines -- confirm.
define extra_op

-${MAKE} -k PRED=${x} \
    single-track-target-alignments \
    template-alignments \
    two-track-alignments \
    three-track-alignments

endef

extra_alignments: ALWAYS
	$(foreach x,${MANUAL_TOP_HITS}, $(call extra_op))

read_alignments: \
	    $(foreach x,${MANUAL_TOP_HITS},${x}/read-alignments-noscwrl.under ${x}/read-alignments-scwrl.under)

undertaker_try1: try1.under try1.costfcn costfcn-init.under ${TARGET}.undertaker-align.sheets
	-${MAKE} ${TARGET}.do1
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "First attempted model using undertaker" \
	    decoys/${TARGET}.try1-opt3.pdb.gz >> ${WORKDIR}/summary.html
endif

# As for undertaker_pictures: nothing is written to summary.html when
# NO_SUMMARY is set.
undertaker_try1_pictures: decoys/${TARGET}.try1-opt3.make_jpeg
ifndef NO_SUMMARY
	${STARTER_SCRIPTS}/add_jpeg_views_html \
	    -basename decoys/${TARGET}.try1-opt3 \
	    -caption "Images of complete model in decoys/${TARGET}.try1-opt3.pdb.gz" \
	    >> ${WORKDIR}/summary.html
endif

score_all: decoys/score-all.try1.rdb
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} add_summary_html \
	    "Undertaker scores for decoys" \
	    $^ >> ${WORKDIR}/summary.html
endif

# end html results page
summary_end:
	date
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} end_summary_html \
	    >> ${WORKDIR}/summary.html
endif
	${FIXMODE} .
#################
# web-interface #
#################

%.end_section:
	date
ifndef NO_SUMMARY
	-${MAIN_PRINTING_SCRIPT} end_section_summary_html \
	    >> ${WORKDIR}/summary.html
endif
	${FIXMODE} .

###############################################
# common operations, applicable to many files #
###############################################

rm_empty:
	find . -empty -exec rm -f '{}' \; -print

rm_tiny:
	-find . -size -22c -not -type l -not -name best-evalue -exec rm -rf '{}' \; -print

%.padded.seq: %.seq ${TARGET}.a2m
	${ADD_INSERTS} $^ > $@

# Thinning rules: uniqueseq is given a run name and, as the original recipes
# relied on, leaves its result in <run name>.a2m, which is then compressed
# and moved onto the target.  Each rule uses a run name derived from its own
# target ($*-thinNN-tmp) instead of the single shared name "unique-tmp", so
# two thinning jobs in the same directory can no longer overwrite each
# other's scratch file.

# thin to remove just identical sequences.
%-thin100.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq $*-thin100-tmp -alignfile $*.a2m.gz -percent_id 1.00
	gzip -9f $*-thin100-tmp.a2m
	mv $*-thin100-tmp.a2m.gz $@

#thin the alignment to 90% sequence identity for use with the neural nets
#(which were trained on thinned alignments)
%-thin90.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq $*-thin90-tmp -alignfile $*.a2m.gz -percent_id 0.90
	gzip -9f $*-thin90-tmp.a2m
	mv $*-thin90-tmp.a2m.gz $@

%-thin62.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq $*-thin62-tmp -alignfile $*.a2m.gz -percent_id 0.62
	gzip -9f $*-thin62-tmp.a2m
	mv $*-thin62-tmp.a2m.gz $@

%-thin50.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq $*-thin50-tmp -alignfile $*.a2m.gz -percent_id 0.50
	gzip -9f $*-thin50-tmp.a2m
	mv $*-thin50-tmp.a2m.gz $@

%-thin40.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq $*-thin40-tmp -alignfile $*.a2m.gz -percent_id 0.40
	gzip -9f $*-thin40-tmp.a2m
	mv $*-thin40-tmp.a2m.gz $@

%-thin35.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq $*-thin35-tmp -alignfile $*.a2m.gz -percent_id 0.35
	gzip -9f $*-thin35-tmp.a2m
	mv $*-thin35-tmp.a2m.gz $@

%-thin30.a2m.gz: %.a2m.gz
	${BIN_SAM}/uniqueseq $*-thin30-tmp -alignfile $*.a2m.gz -percent_id 0.30
	gzip -9f $*-thin30-tmp.a2m
	mv $*-thin30-tmp.a2m.gz $@

#convert an a2m.gz alignment to a human-readable model
%.w0.5.mod: %.a2m.gz
	${W0.5} $^ $@.tmp
	-${BIN_SAM}/hmmconvert $*.w0.5 -model_file $@.tmp
	-rm -f $@.tmp

%-w1.0.mod: %.a2m.gz
	${PCBS}/w1.0 $^ $@

#compress a file
%.gz: %
	gzip -f $^

#make a pretty-aligned alignment from a compressed a2m alignment
%.pa: %.a2m.gz
	${BIN_SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

%.pa: %.a2m
	${BIN_SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

# Make a2m file with dots.
%.dotted-a2m: %.a2m.gz
	${BIN_SAM}/prettyalign $^ -f \
	    | grep -v '^;' > $@

%.dotted-a2m: %.a2m
	${BIN_SAM}/prettyalign $^ -f \
	    | grep -v '^;' > $@

# HTML rendering of an alignment; run on ${CLUSTER_HEAD} through ssh when
# CLUSTER_HEAD is non-empty.  The uncompressed scratch copy is named after
# the stem ($*.pa-tmp.a2m) rather than the shared "tmp.a2m".
%.pa.html: %.a2m.gz
	gunzip -c $^ > $*.pa-tmp.a2m
ifneq (${CLUSTER_HEAD},)
	ssh ${CLUSTER_HEAD} 'cd ${WORKDIR}; ${A2M2HTML} -a2m_in $*.pa-tmp.a2m > $@'
else
	${A2M2HTML} -a2m_in $*.pa-tmp.a2m > $@
endif
	-rm $*.pa-tmp.a2m

%.pa.html: %.a2m
ifneq (${CLUSTER_HEAD},)
	ssh ${CLUSTER_HEAD} 'cd ${WORKDIR}; ${A2M2HTML} -a2m_in $^ > $@'
else
	${A2M2HTML} -a2m_in $^ > $@
endif

##################################
#   ANNOTATING A TARGET          #
##################################
# secondary structure prediction #
##################################

# PREDICT_2ND := /cse/faculty/karplus/dna/predict-2nd/predict-2nd
PREDICT_2ND := ${PCPR}/predict-2nd/bin/i686/opt/predict-2nd

# Tue Jan  1 13:26:11 PST 2008 Kevin Karplus
#   Changed template database from x-seqs to maxps
#   to take advantage of consensus-sequence scoring
# TEMPLATE_SEQS := ${PCEM_INDEXES}/${AL_METHOD}.x-seqs
TEMPLATE_SEQS := ${PCEM_INDEXES}/${AL_METHOD}.w0.5.maxps

MIXTURE := ${PCL}/recode3.20comp
TRANS_REG := ${PCL}/fssp-trained.regularizer

TWOTRACKDBS := ${TEMPLATE_SEQS},${TEMPLATE_STRUCT}

# The template databases become prerequisites only when REDO_SEARCHES is set.
ifdef REDO_SEARCHES
TWOTRACK_DEPEND := ${TEMPLATE_SEQS} ${TEMPLATE_STRUCT}
else
TWOTRACK_DEPEND :=
endif

# Neural-net prediction: writes a small command script and pipes it into
# predict-2nd.  The script is named after the stem and alphabet instead of
# the shared "tmp.script".
ifdef PREDICT_NET
%.${STRUCT_ALPH} %.${STRUCT_ALPH}.rdb %.${STRUCT_ALPH}.seq : %-thin90.a2m.gz ${PREDICT_NET}
ifdef STRUCT_ALPH_FILE
	echo ReadAlphabet ${STRUCT_ALPH_FILE} > $*.${STRUCT_ALPH}.predict-script
else
	echo > $*.${STRUCT_ALPH}.predict-script
endif
	echo ReadNeuralNet ${PREDICT_NET} >> $*.${STRUCT_ALPH}.predict-script
	echo ReadForPredict $< >> $*.${STRUCT_ALPH}.predict-script
	echo PrintPredictionFasta $*.${STRUCT_ALPH}.seq >> $*.${STRUCT_ALPH}.predict-script
	echo PrintRDB $*.${STRUCT_ALPH}.rdb >> $*.${STRUCT_ALPH}.predict-script
	${PREDICT_2ND} < $*.${STRUCT_ALPH}.predict-script
	-rm $*.${STRUCT_ALPH}.predict-script
endif

%.${STRUCT_ALPH}.constraints: %.${STRUCT_ALPH}.rdb
	${STARTER_SCRIPTS}/constraints-from-rdb \
	    -alphabet ${STRUCT_ALPH} \
	    -start ${START_COL} < $^ > $@

%.${STRUCT_ALPH}.mod: %.${STRUCT_ALPH}.rdb
	${PCEM_SCRIPTS}/2nd-rdb-to-sam-model -alphabet ${SAM_STRUCT_ALPH} $^ $@

EMAX_FOR_HMMS := 90.0

# This target creates the mlib and dist file.
CALIBRATE_TARGETS := ${TARG_AL}-w0.5-${AA_SCORE_WEIGHT}-${STRUCT_ALPH}-${TR1_SCORE_WEIGHT}.dist \
	    ${TARG_AL}-w0.5-${AA_SCORE_WEIGHT}-${STRUCT_ALPH}-${TR1_SCORE_WEIGHT}.mlib \
	    ${AL_METHOD}-w0.5-${AA_SCORE_WEIGHT}-${STRUCT_ALPH}-${TR1_SCORE_WEIGHT}-scores

2track-calibrate: ${CALIBRATE_TARGETS}
	date
	echo $@ done.

# Two-track (amino acid + local structure) calibration.  When the structure
# template database does not exist at parse time the recipe only reports
# that and creates nothing.
%-w0.5-${AA_SCORE_WEIGHT}-${STRUCT_ALPH}-${TR1_SCORE_WEIGHT}.dist \
%-w0.5-${AA_SCORE_WEIGHT}-${STRUCT_ALPH}-${TR1_SCORE_WEIGHT}.mlib: \
	    %.w0.5.mod %.${STRUCT_ALPH}.mod ${TWOTRACK_DEPEND}
ifeq ($(wildcard ${TEMPLATE_STRUCT}),)
	echo ${TEMPLATE_STRUCT} does not exist, so no calibration done.
else
	${HMMSCORE} $*-w0.5-${AA_SCORE_WEIGHT}-${STRUCT_ALPH}-${TR1_SCORE_WEIGHT} \
	    -verbose 0 \
	    -calibrate 1 \
	    -alphabet protein,${SAM_STRUCT_ALPH} \
	    -trackmod $*.w0.5.mod,$*.${STRUCT_ALPH}.mod \
	    -db ${TWOTRACKDBS} \
	    -trackcoeff ${AA_SCORE_WEIGHT},${TR1_SCORE_WEIGHT} \
	    -sw 2 -dpstyle 0 -subtract_null 4 \
	    -select_score 4 -Emax ${EMAX_FOR_HMMS}
endif

# The argument handed to ${ANNOTATE} is the target name with the leading
# "${TARGET}." and the ".rdb" removed.
%-scores.rdb: %.dist ${SCOP}
	${ANNOTATE} $(subst .rdb,,$(subst ${TARGET}.,,$@)) < $< > $@

# generic pattern
# Tables containing the string X_CNT are cut to their first 500 lines and run
# through annotate_template_scores first; all others are converted directly.
${TARGET}.%-scores.html: ${TARGET}.%-scores.rdb ${SCOP}
	if grep --silent 'X_CNT' $< ; then \
	    head -n 500 < $< \
	    | ${STARTER_SCRIPTS}/annotate_template_scores \
	    | ${STARTER_SCRIPTS}/oneway_hits_rdb2html $*-scores \
	    > $@ ; \
	else ${STARTER_SCRIPTS}/oneway_hits_rdb2html ${TARGET}.$*-scores < $< > $@ ; \
	fi

# three-track str2 + CB_BURIAL_14_7 angle stuff:
ifeq (${BURIAL_ALPH},CB_burial_14_7)
SAM_BURIAL_ALPH :=CB_BURIAL_14_7
TEMPLATE_BURIAL:=${PCEM_INDEXES}/${AL_METHOD}.CB-burial-14-7s
endif

ifeq (${BURIAL_ALPH},near-backbone-11)
SAM_BURIAL_ALPH := NEAR-BACKBONE-11
TEMPLATE_BURIAL:=${PCEM_INDEXES}/${AL_METHOD}.near-backbone-11s
endif

THREETRACKDBS := ${TEMPLATE_SEQS},${TEMPLATE_STRUCT},${TEMPLATE_BURIAL}
# The three template databases become prerequisites only when REDO_SEARCHES
# is set.
ifdef REDO_SEARCHES
THREETRACK_DEPEND := ${TEMPLATE_SEQS} ${TEMPLATE_STRUCT} ${TEMPLATE_BURIAL}
else
THREETRACK_DEPEND :=
endif

# This target creates the mlib and dist file
# (three tracks: amino acid, local structure, burial; weights 0.8,0.6,0.8).
# NOTE(review): -alphabet uses ${STRUCT_ALPH} here where the two-track rule
# uses ${SAM_STRUCT_ALPH} -- confirm that is intended.
%-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH}.dist \
%-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH}.mlib: \
	    %.w0.5.mod %.${STRUCT_ALPH}.mod %.${BURIAL_ALPH}.mod
	${HMMSCORE} $*-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH} \
	    -verbose 0 \
	    -calibrate 1 \
	    -alphabet protein,${STRUCT_ALPH},${SAM_BURIAL_ALPH} \
	    -trackmod $*.w0.5.mod,$*.${STRUCT_ALPH}.mod,$*.${BURIAL_ALPH}.mod \
	    -db ${THREETRACKDBS} \
	    -trackcoeff 0.8,0.6,0.8 \
	    -sw 2 -dpstyle 0 -subtract_null 4 \
	    -select_score 4 -Emax ${EMAX_FOR_HMMS}

# This target creates the dist file if the mlib file already exists.
# hmmscore is run under the scratch run name ${BURIAL_ALPH}foo and its output
# file is then renamed onto the target.
%-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH}.dist: %-80-60-80-${STRUCT_ALPH}+${BURIAL_ALPH}.mlib \
	    ${THREETRACK_DEPEND}
	${HMMSCORE} ${BURIAL_ALPH}foo \
	    -verbose 0 \
	    -modellibrary $< \
	    -db ${THREETRACKDBS} \
	    -db_size ${LIBSIZE} \
	    -select_score 4 -Emax ${EMAX_FOR_HMMS}
	mv -f ${BURIAL_ALPH}foo.1.$@ $@

##########################
# TEMPLATE MODEL SCORING #
##########################

# The template model libraries are prerequisites of the scoring rules below
# only when REDO_SEARCHES is set.
ifdef REDO_SEARCHES
T2K_TEMPLATE_LIB_DEPEND := ${PCEM_INDEXES}/t2k-w0.5-db.mlib
T04_TEMPLATE_LIB_DEPEND := ${PCEM_INDEXES}/t04-w0.5-db.mlib
T06_TEMPLATE_LIB_DEPEND := ${PCEM_INDEXES}/t06-w0.5-db.mlib
else
T2K_TEMPLATE_LIB_DEPEND :=
T04_TEMPLATE_LIB_DEPEND :=
T06_TEMPLATE_LIB_DEPEND :=
endif

#template library scores
# The three rules below differ only in the method name (t2k, t04, t06).  Each
# scores the target's consensus sequence (.w0.5.maxp) against that method's
# template model library, then strips "#" lines from the .dist-rdb output,
# shortens the model names, keeps the rows whose SEQID equals ${TARGET}, and
# sorts on EVALUE.
# NOTE(review): these rules spell the option "-emax" while the calibration
# rules spell it "-Emax" -- confirm hmmscore accepts both.

%.t2k-template-lib-scores.rdb: ${T2K_TEMPLATE_LIB_DEPEND} ${TARGET}.t2k.w0.5.maxp
	${HMMSCORE} $*.t2k-template-lib \
	    -verbose 0 \
	    -modellibrary ${PCEM_INDEXES}/t2k-w0.5-db.mlib \
	    -db_size ${LIBSIZE} \
	    -db ${TARGET}.t2k.w0.5.maxp -rdb 1 \
	    -select_score 4 -emax ${EMAX_FOR_HMMS}
	grep -v '^[#]' < $*.t2k-template-lib.dist-rdb \
	    | ${STARTER_SCRIPTS}/shorten_mod_names \
	    | ${ROW} SEQID eq ${TARGET} \
	    | ${SORTTBL} EVALUE \
	    > $@
	-rm $*.t2k-template-lib.dist-rdb

%.t04-template-lib-scores.rdb: ${T04_TEMPLATE_LIB_DEPEND} ${TARGET}.t04.w0.5.maxp
	${HMMSCORE} $*.t04-template-lib \
	    -verbose 0 \
	    -modellibrary ${PCEM_INDEXES}/t04-w0.5-db.mlib \
	    -db_size ${LIBSIZE} \
	    -db ${TARGET}.t04.w0.5.maxp -rdb 1 \
	    -select_score 4 -emax ${EMAX_FOR_HMMS}
	grep -v '^[#]' < $*.t04-template-lib.dist-rdb \
	    | ${STARTER_SCRIPTS}/shorten_mod_names \
	    | ${ROW} SEQID eq ${TARGET} \
	    | ${SORTTBL} EVALUE \
	    > $@
	-rm $*.t04-template-lib.dist-rdb

%.t06-template-lib-scores.rdb: ${T06_TEMPLATE_LIB_DEPEND} ${TARGET}.t06.w0.5.maxp
	${HMMSCORE} $*.t06-template-lib \
	    -verbose 0 \
	    -modellibrary ${PCEM_INDEXES}/t06-w0.5-db.mlib \
	    -db_size ${LIBSIZE} \
	    -db ${TARGET}.t06.w0.5.maxp -rdb 1 \
	    -select_score 4 -emax ${EMAX_FOR_HMMS}
	grep -v '^[#]' < $*.t06-template-lib.dist-rdb \
	    | ${STARTER_SCRIPTS}/shorten_mod_names \
	    | ${ROW} SEQID eq ${TARGET} \
	    | ${SORTTBL} EVALUE \
	    > $@
	-rm $*.t06-template-lib.dist-rdb

##############################
# SINGLE-TRACK TARGET MODELS #
##############################

# single-track model pdb scoring

# Where to find the PDB sequences.  We now use the dunbrack-pdbaa set,
# which has identical sequences merged.
# PDB_DB := /projects/compbio/data/pdb/all-protein
PDB_DB := /projects/compbio/data/pdb/dunbrack-pdbaa

# The PDB sequence set is a prerequisite only when REDO_SEARCHES is set.
ifdef REDO_SEARCHES
PDB_LIB_DEPEND := ${PDB_DB}
else
PDB_LIB_DEPEND :=
endif

# Tue Jan  1 13:30:54 PST 2008 Kevin Karplus
#   w0.5 search of whole PDB dataset, using original sequences
#   (not consensus)
%.w0.5.mlib %.w0.5.dist: %.w0.5.mod ${PDB_LIB_DEPEND}
	${HMMSCORE} $*.w0.5 \
	    -verbose 0 \
	    -calibrate 1 \
	    -i $*.w0.5.mod \
	    -db ${PDB_DB} \
	    -sw 2 -dpstyle 0 -subtract_null 4 \
	    -select_score 4 -Emax ${EMAX_FOR_HMMS}
#	    -trackprior rsdb-comp2.32comp

%-w0.5-scores.rdb: %.w0.5.dist ${SCOP}
	${ANNOTATE} $*-w0.5 < $< > $@

%-w0.5-scores.html: %-w0.5-scores.rdb
	${STARTER_SCRIPTS}/oneway_hits_rdb2html $*-w0.5-scores < $^ > $@

#############
#   LOGOS   #
#############

# Copy of the target a2m with blanks, dots and lower-case letters removed
# from every line that does not contain ">".
${TARGET}.upper-only.a2m : ${TARGET}.a2m
	sed -e '/>/!s/[ .a-z]//g' <$^ > $@

%.nothin.mod: %.a2m.gz
	${BIN_SAM}/modelfromalign $*.nothin -alignfile $^ \
	    -insert ${TRANS_REG} \
	    -prior_library ${MIXTURE} \
	    -binary_output 1 \
	    -aweight_method 1 -aweight_bits 0.5 -aweight_exponent 10

%.nothin.mod: %.frag.gz
	${BIN_SAM}/modelfromalign $*.nothin -alignfile $^ \
	    -insert ${TRANS_REG} \
	    -prior_library ${MIXTURE} \
	    -binary_output 1 \
	    -aweight_method 1 -aweight_bits 0.5 -aweight_exponent 10

# All three logo rules pass the model as "-i $<", so the .mod file must stay
# the FIRST prerequisite.
%.nothin-logo.eps %.nothin.saves: %.nothin.mod %.${STRUCT_ALPH}.seq ${TARGET}.upper-only.a2m
	${BIN_SAM}/makelogo $*.nothin-logo -i $< \
	    -logo_start_num ${START_COL} \
	    -logo_rel_entropy 1 \
	    -logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* nothin" \
	    -logo_caption_f $*.${STRUCT_ALPH}.seq \
	    -logo_under_file ${TARGET}.upper-only.a2m \
	    -logo_savings_output $*.nothin.saves

%.w0.5-logo.eps %.w0.5.saves: %.w0.5.mod %.${STRUCT_ALPH}.seq ${TARGET}.upper-only.a2m
	${BIN_SAM}/makelogo $*.w0.5-logo -i $< \
	    -logo_start_num ${START_COL} \
	    -logo_rel_entropy 1 \
	    -logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* w0.5" \
	    -logo_caption_f $*.${STRUCT_ALPH}.seq \
	    -logo_under_file ${TARGET}.upper-only.a2m \
	    -logo_savings_output $*.w0.5.saves

# The caption file $*.${STRUCT_ALPH}.seq is read by the recipe, so it is
# listed as a prerequisite here, as it already is in the two logo rules above.
%.${STRUCT_ALPH}-logo.eps: %.${STRUCT_ALPH}.mod %.${STRUCT_ALPH}.seq ${TARGET}.upper-only.a2m
	${BIN_SAM}/makelogo $*.${STRUCT_ALPH}-logo -i $< \
	    -logo_start_num ${START_COL} \
	    -logo_rel_entropy 1 \
	    -logo_bars_per_line ${LOGO_WIDTH} -logo_title "$* ${STRUCT_ALPH}" \
	    -logo_caption_f $*.${STRUCT_ALPH}.seq \
	    -logo_under_file ${TARGET}.upper-only.a2m \
	    -logo_color_file ${STRUCT_ALPH_COLOR_FILE}

# NOTE(review): the recipe reads ${TARGET}.blank.pdb.gz, which is not a
# declared prerequisite -- confirm it always exists by this point.
%.${STRUCT_ALPH}-color.rasmol: %.${STRUCT_ALPH}.seq
	${STARTER_SCRIPTS}/rasmol_color_from_burial \
	    -pdb ${TARGET}.blank.pdb.gz \
	    -start_col ${START_COL} \
	    -color ${STRUCT_RASMOL_COLOR} \
	    < $^ > $@

######################
# Consensus Sequence #
######################

%.maxp: %.saves
	${PCEM_SCRIPTS}/get-consensus-seq \
	    -original ${TARGET}.a2m \
	    -minbits 0 \
	    < $^ > $@

###############
#  TOP HITS   #
###############

# define the threshold below which you want hits reported.
# If ANY of the methods reports a hit this good, it will be included
# in ${TARGET}.best-scores.rdb
BEST_EVALUE ?= 1.e-05

# report at least this many hits, even if there are no good evalues.
NUM_BEST ?= 20

# report at most this many hits, even if there are more good evalues
MAX_NUM_BEST ?= 200

#find the best hits (include dupes)
# Per-alignment-method variant: merges the score tables for ${AL_METHOD} with
# best_scores, passing the thresholds defined above.
# NOTE(review): the prerequisite is named ...-w0.5-scores.rdb, but the recipe
# passes the shell glob ...-w0.5-*-scores.rdb, which cannot match that name
# (it requires an extra "-<something>" component), and also passes an
# ...-80-60-80-str2+near-backbone-11-scores.rdb file that is not a
# prerequisite.  Confirm which inputs best_scores is really meant to see.
%.${AL_METHOD}.best-scores.rdb: \
		%.${AL_METHOD}-template-lib-scores.rdb \
		%.${AL_METHOD}-w0.5-scores.rdb
	${STARTER_SCRIPTS}/best_scores \
		-num ${NUM_BEST} -E ${BEST_EVALUE} -lib_size ${LIBSIZE} \
		-maxnum ${MAX_NUM_BEST} \
		-scop_file ${SCOP} \
		$*.${AL_METHOD}-template-lib-scores.rdb \
		$*.${AL_METHOD}-w0.5-*-scores.rdb \
		$*.${AL_METHOD}-80-60-80-str2+near-backbone-11-scores.rdb \
		> $@

#find the best hits (include dupes)
# All-methods variant: the same best_scores call, with the three score-file
# names expanded once per entry of ${MA_METHODS}.  Only the template-lib
# score tables are declared as prerequisites.
${TARGET}.best-scores.rdb: $(foreach AL,${MA_METHODS},${TARGET}.${AL}-template-lib-scores.rdb )
	${STARTER_SCRIPTS}/best_scores \
		-num ${NUM_BEST} -E ${BEST_EVALUE} -lib_size ${LIBSIZE} \
		-maxnum ${MAX_NUM_BEST} \
		-scop_file ${SCOP} \
		$(foreach AL,${MA_METHODS}, \
			${TARGET}.${AL}-template-lib-scores.rdb \
			${TARGET}.${AL}-w0.5-*-scores.rdb \
			${TARGET}.${AL}-80-60-80-str2+near-backbone-11-scores.rdb ) \
		> $@

# HTML rendering of a best-scores table.
# NOTE(review): the name argument is "$*.best_hits", not "$*.best-scores" --
# presumably a label used inside the generated page; confirm.
%.best-scores.html: %.best-scores.rdb
	${STARTER_SCRIPTS}/oneway_hits_rdb2html $*.best_hits < $^ > $@

###########################################
#              ALIGNMENTS                 #
###########################################

#track models to be used in pairwise alignments
TRACKMOD_STRUCT :=${SECONDARY_TARGET}.mod

#settings of how many templates to predict vs. number of
#alignments to convert to CASP format must be done carefully
# need to add error checking so these numbers don't conflict
#with each other

# how many alignments to select from best templates
NUM_ALIGNMENTS ?= 250

#top alignments we report for top_reported
NUM_TOP ?= 20

#build an HMM from target sequence only to produce alignments
#similar to simple Smith-Waterman.
We observe that the #T2K HMMs are so general that they may drift away from the #original seed sequence %.mod: %.a2m ${BIN_SAM}/modelfromalign $* \ -alignfile $^ \ -insert /projects/compbio/lib/fssp-trained.regularizer \ -aweight_bits 0.8 \ -fimtrans -1 \ -fimstrength 1 \ -ins_jump_conf 1 \ -match_jump_conf 1 \ -del_jump_conf 1 \ -binary_output 1 \ -prior_library ${MIXTURE} \ -a2mdots 0 \ -a protein \ -sw 2 -jump_in_prob 0.2 -jump_out_prob 1 \ -aweight_method 1 \ -aweight_exponent 10 # Wed Jan 28 10:52:22 PST 2009 Kevin Karplus # removed dependence on alignment files for top_reported... # ALIGNMENT_FILES = $(wildcard [1-9]*/*a2m*) #get the ${NUM_TOP} best alignments ${TARGET}.top_reported_alignments.rdb: \ ${TARGET}.best-scores.rdb ${ALIGNMENT_FILES} ${STARTER_SCRIPTS}/pick_alignments -target ${TARGET} \ -max_align ${NUM_TOP} -scores_file $< \ > $@ ${TARGET}.%.top_reported_alignments.rdb: \ ${TARGET}.%.best-scores.rdb ${ALIGNMENT_FILES} ${STARTER_SCRIPTS}/pick_alignments -target ${TARGET} \ -max_align ${NUM_TOP} -scores_file $< \ -select_re $* \ > $@ #convert this to html and add to the summary page # Sun Apr 6 11:17:32 PDT 2008 Kevin Karplus # Removed --make_al, since we no longer care about .al files %.top_reported_alignments.html: %.top_reported_alignments.rdb %.best-scores.rdb ifneq (${CLUSTER_HEAD},) ssh ${CLUSTER_HEAD} 'cd ${WORKDIR}; ${STARTER_SCRIPTS}/casp_summary_report_html \ --align $*.top_reported_alignments.rdb \ --target ${TARGET} > $@' else ${STARTER_SCRIPTS}/casp_summary_report_html \ --align $*.top_reported_alignments.rdb \ --target ${TARGET} > $@ endif ifndef NO_CONTACTS ######################################## # CONTACT PREDICTION USING NEURAL NETS # ######################################## # where the residue-residue contact prediction is done: RR_DATA:= ${STARTER}/rr_data RR_NETWORKS := ${STARTER}/rr_data RR_BIN:= ${STARTER}/rr_bin CORR_COLUMNS ?= ${RR_BIN}/correlated-columns VALIDATE2RR ?= ${RR_BIN}/validate2rr # Program name (without 
directory) for old prediction methods RR_PROG ?= traincontactnn7 # RR_EXT ?= 449a_45 RR_EXT ?= 647_47 # needed for string substitutions comma:= , empty:= space:= ${empty} ${empty} ifeq (${RR_EXT},248_20) RR_AL:=t04 RR_NN_NAME:= logsep.t04.5_ent.burNS_str2.miRvp_entR_pplR.n20.net STAT_LIST:=entraw,mi,omesraw,pplraw_CB8_w STATS:= $(subst ${comma},${empty},${STAT_LIST}) RR_FEATURE_COMMENT:= significance of mutual information, pairwise entropy, pairwise contact propensity, log(separation), predicted burial (near-backbone-11), predicted secondary structure(str2), amino acid profile, log(protein length) THIN:=50 RR_ARGS:= -extra logsep \ -entropy -W 5 -distribution .t04 \ -L 1 -S .t04-CB-burial-14-7.rdb,.t04.str2.rdb -noSummary \ -M 1 -C .t04-thin50.${STATS}.rdb.gz -Cstats mi,entraw,pplraw -Copts R,pRv \ -l 3 RR_DEPENDS:=${TARGET}.t04.w0.5.saves \ ${TARGET}.t04-CB-burial-14-7.rdb ${TARGET}.t04.str2.rdb \ ${TARGET}.t04-thin${THIN}.${STATS}.rdb.gz endif ifeq (${RR_EXT},352_28) RR_AL=t04 RR_NN_NAME:= logseploglen.5xt04_ent.3xnearNS_str2.miRvp_pplR.n28.net STAT_LIST:=entraw,mi,omesraw,pplraw_CB8_w STATS:= $(subst ${comma},${empty},${STAT_LIST}) RR_FEATURE_COMMENT:= significance of mutual information, pairwise contact propensity, log(separation), predicted burial (near-backbone-11), predicted secondary structure(str2), amino acid profile, log(protein length) THIN:=50 RR_ARGS:= -extra logsep,loglen \ -W 5 -distribution .t04 -entropy \ -L 3 -S .t04.near-backbone-11.rdb,.t04.str2.rdb -noSummary \ -M 1 -C .t04-thin${THIN}.${STATS}.rdb.gz -Cstats mi,pplraw -Copts R,pRv \ -l 3 RR_DEPENDS:=${TARGET}.t04.w0.5.saves \ ${TARGET}.t04.near-backbone-11.rdb ${TARGET}.t04.str2.rdb \ ${TARGET}.t04-thin${THIN}.${STATS}.rdb.gz endif ifeq (${RR_EXT},449a_45) RR_AL=t04 RR_NN_NAME:= logseploglen.5xt04_ent.5xnearNS_str2.miRpz_entR_pplR.n45.net STAT_LIST:=entraw,mi,omesraw,pplraw_CB8_w STATS:= $(subst ${comma},${empty},${STAT_LIST}) THIN:=50 RR_FEATURE_COMMENT:= significance of mutual information, 
pairwise contact propensity, log(separation), predicted burial (near-backbone-11), predicted secondary structure(str2), amino acid profile, log(protein length)
RR_ARGS:= -extra logsep,loglen \
	-W 5 -distribution .t04 -entropy \
	-L 5 -S .t04.near-backbone-11.rdb,.t04.str2.rdb -noSummary \
	-M 1 -C .t04-thin${THIN}.${STATS}.rdb.gz -Cstats mi,ent,pplraw -Copts R,pRz \
	-l 3
RR_DEPENDS:=${TARGET}.t04.w0.5.saves \
	${TARGET}.t04.near-backbone-11.rdb ${TARGET}.t04.str2.rdb \
	${TARGET}.t04-thin${THIN}.${STATS}.rdb.gz
endif

# RR_EXT=449a_45t2k: same network file as 449a_45, but every input file name
# (distribution, -S tracks, -C statistics, RR_DEPENDS) uses the t2k alignment.
# NOTE(review): RR_AL is still set to t04 in this block although all the file
# names are t2k -- confirm that is intended.
ifeq (${RR_EXT},449a_45t2k)
RR_AL:=t04
RR_NN_NAME:= logseploglen.5xt04_ent.5xnearNS_str2.miRpz_entR_pplR.n45.net
STAT_LIST:=entraw,mi,omesraw,pplraw_CB8_w
STATS:= $(subst ${comma},${empty},${STAT_LIST})
THIN:=50
RR_FEATURE_COMMENT:= significance of mutual information, pairwise contact propensity, log(separation), predicted burial (near-backbone-11), predicted secondary structure(str2), amino acid profile, log(protein length)
RR_ARGS:= -extra logsep,loglen \
	-W 5 -distribution .t2k -entropy \
	-L 5 -S .t2k.near-backbone-11.rdb,.t2k.str2.rdb -noSummary \
	-M 1 -C .t2k-thin${THIN}.${STATS}.rdb.gz -Cstats mi,ent,pplraw -Copts R,pRz \
	-l 3
RR_DEPENDS:=${TARGET}.t2k.w0.5.saves \
	${TARGET}.t2k.near-backbone-11.rdb ${TARGET}.t2k.str2.rdb \
	${TARGET}.t2k-thin${THIN}.${STATS}.rdb.gz
endif

# if we've defined a neural network, then we're using the old traincontactnn
# and the arguments should be finished as follows:
# The prediction options are APPENDED to the per-network RR_ARGS chosen above.
# (This used to read "RR_ARGS := RR_ARGS ...", which put the literal word
# RR_ARGS on the command line and discarded every option set by the selected
# RR_EXT block.)  The repeated "-l 3" is kept exactly as it was written.
ifdef RR_NN_NAME
RR_ARGS := ${RR_ARGS} -l 3 -predict ./ -id ${TARGET}
endif

# Start of new CASP9 rr predictions
# These blocks come after the finisher above and assign RR_ARGS from scratch,
# so the predictlocal networks do not receive the traincontactnn suffix.
ifeq (${RR_EXT},647_47)
RR_PROG := predictlocal
RR_AL=t04
RR_NN_NAME:= con.aa5str2_5near5nsep5.entmi_epplcc.th62.47.net
STAT_LIST:=ent,mi_eval,omes,cc,chi2,ppl,pplw
STATS:= $(subst ${comma},${empty},${STAT_LIST})
THIN:=62
LENFRAC:=12
RR_ARGS:= -align t04 \
	-alphabets aa,str2,near,n_sep -windows 5,5,5,5 \
	-paired .${RR_AL}-thin${THIN}.${STATS}.rdb.gz,ent,pR,mi_eval,R,ppl,R,cc,R \
	-predict ${TARGET}
RR_DEPENDS:=${TARGET}.${RR_AL}.probs \
${TARGET}.${RR_AL}.near-backbone-11.rdb \ ${TARGET}.${RR_AL}.str2.rdb \ ${TARGET}.${RR_AL}.n_sep.rdb \ ${TARGET}.${RR_AL}-thin${THIN}.${STATS}.rdb.gz endif ifeq (${RR_EXT},730_47) RR_PROG := predictlocal RR_AL:=t04 RR_NN_NAME:= con.aa7str2_7near3nsep5.47.data03.net LENFRAC:=12 RR_ARGS:= -align ${RR_AL} \ -alphabets aa,str2,near,n_sep -windows 7,7,3,5 \ -paired none \ -predict ${TARGET} RR_DEPENDS:=${TARGET}.${RR_AL}.probs \ ${TARGET}.${RR_AL}.near-backbone-11.rdb \ ${TARGET}.${RR_AL}.str2.rdb \ ${TARGET}.${RR_AL}.n_sep.rdb endif ifeq (${RR_EXT},730_47_t06) RR_PROG := predictlocal RR_AL:=t06 RR_NN_NAME:= con.aa7str2_7near3nsep5.47.t06.net LENFRAC:=12 RR_ARGS:= -align ${RR_AL} \ -alphabets aa,str2,near,n_sep -windows 7,7,3,5 \ -paired none \ -predict ${TARGET} RR_DEPENDS:=${TARGET}.${RR_AL}.probs \ ${TARGET}.${RR_AL}.near-backbone-11.rdb \ ${TARGET}.${RR_AL}.str2.rdb \ ${TARGET}.${RR_AL}.n_sep.rdb endif ifeq (${RR_EXT},648_17.730_47) RR_PROG := predictlocal RR_AL=t04 RR_NN_NAME:= con.aa5str2_5near5nsep5.entmi_epplccrr.th62.17.730_47.net RR_FEATURE_COMMENT:= significance of mutual information, pairwise \ contact propensity, log(separation), predicted burial \ (near-backbone-11), predicted secondary structure(str2), predicted \ H-bonds (n_sep), amino acid profile, log(protein length), log(rank of \ 730_47 prediction). 
Limited to pairs generated by 730_47 FIRST_STAGE:=730_47 FIRST_STAGE_EXT:=${FIRST_STAGE}.rr STAT_LIST:=ent,mi_eval,cc,omes,ppl STATS:= $(subst ${comma},${empty},${STAT_LIST}) THIN:=62 LENFRAC:=11 RR_ARGS:= -align t04 \ -alphabets aa,str2,near,n_sep -windows 5,5,5,5 \ -paired .${RR_AL}-thin${THIN}.${STATS}.rdb.gz,ent,pR,mi_eval,R,ppl,R,cc,R,rr,R \ -predict ${TARGET} RR_DEPENDS:=${TARGET}.${RR_AL}.probs \ ${TARGET}.${RR_AL}.near-backbone-11.rdb \ ${TARGET}.${RR_AL}.str2.rdb \ ${TARGET}.${RR_AL}.n_sep.rdb \ ${TARGET}.${FIRST_STAGE_EXT} \ ${TARGET}.${RR_AL}-thin${THIN}.${STATS}.rdb.gz endif # setup for building first stage, if necessary ifdef FIRST_STAGE FIRST_STAGE_CMD := SetRRPairs ${TARGET}.${FIRST_STAGE_EXT} $(warning First stage -----is ${FIRST_STAGE}) #if we have to, build the first stage! first_stage: ${MAKE} -k AL_METHOD=${RR_AL} RR_EXT=${FIRST_STAGE} \ ${TARGET}.${FIRST_STAGE_EXT} TARGET=${TARGET} endif STAT_LIST2:= $(subst ${comma},${space},${STAT_LIST}) RR_NEURAL_NET:=${RR_NETWORKS}/${RR_NN_NAME} CONTACT_PRED_TARGET := ${TARGET}.${RR_EXT}.rr rr_start_section: ifndef NO_SUMMARY -${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \ "Contact predictions, based on ${RR_FEATURE_COMMENT}" >> ${WORKDIR}/summary.html endif # The CASP9 predictor will likely use the *.probs %.probs : %.a2m.gz echo Alphabet ExtAA > tmp-$*-probs.script echo ClipWeight 1.0 >> tmp-$*-probs.script echo PushReg /projects/compbio/lib/recode3.20comp >> tmp-$*-probs.script echo SequenceWeight HenikoffWeight 1.0 1.0 >> tmp-$*-probs.script echo ReadA2M $*.a2m.gz >> tmp-$*-probs.script echo PrintProbs $*.probs >> tmp-$*-probs.script echo quit >> tmp-$*-probs.script ${ESTIMATE_DIST} < tmp-$*-probs.script rm -f tmp-$*-probs.script # The prediction requires using the 'traincontactnn7' program # or the new 'predictlocal' program # to build a list of inputs for a specified neural network # to make predictions. 
Those predictions are sorted by raw score # and the sequence_length*2 best scoring predictions form # the submitted RR predictions. TRAIN_CONTACT_NN := ${RR_BIN}/${RR_PROG} # $(warning RR_DEPENDS= ${RR_DEPENDS}) ${CONTACT_PRED_TARGET} ${CONTACT_PRED_TARGET}.rasmol: \ ${RR_DEPENDS} ${RR_NEURAL_NET} ${TRAIN_CONTACT_NN} ${TRAIN_CONTACT_NN} -start ${START_COL} -i ${RR_NEURAL_NET} ${RR_ARGS} \ | ${VALIDATE2RR} --format raw --network ${RR_NN_NAME} --abbrv ${RR_EXT} \ --author ${CASP_ID} \ --target ${TARGET} --start ${START_COL} \ > ${CONTACT_PRED_TARGET} ${CONTACT_PRED_TARGET}.nn1000.constraints : ${CONTACT_PRED_TARGET} ${RR_BIN}/rr2constraints.py 10.0 0.0 8.5 6.5 ${START_COL} < $^ > $@ # This rather awkard recursive make is to ensure that the process # continues, even if the contact prediction fails. contact_prediction: ${CONTACT_PRED_TARGET} ${CONTACT_PRED_TARGET}.rasmol \ ${CONTACT_PRED_TARGET}.nn1000.constraints contact_predictions: ALWAYS -${MAKE} -k RR_EXT=647_47 contact_prediction rr nn1000.constraints -${MAKE} -k RR_EXT=730_47 contact_prediction -${MAKE} -k RR_EXT=648_17.730_47 contact_prediction add_contact_prediction: contact_predictions add_contact_prediction: contact_prediction ifndef NO_SUMMARY -${MAIN_PRINTING_SCRIPT} add_summary_multiple_formats \ "contact predictions" \ ${TARGET}.${RR_EXT} \ rr rr.nn1000.constraints rr.rasmol \ >> ${WORKDIR}/summary.html endif add_contact_predictions: ALWAYS -${MAKE} -k RR_EXT=647_47 add_contact_prediction rr nn1000.constraints -${MAKE} -k RR_EXT=730_47 add_contact_prediction -${MAKE} -k RR_EXT=648_17.730_47 add_contact_prediction rr2s rr : ${TARGET}.647_47.rr.rasmol ln -sf $^ $@ rr2s : ${TARGET}.648_17.730_47.rr.rasmol ln -sf $^ $@ nn1000.constraints: ${CONTACT_PRED_TARGET}.nn1000.constraints ln -sf $^ $@ RR_CONSTRAINTS:= ${CONTACT_PRED_TARGET}.constraints %.costfcn: ${STARTER_SCRIPTS}/%.costfcn if [[ -r ${RR_CONSTRAINTS} ]] ; \ then sed s/XXX0000/${TARGET}/g < ${STARTER_SCRIPTS}/$*.costfcn > $@; \ else grep -v 
${RR_CONSTRAINTS} < ${STARTER_SCRIPTS}/$*.costfcn | sed s/XXX0000/${TARGET}/g > $@; \ fi # CASP e-mail submission EMAIL_CONTACT_PRED_SUBJECT ?= SAM-${SAM_YEAR} ${TARGET} mail_contact_pred: ${CONTACT_PRED_TARGET} ifdef EMAIL_CONTACT_PRED ifneq (${CLUSTER_HEAD},) ssh ${CLUSTER_HEAD} "cd ${WORKDIR}; mail -s '${EMAIL_CONTACT_PRED_SUBJECT} $^' ${EMAIL_ADDRESS} < $^" else mail -s '${EMAIL_CONTACT_PRED_SUBJECT} $^' ${EMAIL_ADDRESS} < $^ endif echo mailing $^ to ${EMAIL_ADDRESS} done else echo mailing $^ not requested endif ################################################# # CORRELATION STATISTICS FOR CONTACT PREDICTION # ################################################# # # computable strings NOTE: these are based on the # functions defined in CTStatistics and included files # mi:=MutualInformation LogGamma miraw:=MutualInformation mieraw:=MutualInformationEntropy ccraw:=aaCorrCoefficient cc1raw:=CorrCoefficient1 wccraw:=WeightedCorrCoefficient hgraw:=Hypergeometric omesraw:=OMES entraw:=Entropy ppraw_near7_5:=Propensity dunbrack-2191-near7.5-sep9.residue_pairs ppraw_near7:=Propensity dunbrack-2191-near7-sep9.residue_pairs ppraw_CB8:=Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9.residue_pairs pplraw_CB8:=PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9.residue_pairs ppraw_CB8_w:=Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9-w.residue_pairs ppraw_CB8_ww:=Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9-ww.residue_pairs pplraw_CB8_w:=PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9-w.residue_pairs pplraw_CB8_ww:=PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9-ww.residue_pairs # the newest 'keys' for AddStatistics hg:=Hypergeometric omes:=OMES ent:=Entropy mi_eval:=MutualInformation LogGamma pp:=Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9.residue_pairs ppl:=PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9.residue_pairs ppw:=Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9-w.residue_pairs ppww:=Propensity ${RR_DATA}/dunbrack-2191-CB8-sep9-ww.residue_pairs pplw:=PropensityLog 
${RR_DATA}/dunbrack-2191-CB8-sep9-w.residue_pairs pplww:=PropensityLog ${RR_DATA}/dunbrack-2191-CB8-sep9-ww.residue_pairs cc:=CorrCoefficient1 chi2:=Chi2 # only consider column pairs with at least ${MINPAIR}* num_sequences # sequences that have both columns occupied MINPAIR ?= 0.7 # only consider column pairs with at least ${MINNUMPAIR} sequences # that have both columns occupied MINNUMPAIR ?= 3 # generate ${LENFRAC}*sequence length pairs of columns to feed to neural net LENFRAC ?= 10 RR_AL ?= t04 AL_THIN ?= ${RR_AL}-thin${THIN} build_correlations : ${TARGET}.${AL_THIN}.${STATS}.rdb.gz %.${STATS}.rdb.gz : %.a2m.gz echo Building ${STATS} ${STAT_LIST2} echo SetAlphabet ExtAA > tmp-$*-${STATS}.corr_col echo ReadA2m $< >> tmp-$*-${STATS}.corr_col echo SetFractionOfLen ${LENFRAC} >> tmp-$*-${STATS}.corr_col echo SetMinNumPairs ${MINNUMPAIR} >> tmp-$*-${STATS}.corr_col echo SetMinPairs ${MINPAIR} >> tmp-$*-${STATS}.corr_col echo SetMinSep 7 >> tmp-$*-${STATS}.corr_col -$(foreach i,${STAT_LIST2}, \ echo AddStatistic ${${i}} >> tmp-$*-${STATS}.corr_col ;) echo ${FIRST_STAGE_CMD} >> tmp-$*-${STATS}.corr_col echo MutualInfoAll ${@:.gz=} >> tmp-$*-${STATS}.corr_col ${CORR_COLUMNS} < tmp-$*-${STATS}.corr_col -rm -f tmp-$*-${STATS}.corr_col gzip -9f ${@:.gz=} else # NO_CONTACTS defines rr_start_section: ifndef NO_SUMMARY -${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \ "Contact predictions suppressed" >> ${WORKDIR}/summary.html endif build_correlations : echo $@ Make target suppressed by NO_CONTACTS=${NO_CONTACTS} contact_predictions: echo $@ Make target suppressed by NO_CONTACTS=${NO_CONTACTS} add_contact_predictions: echo $@ Make target suppressed by NO_CONTACTS=${NO_CONTACTS} mail_contact_pred: echo $@ Make target suppressed by NO_CONTACTS=${NO_CONTACTS} %.costfcn: ${STARTER_SCRIPTS}/%.costfcn cat $^ \ | grep -v 'nn1000.*[.]constraints' \ | sed s/XXX0000/${TARGET}/g \ > $@ endif # NO_CONTACTS ############################# # CONSTRAINTS FROM ALIGNMENTS 
############################# constraints_from_alignment: all-templates.under align.constraints ifndef NO_SUMMARY -${MAIN_PRINTING_SCRIPT} add_section_head_summary_html \ "Distance constraints extracted from alignments" >> ${WORKDIR}/summary.html -${MAIN_PRINTING_SCRIPT} add_summary_html \ "Constraints from alignment" \ align.constraints >> ${WORKDIR}/summary.html -${MAIN_PRINTING_SCRIPT} add_summary_html \ "Rejected constraints" \ rejected_bonus.constraints >> ${WORKDIR}/summary.html -${MAIN_PRINTING_SCRIPT} add_summary_html \ "Noncontact constraints" \ noncontact.constraints >> ${WORKDIR}/summary.html endif ############## # FRAGFINDER # ############## FRAGFINDER_SEQS := ${PCEM_INDEXES}/calibration.x-seqs FRAGFINDER_STR2 := ${PCEM_INDEXES}/calibration.str2s FRAGFINDER_CB_BURIAL_14_7 := ${PCEM_INDEXES}/calibration.CB-burial-14-7s FRAGFINDER_NEAR-BACKBONE-11 := ${PCEM_INDEXES}/calibration.near-backbone-11s FRAGFINDER_STR2_TWOTRACKDBS := ${FRAGFINDER_SEQS},${FRAGFINDER_STR2} FRAGFINDER_STR2+CB_BURIAL_14_7_THREETRACKDBS := ${FRAGFINDER_SEQS},${FRAGFINDER_STR2},${FRAGFINDER_CB_BURIAL_14_7} FRAGFINDER_STR2+NEAR-BACKBONE-11_THREETRACKDBS := ${FRAGFINDER_SEQS},${FRAGFINDER_STR2},${FRAGFINDER_NEAR-BACKBONE-11} %.frag.gz: %.w0.5.mod %.str2.mod guide.a2m.gz ${FRAGFINDER} $* \ -a protein,STR2 \ -trackmod $*.w0.5.mod,$*.str2.mod \ -track_coeff 1.0,0.3 \ -db ${FRAGFINDER_STR2_TWOTRACKDBS} \ -firstsequence guide.a2m.gz,- \ -fraglen 9 -numpermatch 6 gzip -9f $*.frag gzip -9f $*.fstat %.many.frag.gz: %.w0.5.mod %.str2.mod %.near-backbone-11.mod guide.a2m.gz ${FRAGFINDER} $*.many \ -alphabet protein,str2,near-backbone-11 \ -trackmod $*.w0.5.mod,$*.str2.mod,$*.near-backbone-11.mod \ -trackcoeff 0.8,0.6,0.8 \ -db ${FRAGFINDER_STR2+NEAR-BACKBONE-11_THREETRACKDBS} \ -firstsequence guide.a2m.gz,-,- \ -fraglen 9 -numpermatch 30 gzip -9f $*.many.frag gzip -9f $*.many.fstat ############## # UNDERTAKER # ############## # will need to create a "decoys" directory before any full 3D building # 
with undertaker can be done. decoys/${TARGET}.%-opt3.pdb.gz: %.under %.costfcn costfcn-init.under MQA_init.costfcn -mkdir -p decoys nice -5 ${UNDERTAKER} < $*.under > $*.log 2>&1 -gzip -f decoys/${TARGET}.$**pdb -gzip -9f $*.log Template.atoms read-decoys.under: decoys echo "InfilePrefix decoys/"> $@ ls decoys/*${TARGET}*pdb* \ | sed 's;decoys/;ReadConformPDB ;' \ >> $@ echo "InfilePrefix" >> $@ -chgrp protein $@ -chmod g+w $@ %.undertaker-align.${PDBEXT} %.undertaker-align.sheets: %.undertaker-align.under cat ${STARTER_SCRIPTS}/show-align.under \ | sed s/XXX0000.t2k/$*/g \ |sed s/XXX0000/${TARGET}/g \ | sed s/START_COL/${START_COL}/g \ | nice -2 ${UNDERTAKER} >& show-align.log gzip -f show-align.log ifndef NOGZIP_PDB gzip -f $*.undertaker-align.pdb endif CONVERT_200_OPTIONS := -resize 200x200 -quality 85 -frame 1x1 -mattecolor '\#000000' CONVERT_500_OPTIONS := -resize 500x500 -quality 75 -frame 1x1 -mattecolor '\#000000' %.make_jpeg: ${MAKE} -k $*.${PDBEXT} \ $*.view1_200.jpg $*.view2_200.jpg $*.view3_200.jpg \ $*.view1_500.jpg $*.view2_500.jpg $*.view3_500.jpg %.view1_200.jpg %.view2_200.jpg %.view3_200.jpg \ %.view1_500.jpg %.view2_500.jpg %.view3_500.jpg : %.${PDBEXT} ${STARTER_SCRIPTS}/make-eps.rasmol ${RASMOL} -nodisplay $*.${PDBEXT} < ${STARTER_SCRIPTS}/make-eps.rasmol convert ${CONVERT_200_OPTIONS} tmp1.eps $*.view1_200.jpg convert ${CONVERT_200_OPTIONS} tmp2.eps $*.view2_200.jpg convert ${CONVERT_200_OPTIONS} tmp3.eps $*.view3_200.jpg convert ${CONVERT_500_OPTIONS} tmp1.eps $*.view1_500.jpg convert ${CONVERT_500_OPTIONS} tmp2.eps $*.view2_500.jpg convert ${CONVERT_500_OPTIONS} tmp3.eps $*.view3_500.jpg -rm -f tmp*eps %/read-alignments-noscwrl.under: % cd $*; \ ${UNDERTAKER_SCRIPTS}/make-read-fragments.csh \ > read-alignments-noscwrl.under %/read-alignments-scwrl.under: % cd $*; \ ${UNDERTAKER_SCRIPTS}/make-read-fragments.csh SCWRL \ > read-alignments-scwrl.under # for close homology modeling, may want to pick out best scores using # single sequences 
# Writes a small csh script that, for each entry x matching [0-9]*, greps the
# lines containing "x " out of x/*SW*dist; the combined output is sorted
# numerically on field 4, de-duplicated, and stored in the target.
# The scratch file has the fixed name tmp.script and is removed afterwards.
sw-best: [1-9]*
	echo 'foreach x ([0-9]*)' > tmp.script
	echo 'grep -h "$$x " $$x/*SW*dist' >> tmp.script
	echo 'end' >> tmp.script
	chmod +x tmp.script
	csh tmp.script \
		| sort -n -k 4 \
		| uniq \
		> $@
	-rm tmp.script

# Gzipped output of extract-guide run on the upper-case-only target alignment.
guide.a2m.gz: ${TARGET}.upper-only.a2m
	${STARTER_SCRIPTS}/extract-guide < $^ \
		| gzip > $@

# Everything from here on is only defined when PRED names a template.
ifdef PRED
# PRED2 is the first two characters of PRED (computed by sed); it is used as a
# directory component in the paths below.
PRED2 := $(shell echo ${PRED} | sed 's/\(..\).*/\1/')
ifdef PRED2
PRED_NOSTRUCT := ${PCEM}/pdb/${PRED2}/${PRED}/nostruct-align
PRED_INFO := ${PCEM}/pdb/${PRED2}/${PRED}/info

# Map ALIGN_TYPE to the value later passed to hmmscore as "-sw ${SW}":
# local -> 2, global -> 0, simplesw -> 2.
ifdef ALIGN_TYPE
ifeq (${ALIGN_TYPE},local)
SW=2
endif
ifeq (${ALIGN_TYPE},global)
SW=0
endif
ifeq (${ALIGN_TYPE},simplesw)
SW=2
endif
endif

# MASTER selects which HMM drives the alignment (ALIGN_MODEL) and the output
# base name (ALIGN_NAME).  With MASTER=target, local and global use the
# target's w0.5 model while simplesw uses ${TARGET}.mod and leaves the
# alignment method out of the name.
ifeq (${MASTER},target)
ifeq (${ALIGN_TYPE},local)
ALIGN_MODEL=${TARG_AL}.w0.5.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif
ifeq (${ALIGN_TYPE},global)
ALIGN_MODEL=${TARG_AL}.w0.5.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif
ifeq (${ALIGN_TYPE},simplesw)
ALIGN_MODEL=${TARGET}.mod
ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${ALIGN_TYPE}-adpstyle${ADP}
endif
endif

# MASTER=template: the model comes from the template's nostruct-align
# directory and the name puts the template before the target.
ifeq (${MASTER},template)
ALIGN_MODEL := ${PRED_NOSTRUCT}/${PRED}.${AL_METHOD}-w0.5.mod
ALIGN_NAME := ${PRED}/${PRED}-${TARGET}-${AL_METHOD}-${ALIGN_TYPE}-adpstyle${ADP}
endif

# MASTER=fssp: the model comes from the struct-align directory of ${FSSP}.
ifeq (${MASTER},fssp)
FSSP_STRUCT := ${PCEM}/pdb/${FSSP2}/${FSSP}/struct-align
ALIGN_MODEL := ${FSSP_STRUCT}/${FSSP}.fssp.w0.5.mod
ALIGN_NAME := ${PRED}/${FSSP}-${TARGET}-fssp-${ALIGN_TYPE}-adpstyle${ADP}
endif

# The alignment goal only exists when one of the MASTER branches above set
# ALIGN_NAME.
ifdef ALIGN_NAME
single-track-alignment: ${ALIGN_NAME}.a2m ${ALIGN_NAME}.dist
	echo $^ made.
${ALIGN_NAME}.dist ${ALIGN_NAME}.a2m: ${ALIGN_MODEL} ${PRED}/${PRED}.seq guide.a2m.gz ${HMMSCORE} ${ALIGN_NAME} \ -verbose 0 \ -alphabet protein -i $< -db guide.a2m.gz \ -db ${PRED}/${PRED}.seq \ -db_size ${LIBSIZE} \ -simple_threshold 10000 \ -sw ${SW} -dpstyle 0 -subtract_null 4 \ -adpstyle ${ADP} \ -select_align 8 endif define single-track-viterbi-op -${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \ PRED=${PRED} PRED2=${PRED2} \ ADP=1 MASTER=target single-track-alignment endef define single-track-op -${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \ PRED=${PRED} PRED2=${PRED2} \ ADP=5 MASTER=target single-track-alignment endef # Sun Dec 30 17:45:00 PST 2007 Kevin Karplus # added ALL_ALIGN_TYPES macro, and switched to # doing only local alignments. # ALL_ALIGN_TYPES ?= simplesw local global # Sat Feb 9 08:01:48 PST 2008 Kevin Karplus # put back global alignments ALL_ALIGN_TYPES ?= local global single-track-target-alignments: -mkdir -p ${PRED} test -e ${PRED}/${PRED}.seq -o '!' -e ${PRED_INFO}/${PRED}.stride-mixed.seq \ || cp -p ${PRED_INFO}/${PRED}.stride-mixed.seq ${PRED}/${PRED}.seq test -e ${PRED}/${PRED}.seq \ || ${STARTER_SCRIPTS}/extract-one-seq ${PRED} < ${PDB_DB} > ${PRED}/${PRED}.seq \ || { echo removing rm ${PRED}/${PRED}.seq; rm ${PRED}/${PRED}.seq ;} ifdef ALIGN_VITERBI $(foreach al,${MA_METHODS}, \ $(foreach at,${ALL_ALIGN_TYPES}, $(call single-track-viterbi-op))) else $(foreach al,${MA_METHODS}, \ $(foreach at,${ALL_ALIGN_TYPES}, $(call single-track-op))) endif define template_viterbi_op -grep '${PRED}' ${PCEM_INDEXES}/${al}.ids \ && ( \ ${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \ PRED=${PRED} PRED2=${PRED2} ADP=1 MASTER=template single-track-alignment \ ) endef define template_align_op -grep '${PRED}' ${PCEM_INDEXES}/${al}.ids \ && (${MAKE} -k AL_METHOD=${al} ALIGN_TYPE=${at} \ PRED=${PRED} PRED2=${PRED2} ADP=5 MASTER=template single-track-alignment) endef template-alignments: -mkdir -p ${PRED} ifdef ALIGN_VITERBI $(foreach al,${MA_METHODS}, \ $(foreach 
at,${ALL_ALIGN_TYPES}, $(call template_viterbi_op))) else $(foreach al,${MA_METHODS}, \ $(foreach at,${ALL_ALIGN_TYPES}, $(call template_align_op))) endif # Thu May 18 15:58:46 PDT 2006 Kevin Karplus # muscle alignments sometimes take a long time and don't seem very # good, so I've commented them out # -$(foreach al,${MA_METHODS}, \ # grep '${PRED}' ${PCEM_INDEXES}/${al}.ids \ # && ${MAKE} -k ${PRED}/${TARGET}-${PRED}-${al}-muscle.a2m.gz \ # PRED=${PRED} PRED2=${PRED2} ;) ######################################## # FSSP-based alignments ######################################## ifdef FSSP ifdef FSSP2 fssp-template-alignments: -mkdir -p ${PRED} -${MAKE} -k ALIGN_TYPE=local \ PRED=${PRED} PRED2=${PRED2} ADP=5 \ MASTER=fssp FSSP=${FSSP} FSSP2=${FSSP2} single-track-alignment # -${MAKE} -k ALIGN_TYPE=global \ # PRED=${PRED} PRED2=${PRED2} ADP=5 \ # MASTER=fssp FSSP=${FSSP} FSSP2=${FSSP2} single-track-alignment endif endif ######################################## # Two-track target alignments ######################################## STRUCT_WEIGHT ?= 0.3 ifdef STRUCT_ALPH # copy local structure alphabet name to SEQ_ALPH, renaming as needed to # match sequence names in info directories INFO_ALPH := ${STRUCT_ALPH} ifeq (${STRUCT_ALPH},stride-ebghtl) INFO_ALPH := 2d endif ifeq (${STRUCT_ALPH},dssp-ebghstl) INFO_ALPH := dssp endif ifeq (${STRUCT_ALPH},dssp-ehl2) INFO_ALPH := dssp endif ifeq (${STRUCT_ALPH},CB_burial_12_7) INFO_ALPH := CB-burial-12-7 endif ifeq (${STRUCT_ALPH},CB_burial_14_7) INFO_ALPH := CB-burial-14-7 endif AA_ALIGN_WEIGHT ?= 1.0 PRED_SEQ := ${PRED_INFO}/${PRED}.stride-mixed.seq PRED_MAXP := ${PRED_NOSTRUCT}/${PRED}.${AL_METHOD}.w0.5.maxp ifeq ($(wildcard ${PRED_MAXP}*),) # if there is no maxp sequence in the template library, use the real sequence, # which should be there since we do basic_setup for everything # in the dunbrack_pdbaa set PRED_MAXP := ${PRED_SEQ} endif # PRED_MAXP PRED_SEED_PAIR := 
${PRED_SEQ},${PRED_MAXP},${PRED_INFO}/${PRED}.stride-mixed.${INFO_ALPH}

# Base name of the target-driven two-track alignment outputs.
TWO_ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-${STRUCT_ALPH}-${AA_ALIGN_WEIGHT}+${TR1_ALIGN_WEIGHT}-adpstyle${ADP}

# Template-side HMMs in the template's nostruct-align directory.
# These must be written ${PRED}: make parses a bare "$PRED" as "$(P)RED",
# which produced ".../RED.<method>..." paths, so the wildcard test below could
# never find the local-structure HMM and the template alignment was never
# enabled.
PRED_HMM:=${PRED_NOSTRUCT}/${PRED}.${AL_METHOD}.w0.5.mod
PRED_TR1_HMM:=${PRED_NOSTRUCT}/${PRED}.${AL_METHOD}.${STRUCT_ALPH}.mod
ifeq ($(wildcard ${PRED_TR1_HMM}*),)
# there is no local-structure HMM for this template
PRED_TR1_HMM:=
endif

# Base name of the template-driven two-track alignment outputs
# (template name first, then target).
TWO_ALIGN_TEMPLATE_NAME := ${PRED}/${PRED}-${TARGET}-${AL_METHOD}-${ALIGN_TYPE}-${STRUCT_ALPH}-${AA_ALIGN_WEIGHT}+${TR1_ALIGN_WEIGHT}-adpstyle${ADP}

ifneq (${PRED_TR1_HMM},)
# There is a full template, so go for template alignment also
two-track-alignment: \
		${TWO_ALIGN_NAME}.a2m \
		${TWO_ALIGN_NAME}.dist \
		${TWO_ALIGN_TEMPLATE_NAME}.a2m \
		${TWO_ALIGN_TEMPLATE_NAME}.dist
	echo $^ made.
else
# No HMM for template, make only the target alignment
two-track-alignment: ${TWO_ALIGN_NAME}.a2m
	echo $^ made.
endif

# Target-driven alignment.  Three tracks are given to hmmscore: the amino-acid
# model appears twice (first with coefficient 0, then with
# ${AA_ALIGN_WEIGHT}) followed by the structure-track model with
# ${TR1_ALIGN_WEIGHT}.  The first -db supplies the target's own sequences,
# the second the template's (${PRED_SEED_PAIR}).
${TWO_ALIGN_NAME}.dist ${TWO_ALIGN_NAME}.a2m: ${TARG_AL}.w0.5.mod ${TRACKMOD_STRUCT} \
		${SECONDARY_TARGET}.seq
	-mkdir -p ${PRED}
	${HMMSCORE} ${TWO_ALIGN_NAME} \
		-verbose 0 \
		-alphabet protein,protein,${SAM_STRUCT_ALPH} \
		-trackmod ${TARG_AL}.w0.5.mod,${TARG_AL}.w0.5.mod,${TRACKMOD_STRUCT} \
		-trackcoeff 0,${AA_ALIGN_WEIGHT},${TR1_ALIGN_WEIGHT} \
		-db guide.a2m.gz,${TARG_AL}.w0.5.maxp,${TARG_AL}.${STRUCT_ALPH}.seq \
		-db ${PRED_SEED_PAIR} \
		-db_size ${LIBSIZE} \
		-simple_threshold 10000 \
		-sw ${SW} -dpstyle 0 -subtract_null 4 \
		-adpstyle ${ADP} \
		-select_align 8

# Template-driven alignment: the same hmmscore call with the template's HMMs
# as the track models.  It is only requested by two-track-alignment when
# PRED_TR1_HMM is non-empty.
${TWO_ALIGN_TEMPLATE_NAME}.dist ${TWO_ALIGN_TEMPLATE_NAME}.a2m: ${PRED_HMM} ${PRED_TR1_HMM}
	-mkdir -p ${PRED}
	${HMMSCORE} ${TWO_ALIGN_TEMPLATE_NAME} \
		-verbose 0 \
		-alphabet protein,protein,${SAM_STRUCT_ALPH} \
		-trackmod ${PRED_HMM},${PRED_HMM},${PRED_TR1_HMM} \
		-trackcoeff 0,${AA_ALIGN_WEIGHT},${TR1_ALIGN_WEIGHT} \
		-db guide.a2m.gz,${TARG_AL}.w0.5.maxp,${TARG_AL}.${STRUCT_ALPH}.seq \
		-db ${PRED_SEED_PAIR} \
		-db_size ${LIBSIZE} \
		-simple_threshold 10000 \
		-sw ${SW} -dpstyle 0
-subtract_null 4 \ -adpstyle ${ADP} \ -select_align 8 endif # ifdef STRUCT_ALPH ifdef ALIGN_VITERBI TWO_TRACK_ADP=1 else TWO_TRACK_ADP=5 endif # TO DO: # REDUCE number of two-track alignments tried, but # be sure to include some of the ones that worked well in # alignment tests. # (Actually, reducing the number may be a bad idea---we may need more diversity.) # ADD a SAM profile-profile alignment. define two_track_op -${MAKE} -k \ AL_METHOD=${al} \ ALIGN_TYPE=${l} ADP=${TWO_TRACK_ADP} \ PRED=${PRED} PRED2=${PRED2} \ MASTER=target STRUCT_ALPH=${x} \ two-track-alignment endef two-track-alignments: -mkdir -p ${PRED} $(foreach al,${MA_METHODS},$(foreach l,${ALL_ALIGN_TYPES},$(foreach x,str2 near-backbone-11 pb alpha, \ $(call two_track_op)))) # Sat Apr 5 22:29:14 PDT 2008 Kevin Karplus # instead of doing 2-track alignments for all alphabets (as below), just do a few # $(foreach al,${MA_METHODS},$(foreach l,${ALL_ALIGN_TYPES},$(foreach x,${SECONDARY_ALPHABETS}, \ # $(call two_track_op)))) ######################################## # Three-track target alignments ######################################## BURIAL_WEIGHT ?= 0.4 BURIAL_ALPH ?= CB_burial_14_7 BURIAL_INFO_ALPH := ${BURIAL_ALPH} ifeq (${BURIAL_ALPH},CB_burial_14_7) BURIAL_INFO_ALPH := CB-burial-14-7 endif THREE_ALIGN_NAME := ${PRED}/${TARGET}-${PRED}-${AL_METHOD}-${ALIGN_TYPE}-${STRUCT_ALPH}+${BURIAL_ALPH}-${AA_ALIGN_WEIGHT}+${STRUCT_WEIGHT}+${BURIAL_WEIGHT}-adpstyle${ADP} THREE_ALIGN_PRED_SEQS:=${PRED_SEQ},${PRED_MAXP},${PRED_INFO}/${PRED}.stride-mixed.${INFO_ALPH},${PRED_INFO}/${PRED}.stride-mixed.${BURIAL_INFO_ALPH} THREE_ALIGN_TARGET_SEQS := ${TARGET}.a2m,${TARG_AL}.w0.5.maxp,${SECONDARY_TARGET}.seq,${TARG_AL}.${BURIAL_ALPH}.seq three-track-alignment: ${THREE_ALIGN_NAME}.a2m ${THREE_ALIGN_NAME}.dist echo $^ made. 
# Build one three-track alignment (amino acid + ${SAM_STRUCT_ALPH} + ${BURIAL_ALPH})
# of the sequences in ${THREE_ALIGN_PRED_SEQS} against the target's track models.
# hmmscore writes ${PRED}/tmp.*; the .dist is moved and the .a2m is prefixed
# with the target sequence.
${THREE_ALIGN_NAME}.dist ${THREE_ALIGN_NAME}.a2m: ${TARG_AL}.w0.5.mod \
    ${SECONDARY_TARGET}.mod \
    ${TARG_AL}.${BURIAL_ALPH}.mod \
    ${SECONDARY_TARGET}.seq \
    ${TARG_AL}.${BURIAL_ALPH}.seq
	-mkdir -p ${PRED}
	-rm ${PRED}/tmp.*
	${HMMSCORE} ${PRED}/tmp \
		-verbose 0 \
		-alphabet protein,protein,${SAM_STRUCT_ALPH},${BURIAL_ALPH} \
		-trackmod ${TARG_AL}.w0.5.mod,${TARG_AL}.w0.5.mod,${SECONDARY_TARGET}.mod,${TARG_AL}.${BURIAL_ALPH}.mod \
		-trackcoeff 0,${AA_ALIGN_WEIGHT},${STRUCT_WEIGHT},${BURIAL_WEIGHT} \
		-db ${THREE_ALIGN_PRED_SEQS} \
		-db_size ${LIBSIZE} \
		-simple_threshold 1000000 \
		-sw ${SW} -dpstyle 0 -subtract_null 4 \
		-adpstyle ${ADP} \
		-select_align 8
	mv ${PRED}/tmp.dist ${THREE_ALIGN_NAME}.dist
	cat ${TARGET}.a2m ${PRED}/tmp.a2m > ${THREE_ALIGN_NAME}.a2m
	-rm ${PRED}/tmp.*

# adpstyle used by the three-track sub-makes: 1 if ALIGN_VITERBI is set, else 5.
ifdef ALIGN_VITERBI
THREE_TRACK_ADP=1
else
THREE_TRACK_ADP=5
endif

# three_track_op: one sub-make of "three-track-alignment".
#   $(1) STRUCT_ALPH   $(2) BURIAL_ALPH   $(3) AA_ALIGN_WEIGHT
#   $(4) STRUCT_WEIGHT $(5) BURIAL_WEIGHT $(6) ALIGN_TYPE
# ${al} is bound by the enclosing $(foreach al,...).
# The blank line before endef is REQUIRED: it makes the value end in a
# newline, so successive expansions produced by $(foreach ...) become
# separate recipe lines instead of one fused command.
define three_track_op
	-${MAKE} -k ALIGN_TYPE=$(6) ADP=${THREE_TRACK_ADP} \
		AL_METHOD=${al} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT_ALPH=$(1) BURIAL_ALPH=$(2) \
		AA_ALIGN_WEIGHT=$(3) STRUCT_WEIGHT=$(4) BURIAL_WEIGHT=$(5) \
		three-track-alignment

endef

# Run the selected three-track alignments for every method in ${MA_METHODS}.
three-track-alignments:
	-mkdir -p ${PRED}
#	$(foreach al,${MA_METHODS},$(call three_track_op,str2,CB_burial_14_7,1.0,0.4,0.4,local))
#	$(foreach al,${MA_METHODS},$(call three_track_op,str2,CB_burial_14_7,1.0,0.4,0.4,global))
	$(foreach al,${MA_METHODS},$(call three_track_op,str2,near-backbone-11,0.8,0.6,0.8,local))
	$(foreach al,${MA_METHODS},$(call three_track_op,str2,near-backbone-11,0.8,0.6,0.8,global))

endif #if PRED2
endif #if PRED

#######################################
# tree building (not done by default) #
#######################################

# programs in non-standard places
PHYTREE := /projects/compbio/usr/karplus/src/phytree/phytree
DG := /projects/compbio/usr/karplus/src/phytree/dg
DTREE := /projects/compbio/usr/karplus/src/phytree/dtree

# Run phytree on the unzipped alignment, then re-gzip the inputs/outputs.
%_sorted.ids %.tree %_sorted.a2m.gz %.phytrace: %.a2m.gz
	-gunzip -f $*.a2m.gz
	${PHYTREE} -f -o -i -r flat $* $*.a2m ${MIXTURE}
	-gzip -f $*.a2m
	-gzip -f $*_sorted.a2m
	-rm $*.phytrace $*.tree_weight

%tree.ps: %tree
	${DG} $^

%tree-unroot.ps: %tree
	${DTREE} $^

# extract the ids stripping off the muldomain-added section.
%.bare-ids: %.a2m.gz
	gunzip -c $^ \
		| ${PCBS}/ids-from-fasta -nodom \
		>$@

%.full-seqs: %.bare-ids
	fastacmd -d ${NR} -p T -i $^ -o $@

# Thu Apr 23 11:12:14 PDT 2009 Kevin Karplus
#
# PSTILL was used during CASP9, but now seems to be producing bigger files than ps2pdf,
# (which is still many times larger than distill)
#
# PSTILL_PATH ?= /usr/local/pstill_dist
# ifeq ($(wildcard ${PSTILL_PATH}*),)
# PSTILL_PATH:= ${PCPR}/pstill_dist
# endif
#
# %.pdf: %.eps
#	set PSTILL_PATH=${PSTILL_PATH} ; ${PSTILL_PATH}/pstill -gip -o $@ $^

%.pdf: %.eps
	ps2pdf -dEPSCrop $^ $@

#################################
# KEY RESIDUES AND CONSERVATION #
#################################
# This section is for realignment using key residues and selecting
# sequences that have those key residues.

${AL_METHOD}-selected: \
    ${TARGET}.${AL_METHOD}.w0.5.key-residues \
    ${TARGET}.${AL_METHOD}.selected.a2m.gz

${AL_METHOD}-realign: \
    ${TARGET}.${AL_METHOD}.w0.5.key-residues \
    ${TARGET}.${AL_METHOD}.selected.a2m.gz \
    ${TARGET}.${AL_METHOD}.realign.a2m.gz

${AL_METHOD}-realign.w0.5: \
    ${TARGET}.${AL_METHOD}.w0.5.key-residues \
    ${TARGET}.${AL_METHOD}.realign.a2m.gz \
    ${TARGET}.${AL_METHOD}.realign.w0.5.mod \
    ${TARGET}.${AL_METHOD}.realign.w0.5-logo.eps \
    ${TARGET}.${AL_METHOD}.realign.w0.5-logo.pdf

KEY_MIN_SAVINGS ?= 1.5
KEY_MIN_FREQ ?= 0.04

%.key-residues: %.saves
	${PCEM_SCRIPTS2K}/pick-key-residues \
		-minbits ${KEY_MIN_SAVINGS} \
		-minfreq ${KEY_MIN_FREQ} \
		-first_residue ${START_COL} \
		<$^ >$@

%.selected.a2m.gz: %.a2m.gz %.w0.5.key-residues
	gunzip -c $< \
		| ${PCEM_SCRIPTS2K}/select-by-key-residues \
			-first_residue ${START_COL} \
			-residues $*.w0.5.key-residues \
		| gzip \
		>$@

# NOTE(review): this rule spells the option "-selectalign" while the
# three-track rule above uses "-select_align" -- confirm hmmscore accepts both.
%.realign.a2m.gz: %.selected.w0.5.mod %.a2m.gz
	${HMMSCORE} $*.realign -i $< -db $*.a2m.gz \
		-verbose 0 \
		-adpstyle 5 -sw 2 -selectalign 8
	gzip -8f $*.realign.a2m

conserved_%: ${TARGET}.%.w0.5.key-residues
	${STARTER_SCRIPTS}/key-to-rasmol \
		-set_name conserved_$* \
		< $^ > ${TARGET}.$*.conserved.rasmol
	-ln -sf ${TARGET}.$*.conserved.rasmol $@

##########
# MUSCLE #
##########

# profile-profile alignment using Muscle:
${PRED}/${TARGET}-${PRED}-%-muscle.a2m.gz : ${TARGET}.%.a2m.gz
	${PCEM_SCRIPTS04}/muscle-profile-profile \
		-in $^ \
		-in ${PRED_NOSTRUCT}/${PRED}.$*.a2m.gz \
		-tmp /var/tmp \
		-out $@

# This section is for realignment using Bob Edgar's "muscle" program.
%.muscle.gz: %.a2m.gz
	gunzip -c $^ \
		| muscle -maxhours 2.0 \
			-out ${@:.gz=}
	gzip -9f ${@:.gz=}

%.muscle.a2m.gz: %.muscle.gz
	${PCEM_SCRIPTS04}/a2m_from_muscle -in $^ -out $@ -guide 1

#############
# ALL-ALIGN #
#############
# all-align.a2m.gz is an alignment created by merging all the
# pairwise alignments into a single multiple-alignment.
# This is useful for looking for consensus about alignments.
# It may also be useful (after thinning at 100%) as an input
# for undertaker.

all-align.a2m.gz: $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]) $(wildcard [1-9][0-9a-z][0-9a-z][0-9a-z]?)
	$(foreach dir,$^, ${MAKE} ${dir}/merged-a2m;)
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $@ */merged-a2m

%/merged-a2m: %
	${PCEM_SCRIPTS04}/merge_a2m -guide ${TARGET} -out $*/merged-a2m \
		$(wildcard $*/*.a2m $*/*.a2m.gz)

all-align.pa: all-align.a2m.gz
	${BIN_SAM}/prettyalign $^ -m5 > $@

########################################
# Building a t2k alignment from a seed #
########################################

GOS ?= /projects/compbio/data/GOS/GOS
ADPSTYLE ?= 5
BLAST_MAX ?= 10000

ifdef REDO_T2K
T2K_DEPEND := ${NR}
else
T2K_DEPEND :=
endif

%.t2k.a2m.gz: %.a2m ${T2K_DEPEND}
	${TARGET2K} -out $*.t2k \
		-final_adpstyle ${ADPSTYLE} \
		-blast_max_report ${BLAST_MAX} \
		-db ${NR} \
		-seed $< -tmp_dir /var/tmp
	gzip -f $*.t2k.a2m

########################################
# Building a t04 alignment from a seed #
########################################

ifdef REDO_T04
T04_DEPEND := ${NR}
else
T04_DEPEND=
endif

${TARGET}.t04.a2m.gz: ${TARGET}.a2m ${T04_DEPEND}
	echo "making T04 alignment"
	${TARGET04} \
		-seed $< -out $@ \
		-tmp /var/tmp -db ${NR} \
		-final_align viterbi \
		-thresh 0.0001 -thresh 0.0005 -thresh 0.002 -thresh 0.01

########################################
# Building a t06 alignment from a seed #
########################################

ifdef REDO_T06
T06_DEPEND := ${NR}
else
T06_DEPEND=
endif

# Sun Dec 30 17:54:53 PST 2007 Kevin Karplus
# Removed search of GOS database (results add too much noise)
${TARGET}.t06.a2m.gz: ${TARGET}.a2m ${T06_DEPEND}
	echo "making T06 alignment"
	${TARGET06} \
		-seed $< -out $@ \
		-tmp /var/tmp -db ${NR}

########################################
# TARGETS FOR REMOVING FILES TO REMAKE #
########################################

remove-top-reported-alignments:
	-rm ${TARG_AL}.top_reported_alignments.rdb

remove-best-scores:
	-rm ${TARG_AL}.best-scores.*

##################
# SCORING DECOYS #
##################

# The read-pdb*.under rules below write one "ReadConformPDB <file>" line per
# model found; "shopt -s nullglob" (a bash builtin) makes an unmatched glob
# expand to nothing instead of to itself.

%/read-pdb.under: %
	-mkdir -p $*
	-rm $@
	cd $*; shopt -s nullglob ; for x in *.pdb* ; do echo ReadConformPDB $$x >> read-pdb.under ; done
	-chgrp protein $@
	-chmod g+w $@

%/read-pdb+proteinshop.under: %
	-mkdir -p $*
	-rm $@
	cd $*; shopt -s nullglob ; for x in *.pdb* ; do echo ReadConformPDB $$x >> ${subst $*/,,$@} ; done
	cd $*; shopt -s nullglob ; for x in ProteinShop/*.pdb* ; do \
		y=$${x%.pdb} ; \
		z=$${y#ProteinShop/} ; \
		echo ReadConformPDB $$x name $$z >> ${subst $*/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

%/read-pdb+servers.under: %
	-mkdir -p $*
	-rm $@
	cd $*; shopt -s nullglob ; for x in ../*.ts-submitted* ; do echo ReadConformPDB $$x >> ${subst $*/,,$@} ; done
	cd $*; shopt -s nullglob ; for x in *.pdb* ; do echo ReadConformPDB $$x >> ${subst $*/,,$@} ; done
	cd $*; shopt -s nullglob ; for x in ../*mer/decoys/*.pdb* ; \
	    do \
		y=$${x#../} ; \
		z=$${y/decoys} ; \
		a=$${z/${TARGET}.} ; \
		b=$${a%.gz} ; \
		c=$${b%.pdb} ; \
		echo ReadConformPDB $$x chain A name $$c >> ${subst $*/,,$@} ; \
	    done
	cd $*; shopt -s nullglob ; for x in servers/* ; do \
		y=$${x%.pdb.gz} ; \
		z=$${y#servers/} ; \
		echo ReadConformPDB $$x name $$z >> ${subst $*/,,$@} ; \
		echo SCWRLConform >> ${subst $*/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

%/read-pdb+servers-noscwrl.under: %
	-mkdir -p $*
	-rm $@
	cd $*; shopt -s nullglob ; for x in ../*.ts-submitted* ; do echo ReadConformPDB $$x >> ${subst $*/,,$@} ; done
	cd $*; shopt -s nullglob ; for x in *.pdb* ; do echo ReadConformPDB $$x >> ${subst $*/,,$@} ; done
	cd $*; shopt -s nullglob ; for x in ../*mer/decoys/*.pdb* ; \
	    do \
		y=$${x#../} ; \
		z=$${y/decoys} ; \
		a=$${z/${TARGET}.} ; \
		b=$${a%.gz} ; \
		c=$${b%.pdb} ; \
		echo ReadConformPDB $$x chain A name $$c >> ${subst $*/,,$@} ; \
	    done
	cd $*; shopt -s nullglob ; for x in servers/* ; do \
		y=$${x%.pdb.gz} ; \
		z=$${y#servers/} ; \
		echo ReadConformPDB $$x name $$z >> ${subst $*/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

%.pretty: %.rdb
	-mv -f $@ $@.old
	${STARTER_SCRIPTS}/prettyscore -terse -targpfx -decpoint < $^ > $@

# The score-all*.rdb rules instantiate the score-all.under template with sed,
# pipe it to undertaker, then sort the resulting table by "cost".
decoys/score-all.%.rdb: %.costfcn decoys/read-pdb.under costfcn-init.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	cat ${STARTER_SCRIPTS}/score-all.under \
		| sed s/START_COL/${START_COL}/g \
		| sed -e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} cost < $@ > sort.tmp
	mv -f sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

decoys/score-all+proteinshop.%.rdb: %.costfcn decoys/read-pdb+proteinshop.under costfcn-init.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	cat ${STARTER_SCRIPTS}/score-all.under \
		| sed -e s/START_COL/${START_COL}/g \
			-e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
			-e s/read-pdb/read-pdb+proteinshop/ \
			-e s/score-all/score-all+proteinshop/ \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} cost < $@ > sort.tmp
	mv -f sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

decoys/score-all+servers.%.rdb: %.costfcn decoys/read-pdb+servers.under costfcn-init.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	cat ${STARTER_SCRIPTS}/score-all.under \
		| sed -e s/START_COL/${START_COL}/g \
			-e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
			-e s/read-pdb/read-pdb+servers/ \
			-e s/score-all/score-all+servers/ \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} cost < $@ > sort.tmp
	mv -f sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

decoys/score-all+servers-noscwrl.%.rdb: %.costfcn decoys/read-pdb+servers-noscwrl.under costfcn-init.under
	-mv -f $@ $@.old
	-rm -f decoys/all.clashes*
	-rm -f decoys/all.breaks*
	cat ${STARTER_SCRIPTS}/score-all.under \
		| sed -e s/START_COL/${START_COL}/g \
			-e s/XXX0000/${TARGET}/ -e s/try1/$*/ \
			-e s/read-pdb/read-pdb+servers-noscwrl/ \
			-e s/score-all/score-all+servers-noscwrl/ \
		| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} cost < $@ > sort.tmp
	mv -f sort.tmp $@
	gzip -9f decoys/all.clashes
	gzip -9f decoys/all.breaks

# Only build the hand-picked models if superimpose-best.under exists.
# Uses ${MAKE} (not a literal "make") so that -n, -k, the jobserver and
# command-line variable overrides reach the sub-make.
manual_models:
	if test -e superimpose-best.under; then ${MAKE} best-models.pdb.gz best-models.pictures; fi

# edit superimpose-best.under to pick which models to superimpose
# Run undertaker on the hand-edited superposition script, then gzip its output.
best-models.pdb.gz: superimpose-best.under
	nice -1 ${UNDERTAKER} < superimpose-best.under
	-gzip -f best-models.pdb

# Append the jpeg views of the manual models to the summary page.
best-models.pictures: best-models.pdb.gz best-models.make_jpeg
	-${STARTER_SCRIPTS}/add_jpeg_views_html \
		-basename ${TARGET} \
		-rootname best-models \
		-explain manual \
		>> ${WORKDIR}/summary.html

############################################
# FETCHING ROBETTA MODELS FROM CASP7 CACHE #
############################################

# Download model number $* for ${TARGET}; if the server's reply contains
# 'DOES NOT EXIST' the download is discarded, otherwise it is gzipped.
decoys/robetta-model%.pdb.gz:
	wget -N \
		'http://robetta.bakerlab.org/servlet/robetta.GetModel?m=$*&t=${TARGET}' \
		-O ${@:.gz=}
	if grep 'DOES NOT EXIST' ${@:.gz=} ; then rm ${@:.gz=} ; else gzip -9f ${@:.gz=} ; fi

fetch_robetta: $(foreach m,1 2 3 4 5 6 7 8 9 10,decoys/robetta-model${m}.pdb.gz)

###############################################
# FETCHING TARBALLS OF ALL SERVER PREDICTIONS #
# This is the CASP7 Method, and now obsolete. #
###############################################

fetch_tarball: decoys/${TARGET}.3D.srv.tar.gz

decoys/${TARGET}.3D.srv.tar.gz:
	wget -N \
		'http://www2.predictioncenter.org/tarballs/${TARGET}.3D.srv.tar.gz' \
		-O decoys/${TARGET}.3D.srv.tar.gz

# The unpacking should probably be done on silo, as native file I/O is
# *so much* faster than I/O over the network.
# The tarball unpacks into ${TNUM}/; the TS1..TS5 models get a .pdb suffix,
# are gzipped, and the directory is moved to decoys/servers.
unpack_tarball: decoys/${TARGET}.3D.srv.tar.gz
	-rm -rf decoys/servers
	tar -x --gunzip -f $^
	for x in ${TNUM}/*TS[1-5] ; do mv $$x $$x.pdb; done
	# '&&' (not ';'): never gzip *.pdb in the wrong directory if cd fails
	cd ${TNUM} && gzip -9f *.pdb
	-mv -f ${TNUM} decoys/servers

###############################################
# FETCHING TARBALLS OF ALL SERVER PREDICTIONS
# This is the CASP9 method.
# (.pdb is no longer added to file names)
###############################################

# Unpack the server-prediction tarball into decoys/servers.
# '&&' chaining ensures tar never extracts into the wrong directory
# when "cd decoys" fails.
server_download decoys/servers: ${TARGET}.3D.srv.tar.gz
	-(rm -rf decoys/servers || mv decoys/servers decoys/servers.TRASH)
	umask 2 \
		&& cd decoys \
		&& gunzip -c ../$^ \
		| tar xvf -
	mv decoys/${TARGET} decoys/servers
	-gzip -9f decoys/servers/*.pdb
	touch decoys/servers

# wget errors are ignored, but an empty or missing download fails the rule.
${TARGET}.3D.srv.tar.gz:
	-wget -N '${CASP_WEBSITE_SERVERS_DOWNLOAD}/$@' -O $@
	if [ ! -s $@ ]; then rm $@; false; fi

# Same as decoys/servers, but for the full predictions tarball.
decoys/predictions: ${TARGET}-predictions.tar.gz
	-(rm -rf decoys/predictions || mv decoys/predictions decoys/predictions.TRASH)
	umask 2 \
		&& cd decoys \
		&& gunzip -c ../$^ \
		| tar xvf -
	mv decoys/${TARGET} decoys/predictions
	-gzip -9f decoys/predictions/${TARGET}*_[1-5]
	touch decoys/predictions

${TARGET}-predictions.tar.gz:
	-wget -N '${CASP_WEBSITE_PREDICTIONS_DOWNLOAD}/${TARGET}.tar.gz' -O $@
	if [ ! -s $@ ]; then rm $@; false; fi

######################################
# USING ROSETTA TO REPACK SIDECHAINS #
######################################

# Fall back to ${PCB_SUB2} when no file matches the default paths.txt location.
PATHS_TXT?=${PCB_SUB}/paths.txt
ifeq ($(wildcard ${PATHS_TXT}*),)
PATHS_TXT:=${PCB_SUB2}/paths.txt
endif

paths.txt: ${PATHS_TXT}
	cp -p $^ $@

# Disulfide handling: use the given file if DISULF_FILE is set,
# otherwise let Rosetta find disulfides.
ifdef DISULF_FILE
DISULF_ARGS= -fix_disulf ${DISULF_FILE} -norepack_disulf
else
DISULF_ARGS= -find_disulf -norepack_disulf
endif

%.repack.res: %.a2m
	${STARTER_SCRIPTS}/make-repack-res-file -start_col ${START_COL} < $^ > $@

%.dimer.repack.res: %.a2m
	${STARTER_SCRIPTS}/make-repack-res-file -multimer 2 -start_col ${START_COL} < $^ > $@

# score a file using Rosetta, producing an annotated .score.pdb file
# and adding to decoys/%.fasc
decoys/%.score.pdb: decoys/%.pdb paths.txt
	${ROSETTA} \
		-s $< -read_all_chains \
		-score -scorefile $* \
		-decoystats \
		-fa_output -fa_input \
		${DISULF_ARGS} \
		-nstruct 1
	mv decoys/$*_0001.pdb decoys/$*.score.pdb

# If the target is a refinement target, what is the corresponding
# simple target? (just the target, if not a refinement target)
T0_from_TR := ${TARGET:TR%=T0%}

# Shorthand for a common request---do an optimization run and repack
${TARGET}.do%:
	${MAKE} -k decoys/${TARGET}.try$*-opt3.pdb.gz \
		decoys/${TARGET}.try$*-opt3.repack-nonPC.pdb.gz \
		decoys/${TARGET}.try$*-opt3.gromacs0.pdb.gz \
		decoys/${TARGET}.try$*-opt3.gromacs0.repack-nonPC.pdb.gz \
		decoys/grep-best-rosetta \
		decoys/read-pdb.under \
		decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty
	-gzip -9f decoys/${TARGET}.try$**.pdb

# same as do%, but for multimer targets (unpacks before calling gromacs)
${TARGET}.mult%:
	${MAKE} -k decoys/${TARGET}.try$*-opt3.pdb.gz \
		decoys/${TARGET}.try$*-opt3.repack-nonPC.pdb.gz \
		decoys/${TARGET}.try$*-opt3.unpack.pdb.gz \
		decoys/${TARGET}.try$*-opt3.unpack.gromacs0.pdb.gz \
		decoys/${TARGET}.try$*-opt3.unpack.gromacs0.repack-nonPC.pdb.gz \
		decoys/grep-best-rosetta \
		decoys/read-pdb.under \
		decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty
	-gzip -9f decoys/${TARGET}.try$**.pdb

# Shorthand for a common repacking request--just make "try12.repack"
%.repack:
	${MAKE} -k decoys/${TARGET}.$*-opt3.repack-nonPC.pdb.gz decoys/score-all.$*.rdb decoys/score-all.$*.pretty

# repack sidechains using Rosetta design mode, not changing
# CYS and PRO residues
# Works in a scratch directory $*.tmp (with a decoys/ subdirectory for
# Rosetta's output) that is removed after the result has been moved out.
# The Rosetta log is written inside $*.tmp, so it only survives when the
# recipe stops before the cleanup.
decoys/%.repack-nonPC.pdb.gz: decoys/%.pdb paths.txt \
    ${TARGET}.repack.res guide.a2m.gz ${DISULF_FILE}
	mkdir -p $*.tmp/decoys
	cp -f $^ $*.tmp
	cp -f $< $*.tmp/XXXX.pdb
	gunzip -c guide.a2m.gz > $*.tmp/XXXXA.fasta
	cd $*.tmp \
		&& ${ROSETTA} aa XXXX A \
			-s ./XXXX.pdb \
			-scorefile $* \
			-read_all_chains \
			-design -fixbb -resfile ${TARGET}.repack.res \
			-fa_output -fa_input \
			-ex1 -ex2 -ex34 \
			-use_input_sc \
			${DISULF_ARGS} \
			-nstruct 1 \
			> $*.repack.log 2>&1
	mv $*.tmp/decoys/XXXX_0001.pdb decoys/$*.repack-nonPC.pdb
	rm -rf $*.tmp
	-gzip -9f decoys/$*.repack-nonPC.pdb

# same as above, but starting with gzipped pdb file.
# Repack side chains with Rosetta design mode starting from a gzipped model.
# Works in a scratch directory $*.tmp (with a decoys/ subdirectory for
# Rosetta's output) that is removed after the result has been moved out.
decoys/%.repack-nonPC.pdb.gz: decoys/%.pdb.gz paths.txt \
    ${TARGET}.repack.res guide.a2m.gz ${DISULF_FILE}
	mkdir -p $*.tmp/decoys
	cp -f $^ $*.tmp
	gunzip -c $< > $*.tmp/XXXX.pdb
	gunzip -c guide.a2m.gz > $*.tmp/XXXXA.fasta
	# '&&' (not ';'): do not start Rosetta in the wrong directory if cd fails
	cd $*.tmp \
		&& ${ROSETTA} aa XXXX A \
			-s ./XXXX.pdb \
			-scorefile $* \
			-read_all_chains \
			-design -fixbb -resfile ${TARGET}.repack.res \
			-fa_output -fa_input \
			-ex1 -ex2 -ex34 \
			-use_input_sc \
			${DISULF_ARGS} \
			-nstruct 1
	mv $*.tmp/decoys/XXXX_0001.pdb decoys/$*.repack-nonPC.pdb
	rm -rf $*.tmp
	-gzip -9f decoys/$*.repack-nonPC.pdb

# repack sidechains using Rosetta design mode, not changing
# CYS and PRO residues
# Dimer variant: runs in the current directory; the fasta is the guide
# sequence followed by the target sequence without its '>' header lines.
dimer%.repack-nonPC.pdb.gz: dimer%.pdb paths.txt \
    ${TARGET}.dimer.repack.res guide.a2m.gz
	cp -f $< XXXX.pdb
	gunzip -c guide.a2m.gz > XXXXA.fasta
	grep -v '>' ${TARGET}.a2m >> XXXXA.fasta
	${ROSETTA} aa XXXX A \
		-s XXXX.pdb \
		-scorefile $* \
		-read_all_chains \
		-design -fixbb -resfile ${TARGET}.dimer.repack.res \
		-fa_output -fa_input \
		-ex1 -ex2 -ex34 \
		-use_input_sc \
		${DISULF_ARGS} \
		-nstruct 1 \
		> dimer$*.repack.log 2>&1
	-gzip -9f dimer$*.repack.log
	-rm XXXX.pdb XXXXA.fasta
	mv decoys/XXXX_0001.pdb dimer$*.repack-nonPC.pdb
	gzip -9f dimer$*.repack-nonPC.pdb

# The following full-atom relax does not seem to be working yet.
decoys/%.relax.pdb: decoys/%.pdb paths.txt guide.a2m.gz
	cp -f $< XXXX.pdb
	gunzip -c guide.a2m.gz > XXXXA.fasta
	${ROSETTA} aa XXXX A \
		-s $< -read_all_chains \
		-relax -minimize -farlx \
		-new_refold \
		-scorefile $* \
		-fa_output -fa_input \
		-nstruct 1
	-rm XXXX.pdb XXXXA.fasta
	mv decoys/aa$*_0001.pdb decoys/$*.relax.pdb

decoys/grep-best-rosetta: decoys
	${STARTER_SCRIPTS}/sort-by-rosetta

##################
# MAKING A DIMER #
##################

# Set up a dimer/ subdirectory: doubled sequence, Makefile with
# MONOMER_LENGTH, cost-function init with a KnownBreak, and shifted rdb files.
make_dimer: dimer dimer/${TARGET}.a2m dimer/Makefile \
    dimer/costfcn-init.under \
    dimer/decoys \
    dimer/MQA_init.costfcn dimer/best-evalue \
    $(foreach STRUCT_ALPH,${SECONDARY_ALPHABETS}, \
        $(foreach AL,${MA_METHODS}, \
            dimer/${TARGET}.${AL}.${STRUCT_ALPH}.rdb ))
	-fixmode dimer

dimer:
	-mkdir dimer

dimer/decoys:
	-mkdir dimer/decoys

# The monomer a2m followed by a second copy of its sequence lines.
dimer/${TARGET}.a2m: ${TARGET}.a2m
	cp -p $^ $@
	grep -v '^>' <$^ >> $@

dimer/Makefile: Makefile
	sed 's/TARGET:=/MONOMER_LENGTH:=${LENGTH}\nTARGET:=/' < $^ > $@

dimer/MQA_init.costfcn: MQA_init.costfcn
	cp -p $^ $@

dimer/best-evalue: best-evalue
	cp -p $^ $@

ifneq ($(wildcard ${TARGET}.a2m),)
FIRST_RESIDUE ?= $(shell ${STARTER_SCRIPTS}/first_residue < ${TARGET}.a2m)
DIMER_BREAK_BEFORE?= ${FIRST_RESIDUE}$(shell perl -e 'print ${START_COL} + ${LENGTH}')
$(warning DIMER_BREAK_BEFORE='${DIMER_BREAK_BEFORE}')
endif

dimer/costfcn-init.under: costfcn-init.under
	sed 's|[iI]nclude |include ../|' < $^ > $@
	echo KnownBreak ${DIMER_BREAK_BEFORE} >> $@

# Monomer table followed by a copy with Pos shifted by ${LENGTH}.
dimer/${TARGET}.%.rdb: ${TARGET}.%.rdb
	cp $^ $@
	compute Pos += ${LENGTH} < $^ \
		| headchg -del \
		>> $@

##################
# MAKING A TRIMER #
##################

make_trimer: trimer trimer/${TARGET}.a2m trimer/Makefile \
    trimer/costfcn-init.under \
    trimer/decoys \
    trimer/MQA_init.costfcn trimer/best-evalue \
    $(foreach STRUCT_ALPH,${SECONDARY_ALPHABETS}, \
        $(foreach AL,${MA_METHODS}, \
            trimer/${TARGET}.${AL}.${STRUCT_ALPH}.rdb ))
	-fixmode trimer

trimer:
	-mkdir trimer

trimer/decoys:
	-mkdir trimer/decoys

trimer/${TARGET}.a2m: ${TARGET}.a2m
	cp -p $^ $@
	grep -v '^>' <$^ >> $@
	grep -v '^>' <$^ >> $@

trimer/Makefile: Makefile
	sed 's/TARGET:=/MONOMER_LENGTH:=${LENGTH}\nTARGET:=/' < $^ > $@

trimer/MQA_init.costfcn: MQA_init.costfcn
	cp -p $^ $@

trimer/best-evalue: best-evalue
	cp -p $^ $@

TRIMER_BREAK_BEFORE?= ${FIRST_RESIDUE}$(shell perl -e 'print ${START_COL} + ${LENGTH} + ${LENGTH}')

trimer/costfcn-init.under: costfcn-init.under
	sed 's|[iI]nclude |include ../|' < $^ > $@
	echo KnownBreak ${DIMER_BREAK_BEFORE} >> $@
	echo KnownBreak ${TRIMER_BREAK_BEFORE} >> $@

trimer/${TARGET}.%.rdb: ${TARGET}.%.rdb
	cp $^ $@
	compute Pos += ${LENGTH} < $^ \
		| headchg -del \
		>> $@
	compute Pos += ${LENGTH} + ${LENGTH} < $^ \
		| headchg -del \
		>> $@

##########################
# MAKING CASP SUBMISSION #
##########################

CASP_ID ?= SAM_${SAM_YEAR}

######################################
# DSSP_EHL2 merged prediction stuff: #
######################################

# Mon Dec 31 09:04:32 PST 2007 Kevin Karplus
# No longer predicting dssp-ebghstl and stride-ebghtl
# (the two commented-out targets below deliberately have NO trailing
# backslash: a make comment ending in '\' continues onto the next line
# and would swallow the rule header that follows)
# %.${AL_METHOD}.dssp-ebghstl.rdb
# %.${AL_METHOD}.stride-ebghtl.rdb

%.${AL_METHOD}.dssp-ehl2.rdb: \
    %.${AL_METHOD}.str2.rdb \
    %.${AL_METHOD}.str4.rdb \
    %.${AL_METHOD}.pb.rdb \
    %.${AL_METHOD}.bys.rdb \
    %.${AL_METHOD}.alpha.rdb
	${STARTER_SCRIPTS}/RDBCombine $^ -a ${CASP_ID} > $@

# Combined prediction over every alignment method in ${MA_METHODS}.
${TARGET}.dssp-ehl2.rdb: $(foreach AL_METHOD, ${MA_METHODS}, \
    ${TARGET}.${AL_METHOD}.str2.rdb \
    ${TARGET}.${AL_METHOD}.str4.rdb \
    ${TARGET}.${AL_METHOD}.pb.rdb \
    ${TARGET}.${AL_METHOD}.bys.rdb \
    ${TARGET}.${AL_METHOD}.alpha.rdb)
	${STARTER_SCRIPTS}/RDBCombine $^ -a ${CASP_ID} > $@

%.dssp-ehl2.seq: %.dssp-ehl2.rdb
	${STARTER_SCRIPTS}/seq-from-rdb < $^ > $@

# CASP formatted prediction---may also be useful for EVA or LiveBench
${TARGET}.dssp-ehl2: ${TARGET}.dssp-ehl2.rdb
	${STARTER_SCRIPTS}/rdb2casp $^ ${CASP_ID} > $@

METHOD_FILE ?= ${TARGET}.method

ifdef MANUAL_TOP_HITS
USE_CASP_PARENT := -parent '${MANUAL_TOP_HITS}'
else
USE_CASP_PARENT := -parent "N/A"
endif

%.method: ${STARTER}/generic_method_file
	cp $^ $@

model%.ts: best-models.pdb.gz ${METHOD_FILE}
	gunzip -c $< \
		| ${STARTER_SCRIPTS}/pdb2casp \
			-target ${TARGET} -author ${CASP_ID} \
			-method ${METHOD_FILE} \
			${USE_CASP_PARENT}

casp_models: $(foreach x,1 2 3 4 5, model${x}.ts)

# CASP e-mail submission
EMAIL_3D_SUBJECT ?= SAM-${SAM_YEAR} hand ${TARGET}

# Mail one model (via ${CLUSTER_HEAD} when that is set), then rename the
# file to <name>-submitted.
model%.email: model%.ts
# ifdef EMAIL_3D
ifneq (${CLUSTER_HEAD},)
	ssh ${CLUSTER_HEAD} "cd ${WORKDIR}; mail -s '${EMAIL_3D_SUBJECT} $^' ${EMAIL_ADDRESS} < $^"
else
	mail -s '${EMAIL_3D_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
endif
	-mv -f $^ $^-submitted
	echo mailing model $* to ${EMAIL_ADDRESS} done
# else
#	echo no request to mail $^
# endif

email: $(foreach x,1 2 3 4 5, model${x}.email)

# #####################
# # Mailing multimers #
# #####################

# multimer_to_ts: $(1) is a command that writes the model to stdout,
# $(2) is the model number used in the MODEL record and output file name.
define multimer_to_ts
	$(1) | \
	${STARTER_SCRIPTS}/pdb2casp \
		-target ${TARGET} -author ${CASP_ID} \
		-method ${METHOD_FILE} \
		-noerase_chain_ids \
		${USE_CASP_PARENT}
	sed -e 's/TARGET ${TARGET}/TARGET ${TARGET} OLIGOMER/' \
		-e 's/^MODEL 1/MODEL $(2)/' \
		< model1.ts > dimer$(2).ts
	rm model1.ts
endef

# takes an ordinary gzipped model that resulted from a try...run
# (must have MODEL records)
define modelfullname_to_ts
	${MAKE} $(subst pdb,unpack.pdb,$(1))
	$(call multimer_to_ts, gunzip -c $(subst pdb,unpack.pdb,$(1)),$(2))
endef

# takes an ordinary gzipped model that resulted from a try...run
# (must have MODEL records)
define model_to_ts
	$(call modelfullname_to_ts,decoys/${TARGET}.$(1).pdb.gz,$(2))
endef

# takes an unpacked (separate chains) multimer that
# lacks a model record and converts it to ts format
define modelless_to_ts
	echo 'MODEL 1' > $(1)-tmp
	gunzip -c $(1).gz >> $(1)-tmp
	$(call multimer_to_ts,cat $(1)-tmp,$(2))
	rm $(1)-tmp
endef

# In the Makefile, you need to have targets for each of the dimer.ts models:
# dimer1.ts:
#	$(call model_to_ts,try5-opt3,1)

# submake, so that you don't need all five targets
dimer_models:
	${MAKE} -k $(foreach x,1 2 3 4 5, dimer${x}.ts)

# Commented out CASP e-mail submission
#
# dimer%.email: dimer%.ts
#	mail -s '${EMAIL_3D_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
#	-mv -f $^ $^-submitted
#	echo mailing dimer $* to ${EMAIL_ADDRESS} done
#
# email_dimers: $(foreach x,1 2 3 4 5, dimer${x}.email)
#
###############################
# Refinement targets for CASP #
################################
# This section is mostly obsolete, intended to get
# TR models into the T0xxx directory.
# Now we generally use a separate TR directory.
#
# REFINE := ${TARGET:T0%=tr%}
#
# fetch_refinement: decoys/${REFINE}.pdb.gz
#
# decoys/${REFINE}.pdb.gz:
#	-wget -N \
#	${CASP_WEBSITE}/${REFINE}.pdb.txt
#	mv ${REFINE}.pdb.txt ${@:.gz=}
#	gzip -9f ${@:.gz=}
#
# refine_check: decoys/${REFINE}.pdb.gz \
#	decoys/${REFINE}.repack-nonPC.pdb.gz \
#	decoys/${REFINE}.gromacs0.pdb.gz \
#	decoys/${REFINE}.gromacs0.repack-nonPC.pdb.gz \
#	decoys/score-all.unconstrained.pretty
#
# # edit superimpose-refine.under to pick which models to superimpose
# refine-models.pdb.gz: superimpose-refine.under
#	nice -1 ${UNDERTAKER} < $^
#	-gzip -f refine-models.pdb
#
# REFINE_METHOD_FILE ?= ${TARGET}.refine_method
#
# casp_refines: refine-models.pdb.gz
#	gunzip -c $< \
#	| ${STARTER_SCRIPTS}/pdb2casp \
#		-prefix refine. \
#		-target ${TARGET:T0%=TR%} -author ${CASP_ID} \
#		-method ${REFINE_METHOD_FILE} \
#		-parent "N/A"
#
# # EMAIL_REFINE_SUBJECT ?= SAM-${SAM_YEAR} hand ${TARGET}
# #
# refine%.email: refine.model%.ts
#	mail -s '${EMAIL_REFINE_SUBJECT} $^' ${EMAIL_ADDRESS} < $^
#	-mv -f $^ $^-submitted
#	echo mailing refine $* to ${EMAIL_ADDRESS} done
#
# email_refines: $(foreach x,1 2 3 4 5, refine${x}.email)
#

########################################################
# FOR REFINEMENT TARGETS: FINDING WHAT SERVER CAME CLOSE
########################################################

${TARGET}-compare.rdb: find-TR.under everything.costfcn ../${T0_from_TR}/decoys/read-pdb.under
	${UNDERTAKER} < $<
	sorttbl GDT real_cost < $@ > tmp-$@
	mv tmp-$@ $@

# Finding out how different the proposed refines are from the starting model:
how-different.rdb: how-different.under decoys/read-pdb.under try1.costfcn
	${UNDERTAKER} < $<
	sorttbl GDT real_cost < $@ > tmp-$@
	mv tmp-$@ $@

#############################################
# MQA RDB File Stuff
# Sun May 4 22:11:17 PDT 2008 John Archie
#############################################

QA_CWD := ${WORKDIR}
QA_PREDICTDIR ?= $(QA_CWD)/decoys/SAM_T08/
QA_MAINDIR ?= $(QA_CWD)
MQA_VARIANTS ?= SAM-T08-MQAO SAM-T08-MQAU SAM-T08-MQAC

save_SAM_servers: $(foreach SAM,T02 T06 T08, save_SAM_server_${SAM})

decoys/SAM_%:
	${STARTER_SCRIPTS}/save_server_dir -server $* -target ${TARGET}

save_SAM_server_%: ALWAYS
	${STARTER_SCRIPTS}/save_server_dir -server $* -target ${TARGET}

# The read-*.under / scwrl-*.under rules below generate undertaker scripts
# with one entry per file found in the named decoys/ subdirectory; the
# conformation name is the file name without directory, .gz and .pdb.

decoys/read-servers.under: decoys/servers
	-mkdir -p decoys/servers
	-rm $@
	cd decoys; shopt -s nullglob ; for x in servers/*; do \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#servers/} ; \
		echo ReadConformPDB $$x name $$z >> ${subst decoys/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

decoys/read-servers-exemplar.under: decoys/read-servers.under
	grep _TS1\\\|_AL1 $< > $@

decoys/read-servers-scwrl.under: decoys/servers
	-mkdir -p decoys/servers-scwrl
	-rm $@
	cd decoys; shopt -s nullglob ; for x in servers-scwrl/*; do \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#servers-scwrl/} ; \
		echo ReadConformPDB $$x name $$z >> ${subst decoys/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

decoys/read-predictions.under: decoys/predictions
	-mkdir -p decoys/predictions
	-rm $@
	cd decoys; shopt -s nullglob ; for x in predictions/*; do \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#predictions/} ; \
		echo ReadConformPDB $$x name $$z >> ${subst decoys/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

decoys/scwrl-predictions.under: decoys/predictions
	-mkdir -p decoys
	-rm $@
	cd decoys; shopt -s nullglob ; for x in predictions/* ; do \
		echo ReadConformPDB $$x >> ${subst decoys/,,$@} ; \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#predictions/} ; \
		echo NameConform $$z >> ${subst decoys/,,$@} ; \
		echo SCWRLConform >> ${subst decoys/,,$@} ; \
		echo PrintConformPDB predictions-scwrl/$$z-scwrl \
			>> ${subst decoys/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

decoys/scwrl-servers.under: decoys/servers
	-mkdir -p decoys
	-rm $@
	cd decoys; shopt -s nullglob ; for x in servers/* ; do \
		echo ReadConformPDB $$x >> ${subst decoys/,,$@} ; \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#servers/} ; \
		echo NameConform $$z >> ${subst decoys/,,$@} ; \
		echo SCWRLConform >> ${subst decoys/,,$@} ; \
		echo PrintConformPDB servers-scwrl/$$z-scwrl \
			>> ${subst decoys/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

%/servers-scwrl: %/servers %/scwrl-servers.under
	-mkdir -p $@
	-chgrp protein $@
	-chmod g+w $@
	umask 002; \
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/_domain// \
		-e s/read-pdb/scwrl-servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER_SCRIPTS}/read-decoys.under \
		| nice -2 ${UNDERTAKER}
	gzip -9f $@/*

%/predictions-scwrl: %/predictions %/scwrl-predictions.under
	-mkdir -p $@
	-chgrp protein $@
	-chmod g+w $@
	umask 002; \
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/_domain// \
		-e s/read-pdb/scwrl-predictions/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER_SCRIPTS}/read-decoys.under \
		| nice -2 ${UNDERTAKER}
	gzip -9f $@/*

decoys/read-servers+scwrl.under: decoys/servers decoys/servers-scwrl
	-mkdir -p decoys
	-rm $@
	cd decoys; shopt -s nullglob ; for x in servers/*; do \
		echo ReadConformPDB $$x >> ${subst decoys/,,$@} ; \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#servers/} ; \
		echo NameConform $$z >> ${subst decoys/,,$@} ; \
	    done
	cd decoys; shopt -s nullglob ; for x in servers-scwrl/*; do \
		echo ReadConformPDB $$x >> ${subst decoys/,,$@} ; \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#servers-scwrl/} ; \
		echo NameConform $$z >> ${subst decoys/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

decoys/read-predictions+scwrl.under: decoys/predictions decoys/predictions-scwrl
	-mkdir -p decoys
	-rm $@
	cd decoys; shopt -s nullglob ; for x in predictions/*; do \
		echo ReadConformPDB $$x >> ${subst decoys/,,$@} ; \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#predictions/} ; \
		echo NameConform $$z >> ${subst decoys/,,$@} ; \
	    done
	cd decoys; shopt -s nullglob ; for x in predictions-scwrl/*; do \
		echo ReadConformPDB $$x >> ${subst decoys/,,$@} ; \
		w=$${x%.gz} ; \
		y=$${w%.pdb} ; \
		z=$${y#predictions-scwrl/} ; \
		echo NameConform $$z >> ${subst decoys/,,$@} ; \
	    done
	-chgrp protein $@
	-chmod g+w $@

#decoys/servers.evaluate.%.rdb: decoys/read-servers.under decoys/SAM_T08
#	-rm -f $@
#	cd ${QA_PREDICTDIR}; \
#	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
#	-e s/REAL_PDB/${REAL_PDB}/ \
#	-e s#MAINDIR#${QA_MAINDIR}# \
#	< ${STARTER_SCRIPTS}/evaluate-mqa.under \
#	| nice -2 ${UNDERTAKER}
#	-rm -f sort.tmp
#	${SORTTBL} name < $@ > sort.tmp
#	mv -f sort.tmp $@

# This will now be done by the per costfcn targets
#
#decoys/servers.evaluate.everything.rdb: decoys/read-servers.under decoys/SAM_T08
#	-rm -f $@
#	cd ${QA_PREDICTDIR}; \
#	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
#	-e s/REAL_PDB/${REAL_PDB}/ \
#	-e s#MAINDIR#${QA_MAINDIR}# \
#	< ${STARTER_SCRIPTS}/evaluate-mqa.under \
#	| nice -2 ${UNDERTAKER}
#	-rm -f sort.tmp
#	${SORTTBL} name < $@ > sort.tmp
#	mv -f sort.tmp $@

#Stuff for a quick before CASP evaluation--now should be done better by
#more general suffix-rule targets
#
#domain_qa_evals: $(foreach a,$(DOMAINS),$(a)/decoys/servers.evaluate.everything.rdb $(a)/decoys/mqa_evaluate.rdb)
#
#define DOMAIN_QA_EVAL_TEMPLATE
#${a}/decoys/servers.evaluate.everything.rdb: decoys/read-servers.under decoys/SAM_T08
#	mkdir -p ${a}/decoys/
#	-rm -f $@
#	cd ${QA_PREDICTDIR}; \
#	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
#	-e 's/AddID REAL_PDB//' \
#	-e 's|ReadConformPDBids|InfilePrefix ${QA_MAINDIR}/\nReadConformPDB ${TARGET}_${a}.pdb\nNameConform ${TARGET}_${a}|' \
#	-e 's|OutFilePrefix MAINDIR/decoys|OutFilePrefix ${QA_MAINDIR}/${a}/decoys|' \
#	-e s#MAINDIR#${QA_MAINDIR}# \
#	< ${STARTER_SCRIPTS}/evaluate-mqa.under \
#	| nice -2 ${UNDERTAKER}
#	-rm -f sort.tmp
#	${SORTTBL} name < $$@ > sort.tmp
#	mv -f sort.tmp $$@
#
#${a}/decoys/mqa_evaluate.rdb: ${a}/decoys/servers.evaluate.everything.rdb decoys/servers.similarity.rdb
#	${JOINTBL} name $$< < decoys/servers.similarity.rdb > sort1.tmp
#	(${CASP9_SCRIPTS}/qa2rdb.pl MQAO | ${SORTTBL} name | \
#	${JOINTBL} name sort1.tmp) < SAM-T08-MQAO.qa1 > sort2.tmp
#	(${CASP9_SCRIPTS}/qa2rdb.pl MQAU | ${SORTTBL} name | \
#	${JOINTBL} name sort2.tmp) < SAM-T08-MQAU.qa1 > sort3.tmp
#	(${CASP9_SCRIPTS}/qa2rdb.pl MQAC | ${SORTTBL} name | \
#	${JOINTBL} name sort3.tmp) < SAM-T08-MQAC.qa1 > $$@
#	rm -f sort1.tmp sort2.tmp sort3.tmp
#endef
#
#$(foreach a,$(DOMAINS),$(eval $(call DOMAIN_QA_EVAL_TEMPLATE,$(a))))

# The two similarity rules work on an unzipped copy of decoys/servers under /tmp.
decoys/servers.similarity.rdb:
	-rm -f $@
	cp -r decoys/servers /tmp/$(TARGET)-$(PID)-servers
	-gunzip /tmp/$(TARGET)-$(PID)-servers/*.gz
	ulimit -c 0; nice -5 $(CASP9_SCRIPTS)/medianSimilarity.pl -cf /tmp/$(TARGET)-$(PID)-servers /tmp/$(TARGET)-$(PID)-servers | sorttbl name > $@
	-rm -rf /tmp/$(TARGET)-$(PID)-servers

decoys/servers.simAll.rdb:
	-rm -f $@
	cp -r decoys/servers /tmp/$(TARGET)-$(PID)-servers
	-gunzip /tmp/$(TARGET)-$(PID)-servers/*.gz
	ulimit -c 0; nice -5 $(CASP9_SCRIPTS)/medianSimilarity.pl -c /tmp/$(TARGET)-$(PID)-servers /tmp/$(TARGET)-$(PID)-servers | sorttbl name > $@
	-rm -rf /tmp/$(TARGET)-$(PID)-servers

decoys/similarity.servers.evaluate.%.rdb: decoys/servers.evaluate.%.rdb decoys/servers.similarity.rdb
	${PCB}/sorttbl name < $< > sort.$*.eval.$(PID).tmp
	${PCB}/jointbl name decoys/servers.similarity.rdb < sort.$*.eval.$(PID).tmp > $@
	rm -f sort.$*.eval.$(PID).tmp

decoys/servers-scwrl.similarity.rdb: decoys/servers.similarity.rdb
	${STARTER_SCRIPTS}/add_scwrl_ext.pl < $< > $@

decoys/similarity.servers-scwrl.evaluate.%.rdb: decoys/servers+scwrl.evaluate.%.rdb decoys/servers-scwrl.similarity.rdb
	${PCB}/sorttbl name < $< > sort.$*.eval.$(PID).tmp
	${PCB}/jointbl name decoys/servers-scwrl.similarity.rdb < sort.$*.eval.$(PID).tmp > $@
	rm -f sort.$*.eval.$(PID).tmp

qa_all: $(foreach MQA,${MQA_VARIANTS},${MQA}.qa1)

# The two MQAO recipes end at "> $@"; a trailing "; \" here would continue
# the recipe onto whatever line follows the rule.
SAM-T08-MQAO.qa1: ${QA_PREDICTDIR}/best-evalue decoys/similarity.servers.evaluate.everything.rdb
	cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP9_SCRIPTS}/reweight_rdb.pl ${QA_DATA}/align.weights \
		| ${SORTTBL} cost \
		| ${CASP9_SCRIPTS}/rdb2qa1.pl -- \
			${MQAO_ID} ${TARGET} 0.03836759 0.90146034 \
		> $@

SAM-T08-MQAO-scwrl.qa1: ${QA_PREDICTDIR}/best-evalue decoys/similarity.servers-scwrl.evaluate.everything.rdb
	cat decoys/similarity.servers-scwrl.evaluate.everything.rdb \
		| ${CASP9_SCRIPTS}/reweight_rdb.pl ${QA_DATA}/align.weights \
		| ${SORTTBL} cost \
		| ${CASP9_SCRIPTS}/rdb2qa1.pl -- \
			${MQAO_ID} ${TARGET} 0.03836759 0.90146034 \
		> $@

# MQAU and MQAC pick the "-lev" or "-hev" weight file by comparing the
# contents of the best-evalue file ($<) with a fixed threshold.
SAM-T08-MQAU.qa1: ${QA_PREDICTDIR}/best-evalue decoys/similarity.servers.evaluate.everything.rdb
	if perl -e "exit not "`cat $<`" < 0.31687" ; then \
		cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP9_SCRIPTS}/reweight_rdb.pl \
			${QA_DATA}/align+under-lev.weights \
		| ${SORTTBL} cost \
		| ${CASP9_SCRIPTS}/rdb2qa1.pl -- \
			${MQAU_ID} ${TARGET} 0.01746253 -0.04650613 \
		> $@; \
	else \
		cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP9_SCRIPTS}/reweight_rdb.pl \
			${QA_DATA}/align+under-hev.weights \
		| ${SORTTBL} cost \
		| ${CASP9_SCRIPTS}/rdb2qa1.pl \
			-- ${MQAU_ID} ${TARGET} 0.03445373 2.09727401 \
		> $@; \
	fi

SAM-T08-MQAC.qa1: ${QA_PREDICTDIR}/best-evalue decoys/similarity.servers.evaluate.everything.rdb
	if perl -e "exit not "`cat $<`" < 6.9768e-15" ; then \
		cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP9_SCRIPTS}/reweight_rdb.pl \
			${QA_DATA}/align+under+sim-lev.weights \
		| ${SORTTBL} cost \
		| ${CASP9_SCRIPTS}/rdb2qa1.pl -- \
			${MQAC_ID} ${TARGET} 0.04554128 2.34628144 \
		> $@; \
	else \
		cat decoys/similarity.servers.evaluate.everything.rdb \
		| ${CASP9_SCRIPTS}/reweight_rdb.pl \
			${QA_DATA}/align+under+sim-hev.weights \
		| ${SORTTBL} cost \
		| ${CASP9_SCRIPTS}/rdb2qa1.pl -- \
			${MQAC_ID} ${TARGET} 0.04055935 1.79455986 \
		> $@; \
	fi

mail_qa_all: $(foreach MQA,${MQA_VARIANTS},mail_qa1_${MQA})

# Mail one .qa1 file and log the submission in README (README.QA as fallback).
mail_qa1_%: %.qa1
	mail -s '$* hand QA ${TARGET}' ${EMAIL_ADDRESS} < $^
	echo -n `date` " " >> README \
		|| echo -n `date` " " >> README.QA
	echo $* hand QA ${TARGET} Submitted >> README \
		|| echo $* hand QA ${TARGET} Submitted >> README.QA

under_qa_all: $(foreach MQA,${MQA_VARIANTS},${MQA}.read_under) metaserve-MQAU1.under metaserve-MQAC1.under

%.read_under: %.qa1
	${STARTER_SCRIPTS}/under_from_qa1 -num 10 < $^ > $@

run_metaservers: meta_MQAU1 meta_MQAC1

meta_%: ALWAYS
	${MAKE} -k decoys/${TARGET}.$*-opt3.pdb.gz \
		decoys/${TARGET}.$*-opt3.repack-nonPC.pdb.gz \
		decoys/${TARGET}.$*-opt3.gromacs0.pdb.gz \
		decoys/${TARGET}.$*-opt3.gromacs0.repack-nonPC.pdb.gz \
		decoys/grep-best-rosetta

# BUG: The target for MQAC1 should depend on SAM-T08-MQA.read_under
# but stripping off the number is too messy.
# Run undertaker on a metaserve-*.under script, logging stdout and stderr to
# metaserve-*.log, then gzip the resulting model and the log.  All three
# steps ignore errors (leading "-").
# NOTE(review): the uncompressed model is not written by this recipe's
# redirection; presumably the undertaker script itself writes it -- confirm.
decoys/${TARGET}.%-opt3.pdb.gz: metaserve-%.under
	-${UNDERTAKER} < metaserve-$*.under > metaserve-$*.log 2>&1
	-gzip -9f $(@:.gz=)
	-gzip -9f metaserve-$*.log

#############################################
# GROMACS optimization
# Sat Jan 22 19:31:50 PST 2005 Kevin Karplus
#############################################

# Option passed to run-gromacs by both gromacs rules below; the default
# is -nokeeptmp.  Override on the command line to change it.
GROMACS_KEEP ?= -nokeeptmp

# Run a gzipped model through run-gromacs with force field 0 and gzip the
# result.
%.gromacs0.pdb.gz: %.pdb.gz
	gunzip -c $^ \
	| ${STARTER_SCRIPTS}/run-gromacs -force_field 0 \
		${GROMACS_KEEP} -tmp /var/tmp \
	| gzip -9 \
	> $@

# Same as the gromacs0 rule with force field 4.  Uses GROMACS_KEEP like its
# sibling instead of a hard-coded -nokeeptmp (identical with the default).
%.gromacs4.pdb.gz: %.pdb.gz
	gunzip -c $^ \
	| ${STARTER_SCRIPTS}/run-gromacs -force_field 4 \
		${GROMACS_KEEP} -tmp /var/tmp \
	| gzip -9 \
	> $@

# Shorthand for a common request: do undertaker then gromacs
# (the stem is the try number; the sub-make uses -k to keep going after a
# failure).
${TARGET}.gro%:
	${MAKE} -k decoys/${TARGET}.try$*-opt3.pdb.gz \
		decoys/${TARGET}.try$*-opt3.gromacs0.pdb.gz \
		decoys/${TARGET}.try$*-opt3.gromacs4.pdb.gz \
		decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty

# Without MONOMER_LENGTH the multimer rules only print a message.
# NOTE(review): these recipes exit 0 without creating their target.
ifndef MONOMER_LENGTH
%.unpack.pdb.gz:
	echo "can't make $@ without specification of MONOMER_LENGTH"
${TARGET}.multgro%:
	echo "can't make $@ without specification of MONOMER_LENGTH"
endif

ifdef MONOMER_LENGTH
# Rule for unpacking a single chain into a homo-multimer:
%.unpack.pdb.gz: %.pdb.gz
	gunzip -c $^ \
	| ${STARTER_SCRIPTS}/unpack-multimer -length ${MONOMER_LENGTH} \
		-start ${START_COL} \
	| gzip > $@

# The multgro target is needed for multimers,
# to make sure that gromacs sees separate chains on its inputs,
# and to resep
${TARGET}.multgro%:
	${MAKE} -k decoys/${TARGET}.try$*-opt3.pdb.gz \
		decoys/${TARGET}.try$*-opt3.unpack.pdb.gz \
		decoys/${TARGET}.try$*-opt3.unpack.gromacs0.unpack.pdb.gz \
		decoys/${TARGET}.try$*-opt3.unpack.gromacs4.unpack.pdb.gz \
		decoys/score-all.try$*.rdb decoys/score-all.try$*.pretty
endif

############################################################
#
# target for selecting among very close templates
#
############################################################
# for selecting among close templates, we need to look at the simple
# Smith-Waterman scores, not
# the HMM-based scores, as the HMM may
# have drifted away a bit from the original target.
# This method just uses blastp on the dunbrack-pdbaa set
# (the database searched is whatever PDB_DB names).
${TARGET}.pdb_blast.txt : ${TARGET}.a2m
	${PCB_SUB}/blastall -p blastp -d ${PDB_DB} -i $^ \
		-e 100 -I -m 9 -o $@

# Quick search for close homologs in NR:
# (same blastall options as above, against the database named by NR).
${TARGET}.nrp_blast.txt : ${TARGET}.a2m
	${PCB_SUB}/blastall -p blastp -d ${NR} -i $^ \
		-e 100 -I -m 9 -o $@

##########################################################
# targets for finding out lowest E-value, to choose which
# model-quality assessment function to use.
##########################################################

# only have dependency for best-evalue if the best-scores file exists.
# If the rdb file doesn't exist (say in a dimer directory), then
# don't try to remake the best-evalue file.
# The recipe keeps the 7th whitespace-separated field of the "Evalue" line
# printed by "summ -m" and strips the first comma from it.
ifneq ($(wildcard ${TARGET}.best-scores.rdb*),)
best-evalue: ${TARGET}.best-scores.rdb
	summ -m < $^ \
	| grep Evalue \
	| awk '{print $$7}' \
	| sed s/,// \
	> $@
endif

# E-value thresholds used by the two cost-function selectors below;
# both may be overridden by the caller.
EVALUE_THRESHOLD_FOR_MQA ?= 0.3
EVALUE_THRESHOLD_FOR_INIT ?= 0.004

# Choose the model-quality cost function.  The recipe assembles a small perl
# script in MQA.pl, pasting in the contents of best-evalue, and runs it; the
# script copies MQA_highE.costfcn to the target when that value is
# >= EVALUE_THRESHOLD_FOR_MQA and MQA_lowE.costfcn otherwise.
# NOTE(review): MQA.pl is a fixed scratch name shared with MQA_init.costfcn,
# so the two rules must not run at the same time (make -j).
MQA.costfcn: best-evalue ${STARTER_SCRIPTS}/MQA_highE.costfcn ${STARTER_SCRIPTS}/MQA_lowE.costfcn
	echo -n 'my $$hilo = (' > MQA.pl
	cat best-evalue >> MQA.pl
	echo '>= ${EVALUE_THRESHOLD_FOR_MQA} )? '>> MQA.pl
	echo '"${STARTER_SCRIPTS}/MQA_highE.costfcn": ' >> MQA.pl
	echo '"${STARTER_SCRIPTS}/MQA_lowE.costfcn";' >> MQA.pl
	echo 'system("cp $$hilo $@");' >> MQA.pl
	perl < MQA.pl

# Same selection mechanism with EVALUE_THRESHOLD_FOR_INIT and the
# MQA_initRC_highE / MQA_initRC_lowE cost functions.
MQA_init.costfcn: best-evalue ${STARTER_SCRIPTS}/MQA_initRC_highE.costfcn ${STARTER_SCRIPTS}/MQA_initRC_lowE.costfcn
	echo -n 'my $$hilo = (' > MQA.pl
	cat best-evalue >> MQA.pl
	echo '>= ${EVALUE_THRESHOLD_FOR_INIT} )? '>> MQA.pl
	echo '"${STARTER_SCRIPTS}/MQA_initRC_highE.costfcn": ' >> MQA.pl
	echo '"${STARTER_SCRIPTS}/MQA_initRC_lowE.costfcn";' >> MQA.pl
	echo 'system("cp $$hilo $@");' >> MQA.pl
	perl < MQA.pl

####################################################
#
# targets for generating constraints from alignments
#
####################################################

# Generate all-templates.under from the best-scores table.  The
# top_reported_alignments table is a prerequisite only; the recipe does not
# read it directly.
all-templates.under: ${TARGET}.best-scores.rdb \
		${TARGET}.top_reported_alignments.rdb
	${STARTER_SCRIPTS}/make_all_templates_under \
		--target ${TARGET} \
		< ${TARGET}.best-scores.rdb \
		> $@

# Feed optimize.under to undertaker after substituting the target name and
# start column and deleting the first "log_" on each line.
# NOTE(review): the six targets are listed on one rule but none of them is
# written by the recipe's own redirection (only the log file is);
# presumably undertaker writes them all in one run.  Under GNU make this
# header declares six independent rules sharing the recipe.
# NOTE(review): ">&" is not POSIX sh syntax -- confirm which SHELL is in use.
align.constraints align_bonus.constraints \
rejected.constraints rejected_bonus.constraints \
noncontact.constraints noncontact_bonus.constraints \
: all-templates.under
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/log_// \
		< ${STARTER_SCRIPTS}/optimize.under \
	| ${UNDERTAKER} >& optimize-constraints.log

# Same as above without the "log_" deletion, for the log_* constraint files.
log_align.constraints log_align_bonus.constraints \
log_rejected.constraints log_rejected_bonus.constraints \
log_noncontact.constraints log_noncontact_bonus.constraints \
: all-templates.under
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		< ${STARTER_SCRIPTS}/optimize.under \
	| ${UNDERTAKER} >& log_optimize-constraints.log

################################
# EVALUATING THE FINAL RESULTS #
################################

# Mon Oct 27 05:57:22 PDT 2008 Kevin Karplus
# There are still bugs in this for server-only predictions,
# because ${QA_PREDICTDIR}/costfcn-init.under has
# InfilePrefix commands that end up looking for things in ${WORKDIR}
#
# I'll have to find a different workaround, either modifying
# ${QA_PREDICTDIR}/costfcn-init.under, which I really DON'T want
# to do, or creating a dummy ${WORKDIR}/costfcn-init.under if there
# isn't one already that replaces the blank InfilePrefix calls with
# ones to ${QA_PREDICTDIR}.
# Model sets for which MODEL_EVALUATE_TEMPLATE and DOMAIN_EVALUATE_TEMPLATE
# rules are generated, and the colon-separated path handed to
# add_cost_cols.pl.  Both may be overridden by the caller.
TARGET_SETS ?= servers servers+scwrl predictions predictions+scwrl servers-scwrl
MODEL_PATH ?= decoys/servers:decoys/servers-scwrl:decoys/predictions:decoys/predictions-scwrl:.

# EVAL_DIR replaces the EVALDIR placeholder in the undertaker scripts:
# QA_PREDICTDIR when no best-scores file exists here, WORKDIR otherwise.
ifeq ($(wildcard ${TARGET}.best-scores.rdb*),)
EVAL_DIR?=${QA_PREDICTDIR}
else
EVAL_DIR?=${WORKDIR}
endif

# Fetch the real structure with pdb-get and filter it through nmr_ax.pl.
# The shell variable real_pdb is set from REAL_PDB on the same logical line
# so that the substring expansion (characters from offset 4 on) has a value,
# exactly as the evaluation templates below do.
# NOTE(review): the substring expansion is not POSIX sh -- confirm SHELL.
${TARGET}.pdb:
	real_pdb=${REAL_PDB}; \
	zcat `pdb-get ${REAL_PDB}` | ${STARTER_SCRIPTS}/nmr_ax.pl -vnc $${real_pdb:4} > $@

# Instantiate the evaluation cost-function initialisation script by
# substituting every XXX0000, EVALDIR, T0XXX and START_COL placeholder.
costfcn-init-eval.under: ${STARTER_SCRIPTS}/costfcn-init-eval.under
	cat < $^ \
	| sed s/XXX0000/${TARGET}/g \
	| sed 's|EVALDIR|${EVAL_DIR}|g' \
	| sed s/T0XXX/${T0_from_TR}/g \
	| sed s/START_COL/${START_COL}/g \
	> $@

# The evaluation rules below share one shape: remove the old table, run an
# edited copy of a starter undertaker script through undertaker (which is
# expected to write the table; the recipe itself does not redirect to it),
# then sort the table in place via a scratch file.
# NOTE(review): sort.tmp is a fixed scratch name used by several rules, so
# they are not safe to run in parallel.

# Evaluation against one domain of the real structure; the stem replaces
# "_domain" in the script.
decoys/evaluate_%.rdb: decoys/read-pdb+servers.under ${TARGET}_%.real.pdb.gz costfcn-init-eval.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e s/_domain/_$*/ \
		-e s/read-pdb/read-pdb+servers/ \
		-e 's/ReadConformPDBids/# ReadConformPDBids/' \
		-e 's/# ReadConformPDB /ReadConformPDB /' \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e 's/include COSTFCN.costfcn//' \
		-e s/.COSTFCN// \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

# Whole-target evaluation without an extra cost function.
decoys/evaluate.rdb: decoys/read-pdb+servers.under costfcn-init-eval.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e s/_domain// \
		-e s/read-pdb/read-pdb+servers/ \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e 's/include COSTFCN.costfcn//' \
		-e s/.COSTFCN// \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

# One evaluate.rdb per CASP domain found in DOMAINS.
domain_evaluations: $(foreach a,$(DOMAINS),$(a)/decoys/evaluate.rdb)

# Template: table for one external QA group (argument 1), produced by
# qa2rdb.pl and then sorted by name.  Inside the templates a doubled dollar
# sign defers an automatic variable to recipe time; single-dollar references
# are expanded when the template is called.
define OTHER_MQA_GROUP
decoys/servers.${1}.rdb:
	${STARTER_SCRIPTS}/qa2rdb.pl ${1} ${TARGET} $$@
	${SORTTBL} name < $$@ > sort-${UNIQ}.tmp
	mv sort-${UNIQ}.tmp $$@
endef

$(foreach a,$(CASP_QA_GROUPS),$(eval $(call OTHER_MQA_GROUP,$(a))))

# Template: per-domain (argument 1) evaluation rules.  The stale-table
# removal uses the deferred form of the target variable; a single-dollar
# form would expand to nothing when the template is called and the old
# table would never be removed.
define DOMAIN_SCORE_TEMPLATE
${1}/decoys/evaluate.rdb: decoys/read-pdb+servers.under costfcn-init-eval.under
	-rm -f $$@
	mkdir -p ${1}/decoys/
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's/AddID REAL_PDB//' \
		-e s/_domain// \
		-e 's/ReadConformPDBids/ReadConformPDB ${TARGET}_${1}.pdb\nNameConform ${TARGET}_${1}/' \
		-e 's|OutFilePrefix decoys|OutFilePrefix ${1}/decoys|' \
		-e s/read-pdb/read-pdb+servers/ \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e 's/include COSTFCN.costfcn//' \
		-e s/.COSTFCN// \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} real_cost < $$@ > sort.tmp
	mv -f sort.tmp $$@

${1}/decoys/similarity.servers.evaluate.%.rdb: ${1}/decoys/servers.evaluate.%.rdb decoys/servers.similarity.rdb
	${SORTTBL} name < $$< > sort-${UNIQ}.tmp
	${JOINTBL} name decoys/servers.similarity.rdb < sort-${UNIQ}.tmp > $$@
	rm -f sort-${UNIQ}.tmp

${1}/decoys/servers.evaluate.%.rdb: decoys/servers.%.rdb ${1}/decoys/servers-real_cost+extra.rdb
	${JOINTBL} name ${1}/decoys/servers-real_cost+extra.rdb < $$< > $$@

${1}/decoys/servers-scwrlonly.evaluate.%.rdb: ${1}/decoys/servers+scwrl.evaluate.%.rdb
	row name =~ m/-scwrl\$$$$/ < $$< > $$@
endef

$(foreach a,$(DOMAINS),$(eval $(call DOMAIN_SCORE_TEMPLATE,$(a))))

# Instantiate evaluate.under for one cost function (the stem) without
# running it.
evaluate.%.under: ${STARTER_SCRIPTS}/evaluate.under Makefile costfcn-init-eval.under
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e s/_domain// \
		-e 's|COSTFCN.costfcn|$*.costfcn|' \
		-e s/.COSTFCN/.$*/ \
		-e s/_domain// \
		-e s/read-pdb/read-pdb+servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< $< > $@

# Evaluate the exemplar server models with cost function "stem".
decoys/evaluate-exemplar.%.rdb: %.costfcn decoys/read-servers-exemplar.under costfcn-init-eval.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e s/evaluate_domain/evaluate-exemplar/ \
		-e s/_domain// \
		-e 's|COSTFCN.costfcn|$*.costfcn|' \
		-e s/.COSTFCN/.$*/ \
		-e s/read-pdb/read-servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER_SCRIPTS}/evaluate-exemplar.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

# Evaluate real structure plus server models with cost function "stem".
decoys/evaluate.%.rdb: %.costfcn decoys/read-pdb+servers.under costfcn-init-eval.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e s/evaluate_domain/evaluate/ \
		-e s/_domain// \
		-e 's|COSTFCN.costfcn|$*.costfcn|' \
		-e s/.COSTFCN/.$*/ \
		-e s/read-pdb/read-pdb+servers/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

# Template: whole-target evaluation rules for one model set (argument 1).
# The evaluate rule has three alternative pattern rules with the same target
# pattern; make picks one by which prerequisites exist or can be made.
# In the sed scripts the inserted ReadConformPDB / NameConform lines use a
# tab after the keyword, while the later deletions match the keyword
# followed by a space.
# The real_cost rule lists the real structure file once as a prerequisite
# and no longer prints a leftover debugging line.
define MODEL_EVALUATE_TEMPLATE
decoys/${1}.evaluate.%.rdb: %.costfcn decoys/${1}-real_cost+extra.rdb decoys/read-${1}.under costfcn-init-eval.under
	-rm -f $$@
	real_pdb=${REAL_PDB}; \
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's|ReadConformPDBids|ReadConformPDB\t${TARGET}.pdb chain '$$$${real_pdb:4}'\nNameConform\t${TARGET}|' \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e s/evaluate_domain/${1}.evaluate/ \
		-e s/_domain// \
		-e s/SetCost/SetRealCost\\nSetCost/ \
		-e 's|COSTFCN.costfcn|$$*.costfcn|' \
		-e s/.COSTFCN/.$$*/ \
		-e s/read-pdb/read-${1}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e 's/ReadConformPDB .*//' \
		-e 's/[pP]rintConformPDB .*//' \
		-e 's/PrintConformGDT .*//' \
		-e 's/SCWRLConform.*//' \
		-e 's/NameConform .*//' \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f join.tmp sort.tmp
	${SORTTBL} name < $$@ > sort.tmp
	(${CASP9_SCRIPTS}/unique_rdb_headers.pl name \
		decoys/${1}-real_cost+extra.rdb < sort.tmp) | \
	${PCB}/jointbl name decoys/${1}-real_cost+extra.rdb > join.tmp
	${SORTTBL} real_cost < join.tmp > $$@
	-rm -f join.tmp sort.tmp

decoys/${1}-real_cost.rdb: decoys/read-${1}.under costfcn-init-eval.under ${TARGET}.pdb
	-rm -f $$@
	real_pdb=${REAL_PDB}; \
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's|ReadConformPDBids|ReadConformPDB\t${TARGET}.pdb chain '$$$${real_pdb:4}'\nNameConform\t${TARGET}|' \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e s/evaluate_domain.COSTFCN/${1}-real_cost/ \
		-e s/_domain// \
		-e 's|include COSTFCN.costfcn||' \
		-e s/read-pdb/read-${1}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e 's/ReadConformPDB .*//' \
		-e 's/[pP]rintConformPDB .*//' \
		-e 's/PrintConformGDT .*//' \
		-e 's/SCWRLConform.*//' \
		-e 's/NameConform .*//' \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} name < $$@ > sort.tmp
	mv -f sort.tmp $$@

decoys/${1}-real_cost+extra.rdb: decoys/${1}-real_cost.rdb
	${STARTER_SCRIPTS}/add_cost_cols.pl \
		${TARGET} ${MODEL_PATH} ${CASP9}/domain_list.ids \
		< $$< > $$@

decoys/${1}.evaluate.%.rdb: decoys/${1}.evaluate.everything.rdb ${QA_DATA}/%.weights
	${STARTER_SCRIPTS}/reweight_rdb.pl ${QA_DATA}/$$*.weights < $$< > $$@

decoys/${1}.evaluate.%.rdb: decoys/${1}-real_cost+extra.rdb decoys/${1}.%.rdb
	${JOINTBL} name $$< < decoys/${1}.$$*.rdb > $$@
	${SORTTBL} real_cost < $$@ > sort${UNIQ}.tmp
	mv sort${UNIQ}.tmp $$@

decoys/${1}_complete.evaluate.%.rdb: decoys/${1}.evaluate.%.rdb
	${ROW} missing_atoms == 0 < $$^ > $$@

decoys/${1}_incomplete.evaluate.%.rdb: decoys/${1}.evaluate.%.rdb
	${ROW} missing_atoms '>' 0 < $$^ > $$@
endef

$(foreach a,${TARGET_SETS},$(eval $(call MODEL_EVALUATE_TEMPLATE,$(a))))

# Template: per-domain counterpart of MODEL_EVALUATE_TEMPLATE for model set
# (argument 1) and domain (argument 2).  The read script for the model set
# is checked for with an explicit test rather than listed as a prerequisite.
define DOMAIN_EVALUATE_TEMPLATE
${2}/decoys/similarity.${1}.evaluate.%.rdb: ${2}/decoys/similarity.${1}.evaluate.everything.rdb ${QA_DATA}/%.weights
	${STARTER_SCRIPTS}/reweight_rdb.pl ${QA_DATA}/$$*.weights < $$< > $$@

${2}/decoys/${1}.evaluate.%.rdb: ${2}/decoys/${1}.evaluate.everything.rdb ${QA_DATA}/%.weights
	${STARTER_SCRIPTS}/reweight_rdb.pl ${QA_DATA}/$$*.weights < $$< > $$@

${2}/decoys/${1}.evaluate.%.rdb: %.costfcn ${2}/decoys/${1}-real_cost+extra.rdb costfcn-init-eval.under
	if [ ! -e decoys/read-${1}.under ]; then false; fi # this file should be built from the target level (not the domain level) and so is not listed as a dependency
	-rm -f $$@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's|OutFilePrefix decoys|OutFilePrefix ${2}/decoys|' \
		-e 's|ReadConformPDBids|ReadConformPDB\t${TARGET}_${2}.pdb\nNameConform\t${TARGET}_${2}|' \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e s#evaluate_domain#${1}.evaluate# \
		-e s/_domain// \
		-e s/SetCost/SetRealCost\\nSetCost/ \
		-e 's|COSTFCN.costfcn|$$*.costfcn|' \
		-e s/.COSTFCN/.$$*/ \
		-e s/read-pdb/read-${1}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e 's/ReadConformPDB .*//' \
		-e 's/[pP]rintConformPDB .*//' \
		-e 's/PrintConformGDT .*//' \
		-e 's/SCWRLConform.*//' \
		-e 's/NameConform .*//' \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f join$(UNIQ).tmp sort$(UNIQ).tmp
	${SORTTBL} name < $$@ > sort$(UNIQ).tmp
	(${CASP9_SCRIPTS}/unique_rdb_headers.pl name \
		${2}/decoys/${1}-real_cost+extra.rdb < sort$(UNIQ).tmp) \
	| ${PCB}/jointbl name ${2}/decoys/${1}-real_cost+extra.rdb \
	> join$(UNIQ).tmp
	${SORTTBL} real_cost < join$(UNIQ).tmp > $$@
	-rm -f join$(UNIQ).tmp sort$(UNIQ).tmp

${2}/decoys/${1}_complete.evaluate.%.rdb: ${2}/decoys/${1}.evaluate.%.rdb
	${ROW} missing_atoms == 0 < $$^ > $$@

${2}/decoys/${1}_incomplete.evaluate.%.rdb: ${2}/decoys/${1}.evaluate.%.rdb
	${ROW} missing_atoms '>' 0 < $$^ > $$@

${2}/decoys/${1}-real_cost.rdb: costfcn-init-eval.under
	if [ ! -e decoys/read-${1}.under ]; then false; fi # this file should be built from the target level (not the domain level) and so is not listed as a dependency
	-rm -f $$@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's/ReadConformPDBids/ReadConformPDB\t${TARGET}_${2}.pdb\nNameConform\t${TARGET}_${2}/' \
		-e 's|OutFilePrefix decoys|OutFilePrefix ${2}/decoys|' \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e s/evaluate_domain.COSTFCN/${1}-real_cost/ \
		-e s/_domain// \
		-e 's|include COSTFCN.costfcn||' \
		-e s/read-pdb/read-${1}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e 's/ReadConformPDB .*//' \
		-e 's/[pP]rintConformPDB .*//' \
		-e 's/PrintConformGDT .*//' \
		-e 's/SCWRLConform.*//' \
		-e 's/NameConform .*//' \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort$(UNIQ).tmp
	${SORTTBL} name < $$@ > sort$(UNIQ).tmp
	mv -f sort$(UNIQ).tmp $$@

${2}/decoys/${1}-real_cost+extra.rdb: ${2}/decoys/${1}-real_cost.rdb
	${STARTER_SCRIPTS}/add_cost_cols.pl \
		${TARGET} ${MODEL_PATH} ${CASP9}/domain_list.ids \
		< $$< > $$@
endef

$(foreach a,${TARGET_SETS},$(foreach b,${DOMAINS},$(eval $(call DOMAIN_EVALUATE_TEMPLATE,$(a),$(b)))))

# Evaluation driven by read-pdb.under only (no read-pdb substitution is
# applied to the script).
decoys/mini_evaluate.%.rdb: %.costfcn decoys/read-pdb.under costfcn-init-eval.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e s/evaluate_domain/mini_evaluate/ \
		-e s/_domain// \
		-e 's|COSTFCN.costfcn|$*.costfcn|' \
		-e s/.COSTFCN/.$*/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		< ${STARTER_SCRIPTS}/evaluate.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

# Evaluation of the alignments, driven by eval_alignments.under.
decoys/eval_alignments.rdb: costfcn-init-eval.under
	-rm -f $@
	sed -e s/XXX0000/${TARGET}/ -e s/START_COL/${START_COL}/ \
		-e s/REAL_PDB/${REAL_PDB}/ \
		-e s/_domain// \
		-e 's|EVALDIR|${EVAL_DIR}|' \
		-e 's/include COSTFCN.costfcn//' \
		-e s/.COSTFCN// \
		< ${STARTER_SCRIPTS}/eval_alignments.under \
	| nice -2 ${UNDERTAKER}
	-rm -f sort.tmp
	${SORTTBL} real_cost < $@ > sort.tmp
	mv -f sort.tmp $@

# clean up after
# prediction is over
# Removes editor backups, autosave files, *.old tables and Template.atoms*
# files below the current directory, gzips every *.log and *.pdb file, then
# runs FIXMODE on the tree.  Each find prints the files it touched.
clean: ALWAYS
	echo REMOVING
	find . -name '*~' -exec rm -f '{}' \; -print
	find . -name '.*~' -exec rm -f '{}' \; -print
	find . -name '#*' -exec rm -f '{}' \; -print
	find . -name '*.pretty.old' -exec rm -f '{}' \; -print
	find . -name '*.rdb.old' -exec rm -f '{}' \; -print
	find . -name 'Template.atoms*' -exec rm -f '{}' \; -print
	echo GZIPPING
	find . -name '*.log' -exec gzip -9f '{}' \; -print
	find . -name '*.pdb' -exec gzip -9f '{}' \; -print
	${FIXMODE} .

# Run FIXMODE on the current directory tree.
fixmode:
	${FIXMODE} .

# fake targets
# Putting the "ALWAYS" target as a dependency means that the
# actions for a particular rule will be applied, even if the
# target already exists and is newer than other dependencies.
# The command-style targets are declared with .PHONY, the special target
# GNU make recognises, so a stray file with one of these names cannot
# suppress them.
.PHONY: ALWAYS clean fixmode
ALWAYS:

# Fri Feb 6 17:52:00 PST 2009 John Archie
# moved this section of code to the end of the makefile so that the
# new make would only call it when no other rules matched (i.e.
# evaluate.%.under for cost functions).
# Fallback for any .under script: copy the starter script of the same name,
# substituting every XXX0000, T0XXX and START_COL placeholder.
ifdef REDO_UNDER
# Sat Apr 19 09:59:39 PDT 2008 Kevin Karplus
# Changed dependency, so that .under files not remade when
# STARTER_SCRIPTS version changes.
%.under: ${STARTER_SCRIPTS}/%.under ${TARGET}.upper-only.a2m
	cat < $< \
	| sed s/XXX0000/${TARGET}/g \
	| sed s/T0XXX/${T0_from_TR}/g \
	| sed s/START_COL/${START_COL}/g \
	> $@
else
# Mon Jun 9 17:35:13 PDT 2008 Kevin Karplus
# removed dependency on ${TARGET}.upper-only.a2m
# so that dimer and trimer costfcn-init.under not remade.
# With no prerequisites at all, an existing .under file is never remade.
%.under:
	cat < ${STARTER_SCRIPTS}/$*.under \
	| sed s/XXX0000/${TARGET}/g \
	| sed s/T0XXX/${T0_from_TR}/g \
	| sed s/START_COL/${START_COL}/g \
	> $@
endif

# End of Make.main