# Make.rr
# a preliminary makefile for residue-residue contact predictions
# George Shackelford

# This is a test file.  When it all works well, the results
# will be moved to Make.main.

include /projects/compbio/usr/ggshack/research/contact/casp6/Make.main

# ---------------------------------------------------------------------------
# Defaults.  Each `ifndef` block only assigns when the variable is still
# empty after Make.main has been read, so Make.main, the environment, or the
# command line can preset any of them.
# ---------------------------------------------------------------------------

# GNU make predefines MAKE; this is only a fallback.
ifndef MAKE
MAKE=make
endif

ifndef MA_TARGETS
MA_TARGETS=t04 t2k
endif

# The build_mis and build_dists loops iterate over MA_METHODS, which this
# file never set on its own, while MA_TARGETS above is never read here.
# Fall back to MA_TARGETS so the loops are not silently empty.
# NOTE(review): if Make.main already defines MA_METHODS this is a no-op;
# confirm that MA_TARGETS was meant to be the same list.
ifndef MA_METHODS
MA_METHODS=${MA_TARGETS}
endif

# Alignment base name; recursive (=) on purpose so that TARGET and AL_METHOD
# passed to a sub-make are picked up at use time.
ifndef TARG_AL
TARG_AL = ${TARGET}.${AL_METHOD}
endif

ifndef TARGETDIR
TARGETDIR=${TARGET}
endif

# Scratch directory for generated scripts and intermediate output.
TMPDIR=/var/tmp

# Program that reads the mutual-information command script on stdin.
ifndef CORRELATED
CORRELATED=correlated-columns
endif

ifndef START_COL
START_COL=0
endif

# ---------------------------------------------------------------------------
# Neural network selection
# ---------------------------------------------------------------------------

RR_NN_DIR=/projects/compbio/experiments/protein-predict/casp6/networks
# RR_NEURAL_NET_104=${RR_NN_DIR}/NN104-40n40all.net
# RR_NN_NAME=NN134-90n70err.net.6
RR_NN_NAME=NN280-240n300.net.28

# new designation for the new predictions
RR_EXT=280

RR_NEURAL_NET=${RR_NN_DIR}/${RR_NN_NAME}

# Base name of the thinned alignment (THIN is supplied per sub-make).
SAM=${TARG_AL}-thin${THIN}

# Such are the changes to traincontactnn that it may not be possible to use
# these old arguments.  However they can be recovered.
# RR_ARGS=-entropy -window 1 -T 120.0
#  -S .t2k.CB_burial_14_7.rdb,.t2k.str2.rdb,.t04.CB_burial_14_7.rdb,.t04.str2.rdb
#  -C .t2k-thin62.mi.rdb,.t2k-thin40.mi.rdb,.t2k-thin35.mi.rdb,.t2k-thin30.mi.rdb,.t04-thin62.mi.rdb,.t04-thin40.mi.rdb,.t04-thin35.mi.rdb,.t04-thin30.mi.rdb

# new RR_ARGS required for building inputs for 280's
RR_ARGS=-entropy -W 3 -M 3 -T 120.0 -distribution .t04 -S \
    .t04.CB_burial_14_7.rdb,.t04.near-backbone-11.rdb,.t04.str2.rdb -C \
    .t04-thin62.mi.rdb,.t04-thin40.mi.rdb,.t04-thin35.mi.rdb,.t04-thin30.mi.rdb

# The prediction requires using the 'traincontactnn' program
# to build a list of inputs for the lwnn_validate program.
# lwnn_validate uses those inputs and a specified neural network
# to make predictions.  Those predictions are sorted by raw score
# and the sequence_length/2 best scoring predictions form
# the submitted RR predictions.
# There are a few input files that are not yet generated by Make.main:
#   the 'muscle' alignment mi files at thin-40, thin-62
#   actually just like t2k-thinNN except t2k.muscle-thinNN

# Percent-identity thresholds to thin the alignments to.
THINS=35 40 62
# NOTE(review): MINPAIR is not referenced anywhere in this file; the mi
# script below hard-codes "SetMinPairs 0.4".  Confirm which value is wanted.
MINPAIR=0.7

# These targets are commands, not files.
.PHONY: build_mis build_mi remove_mis build_dists build_dist \
        buildrrs_all clearrrs_all buildrrs clearrrs

# ============================ mi data ==============================================

# Run one sub-make per (THIN, alignment method) pair.  The single leading
# '-' makes make ignore a failing sub-make.  It must appear only at the very
# start of the recipe line: a '-' placed in front of the inner foreach is
# handed to the shell as part of a command name and breaks later iterations.
build_mis:
	-$(foreach i,${THINS},\
	    $(foreach j,${MA_METHODS},\
	        ${MAKE} -f make.rr -k THIN=${i} TARGET=${TARGET} AL_METHOD=${j} build_mi;))

build_mi: ${TARGETDIR}/${SAM}.mi.rdb.gz

# Remove every mutual-information table of the current target.
remove_mis:
	rm -f ${TARGETDIR}/${TARGET}.*.mi.rdb.gz
	rm -f ${TARGETDIR}/${TARGET}.*.mi.rdb

# Build a gzipped mutual-information table from an alignment:
#   1. write a command script into ${TMPDIR},
#   2. feed it to ${CORRELATED} on stdin,
#   3. gzip the table in ${TMPDIR} and move it onto the target.
# The script also names ${TARGETDIR}/$*.constraints as the second argument of
# MutualInfoSignif (presumably a second output file; verify against the tool).
${TARGETDIR}/%.mi.rdb.gz : ${TARGETDIR}/%.a2m.gz
	echo Building $@
	echo SetAlphabet ExtAA > ${TMPDIR}/tmp-$*-mi.script
	echo ReadA2m $< >> ${TMPDIR}/tmp-$*-mi.script
	echo "SetSignif 9.0" >> ${TMPDIR}/tmp-$*-mi.script
	echo SetMinPairs 0.4 >> ${TMPDIR}/tmp-$*-mi.script
	echo "SetMinSep 8" >> ${TMPDIR}/tmp-$*-mi.script
	echo MutualInfoSignif ${TMPDIR}/$*.mi.rdb ${TARGETDIR}/$*.constraints >> ${TMPDIR}/tmp-$*-mi.script
#	echo MutualInfoAll ${TMPDIR}/$*.mi.rdb >> ${TMPDIR}/tmp-$*-mi.script
	${CORRELATED} < ${TMPDIR}/tmp-$*-mi.script
	rm -f ${TMPDIR}/tmp-$*-mi.script
	gzip -f ${TMPDIR}/$*.mi.rdb
	mv ${TMPDIR}/$*.mi.rdb.gz $@

# Thin to ${THIN}% identity
# NOTE(review): the scratch name 'unique-tmp' is fixed and lives in the
# current directory, so two of these recipes must not run at the same time.
${TARGETDIR}/${SAM}.a2m.gz: ${TARGETDIR}/${TARG_AL}.a2m.gz
	uniqueseq unique-tmp -alignfile $< -percent_id 0.${THIN}
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

# ============================ build distributions =============================

# Run one sub-make per alignment method; failures are ignored ('-').
build_dists:
	-$(foreach j,${MA_METHODS},\
	    ${MAKE} -f make.rr -k TARGET=${TARGET} AL_METHOD=${j} build_dist;)

build_dist: ${TARGETDIR}/${TARG_AL}.probs

# and the '*.probs' file with the residue distributions using
# 'estimate-dist'  Similar to '*.saves'
# for now, I'll base the distribution on the *.a2m.gz files
# The recipe writes a command script into ${TMPDIR}, runs estimate-dist on
# it, and moves the resulting ${TMPDIR}/$*.probs onto the target.
${TARGETDIR}/%.probs : ${TARGETDIR}/%.a2m.gz
	echo Alphabet ExtAA > ${TMPDIR}/tmp-$*-probs.script
	echo ClipWeight 1.0 >> ${TMPDIR}/tmp-$*-probs.script
	echo PushReg /projects/compbio/lib/recode3.20comp >> ${TMPDIR}/tmp-$*-probs.script
	echo SequenceWeight HenikoffWeight 1.0 1.0 >> ${TMPDIR}/tmp-$*-probs.script
	echo ReadA2M ${TARGETDIR}/$*.a2m.gz >> ${TMPDIR}/tmp-$*-probs.script
	echo PrintProbs ${TMPDIR}/$*.probs >> ${TMPDIR}/tmp-$*-probs.script
	echo quit >> ${TMPDIR}/tmp-$*-probs.script
	estimate-dist < ${TMPDIR}/tmp-$*-probs.script
	rm -f ${TMPDIR}/tmp-$*-probs.script
	mv ${TMPDIR}/$*.probs $@

# ======================== build the prediction ===================

# as of 2004-08-10
TARGET_LIST=T0206 T0213 T0214 T0215 T0216 T0217 T0218 T0219 \
    T0220 T0221 T0222 T0223 T0224 T0225 T0226 T0227 \
    T0235 T0237 T0238 T0239 \
    T0240 T0241 T0242 T0243 T0244 T0245 T0246 T0247 T0248 T0249 \
    T0250 T0251 T0253 T0254 T0256 T0257 \
    T0261 T0262 T0263 T0264 T0265 T0266 T0267 T0268 T0269 \
    T0270 T0271 T0272

# old targets
# T0203 T0204 T0205
# T0198 T0199 T0200 T0201 T0202 T0208 T0209 T0210 T0211 T0212
# T0228 T0229 T0230 T0231 T0232 T0233

# cancelled
# T0207 T0236 T0234

# Build / clear the RR prediction of every target in TARGET_LIST, one
# sub-make per target; failures are ignored ('-').
buildrrs_all:
	-$(foreach i,${TARGET_LIST},\
	    ${MAKE} -f make.rr -k TARGET=${i} buildrrs;)

clearrrs_all:
	-$(foreach i,${TARGET_LIST},\
	    ${MAKE} -f make.rr -k TARGET=${i} clearrrs;)

buildrrs: ${TARGETDIR}/${TARGET}.${RR_EXT}.rr

# Remove the prediction and its by-products for the current target.
clearrrs:
	rm -f ${TARGETDIR}/${TARGET}.${RR_EXT}.rr
	rm -f ${TARGETDIR}/${TARGET}.${RR_EXT}.rr.rasmol
	rm -f ${TARGETDIR}/${TARGET}.${RR_EXT}.rr.distinct.rasmol
	rm -f ${TARGETDIR}/${TARGET}.${RR_EXT}.rr.constraints
	rm -f ${TARGETDIR}/${TARGET}.validate
	rm -f ${TARGETDIR}/${TARGET}.pairs
	rm -f ${TARGETDIR}/${TARGET}.*.${RR_EXT}-contactmap.eps

# NOTE: traincontactnn creates a file containing the pairs i,j
# called ${TARGET}.pairs.
# It is used by validate2rr

# Build the RR prediction for ${TARGET}:
#   1. traincontactnn dumps the network inputs to ${TMPDIR}/${TARGET}.validate,
#   2. lwnn_validate scores them with ${RR_NEURAL_NET} and validate2rr turns
#      the scores into ${TARGET}.${RR_EXT}.rr,
#   3. rr2contactmap | gnuplot draws the contact map,
#   4. scratch files are removed and the 'rr' / 'rr.distinct' symlinks are
#      pointed at the rasmol scripts.
# Each 'cd ${TARGETDIR} && ...' is one shell command because every recipe
# line runs in its own shell.
# The rasmol target is spelled with the ${TARGET}. prefix so that it matches
# the file the symlink below points at.
# NOTE(review): the rasmol files are presumably written by 'validate2rr -c';
# nothing in this recipe creates them explicitly.  As an ordinary two-target
# rule, the recipe can run once per target if both are requested.
${TARGETDIR}/${TARGET}.${RR_EXT}.rr ${TARGETDIR}/${TARGET}.${RR_EXT}.rr.rasmol :
	cd ${TARGETDIR} && \
	  traincontactnn -dump -id ${TARGET} -start ${START_COL} ${RR_ARGS} > ${TMPDIR}/${TARGET}.validate
	cd ${TARGETDIR} && \
	  lwnn_validate -a -v ${TMPDIR}/${TARGET}.validate ${RR_NEURAL_NET} | \
	  validate2rr -c -n ${RR_NN_NAME} -s ${START_COL} -t ${TARGET} > ${TARGET}.${RR_EXT}.rr
	cd ${TARGETDIR} && \
	  rr2contactmap -s ${START_COL} -e ${TARG_AL}.dssp-ehl2.seq \
	  < ${TARGET}.${RR_EXT}.rr | gnuplot > ${TARG_AL}.${RR_EXT}-contactmap.eps
	echo ${RR_NN_NAME}
	rm -f ${TARGETDIR}/${TARGET}.pairs
	rm -f ${TMPDIR}/${TARGET}.validate
	rm -f ${TARGETDIR}/rr
	ln -s ${TARGET}.${RR_EXT}.rr.rasmol ${TARGETDIR}/rr
	rm -f ${TARGETDIR}/rr.distinct
	ln -s ${TARGET}.${RR_EXT}.rr.distinct.rasmol ${TARGETDIR}/rr.distinct

# Prerequisites that are currently not declared for the rule above:
#		${TARGETDIR}/${TARGET}.t2k.str2.rdb
#		${TARGETDIR}/${TARGET}.t04.str2.rdb
#		${TARGETDIR}/${TARGET}.t2k.CB_burial_14_7.rdb
#		${TARGETDIR}/${TARGET}.t04.CB_burial_14_7.rdb
#		${TARGETDIR}/${TARGET}.t2k.probs
#		${TARGETDIR}/${TARGET}.t04.probs
#		${TARGETDIR}/${TARGET}.t2k-thin62.mi.rdb
#		${TARGETDIR}/${TARGET}.t2k-thin40.mi.rdb
#		${TARGETDIR}/${TARGET}.t2k-thin35.mi.rdb
#		${TARGETDIR}/${TARGET}.t04-thin62.mi.rdb
#		${TARGETDIR}/${TARGET}.t04-thin40.mi.rdb
#		${TARGETDIR}/${TARGET}.t04-thin35.mi.rdb

##########################
# MAKING CASP SUBMISSION #
##########################

# These targets are commands, not files.
.PHONY: mailrr_all mailrr

# Mail the prediction of every target in TARGET_LIST, one sub-make per
# target; failures are ignored ('-').
mailrr_all :
	-$(foreach i,${TARGET_LIST},\
	    ${MAKE} -f make.rr -k TARGET=${i} mailrr;)

# Mail the RR file of the current target to ${CASP6_SUBMIT}
# (not set in this file).
mailrr : ${TARGETDIR}/${TARGET}.${RR_EXT}.rr
	mail -s 'SAM-T04 hand ${TARGET} RR' ${CASP6_SUBMIT} < $<

# ifeq ($(shell if -s ${TARGETDIR}/${TARGET}.${RR_EXT} echo yes),yes)
# else
#	echo bad ${TARGET}
# endif

# =============== realignment using Bob Edgar's "muscle" program. ==========

# Decompress the alignment, realign it with muscle, recompress.
%.muscle.gz: %.a2m.gz
	gunzip -c $^ \
	  | muscle -maxhours 2.0 \
	  | gzip \
	  > $@

# Convert the muscle output to a2m with the a2m_from_muscle script.
%.muscle.a2m.gz: %.muscle.gz
	${PCEM_SCRIPTS}/a2m_from_muscle -in $^ -out $@ -guide 1