#!/usr/bin/python # program: rr2constraints.py # converts an rr format to a constraints file # options: see 'usage' import sys, getopt,time from GeoBio import * def die(s): s = "ERROR rr2constraints: "+s sys.stderr.write(s+'\n') sys.exit(1) def usage(): print """usage: %s [topfraction nonbonus topdistance lowdistance [start]] < rr > constraints If the topfraction has no decimal point, then we insert one as: arg[:-2]+'.'+arg[-2:], eg '250' becomes '2.50' """ % sys.getargv[0] class Parameter: def __init__(self): self.nonbonus = 0.005 self.topfraction = 0.20 self.topdistance = "14.0" self.lowdistance = "7.0" self.start = 1 self.a = 0.0 self.b = 0.0 self.c = 0.0 params = Parameter() args = sys.argv def adjustscore(score,a,b,c=0.0): return max(0.0,min(1.0, (1.0-c)*(a/(1.0-score)-b)+c*score)) mapfcn = [] if len(args)>2: try: arg1 = args[1] if arg1.find('.')<0: arg1 = arg1[:-2]+'.'+arg1[-2:] params.topfraction = float(arg1) arg2 = args[2] if arg2.find('.')<0: arg2 = arg2[:-2]+'.'+arg2[-2:] params.nonbonus = float(arg2) if len(args)>3: arg3 = args[3] # if arg3.find('x')>=0: # mapfcn = string.split(args[3],'x') # else: params.topdistance = arg3 if len(args)>4: params.lowdistance = args[4] except ValueError: die("Arguments must be float values") try: if len(args)>5: params.start = int(args[5]) except ValueError: die("'start' must be an integer") if mapfcn: try: params.a = float('.'+mapfcn[0]) params.b = float('.'+mapfcn[1]) params.c = float('.'+mapfcn[2]) except IndexError,ValueError: die("mapfcn argument format is: NxNxN") def AorB(residue): if residue=='G': return 'A' return 'B' def constraintOut(score,sequence,x,y,isBonus=True): residue1 = sequence[x] residue2 = sequence[y] if isBonus: bonus = "bonus" else: bonus = "" if params.a>0.0: score = adjustscore(score,params.a,params.b,params.c) print "Constraint %s%i.C%s %s%i.C%s -999 %s %s %f %s" % (residue1, x+params.start, AorB(residue1), residue2, y+params.start, AorB(residue2), params.lowdistance, params.topdistance, score, bonus) # load an rr format as a contact map # sort by scores # using sequence length, export top preds as constraints # we assume we're working by chain/domain rr = ContactPrediction() rr.load(sys.stdin) rr.sortByScores() sequence = rr.sequence seqlen = len(sequence) nonbonusCt = int(seqlen*params.nonbonus) bonusCt = int(seqlen*params.topfraction) #print sequence #print seqlen,bonusCt,len(rr.contacts) print "# converted from rr file by 'rr2constraints.py'" print "#",time.asctime() print "# parameters:" print "# nonbonus: %f*sequencelen" % (params.nonbonus) print "# topfraction: %f*sequencelen" % (params.topfraction) print "# topdistance: %s" % (params.topdistance) print "# lowdistance: %s" % (params.lowdistance) print "#" for contact in rr.contacts: if bonusCt==0: break constraintOut(contact[4], sequence, contact[0]-params.start, contact[1]-params.start, nonbonusCt<1) bonusCt -= 1 nonbonusCt -= 1