#!/usr/bin/perl -w
# make-repack-res-file [-multimer 1] [-start_col 1] < foo.a2m > foo-repack.res
#
# Reads a sequence in fasta format,
# then outputs a Rosetta .res file for design-mode repacking
# of the residues.
# The file has NATAA for all residues except PRO and CYS,
# which have NATRO.
# If multimer is set >1, the residues are repeated, as different chains.

use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;

{
    my $multimer  = 1;    # number of copies of the input to emit
    my $start_col = 1;    # pdb residue number given to the first residue of each chain

    GetOptions(
        "multimer=i"  => \$multimer,
        "start_col=i" => \$start_col,
        "help|?"      => sub { pod2usage("verbose" => 1); },
        "man"         => sub { pod2usage("verbose" => 2); },
    ) or pod2usage("verbose" => 0);

    # The sequence is read from standard input only; any leftover
    # command-line argument is a usage error.
    if (scalar(@ARGV) > 0) {
        pod2usage("verbose" => 0);
    }

    # Fixed resfile preamble.  Residue lines follow the "start" line.
    # NOTE(review): the whitespace inside this header was collapsed in the
    # copy this file was restored from; the layout below follows the column
    # positions the header itself documents -- confirm against a known-good
    # resfile.
    print <<"END_HEADER";
 This file specifies which residues will be varied

 Column   2:  Chain
 Column   4-7:  sequential residue number
 Column   9-12:  pdb residue number
 Column  14-18: id  (described below)
 Column  21-40: amino acids to be used

 NATAA  => use native amino acid
 ALLAA  => all amino acids
 NATRO  => native amino acid and rotamer
 PIKAA  => select inividual amino acids
 POLAR  => polar amino acids
 APOLA  => apolar amino acids

 The following demo lines are in the proper format

 A    1    3 NATAA
 A    2    4 ALLAA
 A    3    6 NATRO
 A    4    7 NATAA
 B    5    1 PIKAA  DFLM
 B    6    2 PIKAA  HIL
 B    7    3 POLAR
 -------------------------------------------------
 start
END_HEADER

    # record the chains of the input (three parallel arrays, one entry per residue)
    my @chain_id;    # character code of the chain: ord("A") for first chain, ...
    my @pdb_num;     # sequential number in original sequence
    my @res;         # what is the residue (one upper-case letter)

    my $zeroth_chain = ord("A") - 1;
    my $chain        = $zeroth_chain;
    my $res_num      = $start_col;

    # @ARGV is known to be empty here, so the sequence always comes from STDIN.
    while (my $line = <STDIN>) {
        if ($line =~ /^>/) {
            # A fasta header starts a new chain and restarts pdb numbering.
            $chain++;
            $res_num = $start_col;
            next;
        }
        chomp $line;
        $line = uc($line);
        $line =~ tr/A-Z//cd;    # reduce to letters of sequence
        my @seq = split(//, $line);
        next unless @seq;

        # Sequence data before any ">" header: treat it as chain A rather
        # than leaving the chain id at the character before "A".
        $chain++ if $chain == $zeroth_chain;

        push @chain_id, ($chain) x scalar(@seq);
        push @res,      @seq;
        push @pdb_num,  $res_num .. ($res_num + scalar(@seq) - 1);
        $res_num += scalar(@seq);
    }

    print STDERR "Read " . scalar(@res) . " residues, have "
        . scalar(@chain_id) . " chain ids and "
        . scalar(@pdb_num) . " pdb_nums\n";

    # how many chains per multimer
    my $chain_incr = $chain - $zeroth_chain;
    print STDERR "Printing $multimer copies of $chain_incr chains\n";

    # NOTE(review): the body of the inner loop was lost from the copy this
    # file was restored from.  It is rebuilt here from the column layout in
    # the header, the header comment (PRO and CYS get NATRO, everything else
    # NATAA) and the -multimer documentation (sequential number and chain id
    # advance on every copy, pdb numbers repeat).  Confirm against the
    # original script if it is available.
    my $serial_res_num = 1;
    foreach my $x (0 .. $multimer - 1) {
        for (my $i = 0; $i < scalar(@res); $i++) {
            my $id = ($res[$i] eq "P" || $res[$i] eq "C") ? "NATRO" : "NATAA";
            printf(" %s %4d %4d %s\n",
                chr($chain_id[$i] + $x * $chain_incr),
                $serial_res_num,
                $pdb_num[$i],
                $id);
            $serial_res_num++;
        }
    }
}

__END__

=pod

=head1 NAME

make-repack-resfile - make a Rosetta .res file for repacking sidechains

=head1 SYNOPSIS

make-repack-resfile [-start_col 1] < TARGET.a2m > TARGET.repack.res

make-repack-resfile -multimer 2 [-start_col 1] < TARGET.a2m > TARGET.dimer.repack.res

 Options:
   -help            brief help
   -man             detailed help
   -start_col 1     the number of the first residue
   -multimer 1      how many copies of input to make

=head1 OPTIONS

=over 4

=item B<-help>

Prints a brief help message and exits.

=item B<-man>

Prints the manual page and exits.

=item B<-start_col> 1

Since the fasta input format does not have any way of specifying
residue numbers, they are sequentially assigned for each chain,
starting with this number.

=item B<-multimer> 1

The input is repeated this many times, with the sequential residue
number increasing and the chain ID increasing on each repetition,
but with the pdb_numbers being repeated.

=back

=head1 DESCRIPTION

Make a .res file for repacking sidechains in design-mode Rosetta
from a fasta file of the sequence of the monomer.

All the prolines and cysteines are frozen,
everything else is allowed to be repacked.

=head1 BUGS

If there are multiple chains in the input, they all start at -start_col.

If there are multiple chains in the input, their chain ids are
replaced by "A", "B", ... and so forth.

Should allow the user to specify which residue types not to try to
repack, defaulting to PRO and CYS or just PRO.

=cut