#!/usr/bin/perl -w

# unpack-multimer -length 208 < joined-n-mer.pdb > nmer.pdb
#
# Takes a long chain from stdin and chops it up into
# equal-length pieces, renumbering the residues in each piece and
# making them have different chain ids.
#
# With the -superpose option, the resulting chains are sent to
# temporary files, then superimposed using undertaker, assuming that
# they are all identical sequences.
#
# With the -only option, can specify which monomers to output.

# Kevin Karplus 16 Sept 2004

# Fri Jul 14 09:43:07 PDT 2006 Kevin Karplus
#   Added addition of MODEL and ENDMDL records if missing from input.

use strict;
use warnings;
use English;
use File::Basename;
use File::Temp qw(tempdir);
use Getopt::Long;
use Pod::Usage;

sub create_superpose_under($$);

# main
{
    my $undertaker = "/projects/compbio/programs/undertaker/undertaker";
    my $length;
    my $superpose = 0;
    my $only = "";
    GetOptions(
        "length=n"   => \$length,
        "superpose!" => \$superpose,
        "only=s"     => \$only,
        "help|?"     => sub { pod2usage("verbose" => 1); },
        "man"        => sub { pod2usage("verbose" => 2); },
    ) or pod2usage("verbose" => 0);

    pod2usage("verbose" => 0) if (!defined($length));

    # A piece length below 1 would start a new chain on every residue.
    pod2usage(-message => "-length must be a positive integer", -verbose => 0)
        if ($length < 1);

    my $print_all_chains = ($only eq "");

    # Hash of the chain ids named by -only, for a quick inclusion test.
    my %included_chains = map { $_ => 1 } split(//, $only);

    my $res_num = 0;
    my $chain_id = "A";
    my $old_res_string = "";

    # Output goes to stdout, or (with -superpose) to one file per chain.
    my $out = \*STDOUT;
    my $job_id;
    if ($superpose) {
        # A private, unpredictably named directory avoids name collisions
        # between concurrent runs; it is removed when the program exits.
        my $tmp_dir = tempdir("multimer_XXXXXXXX", DIR => "/var/tmp", CLEANUP => 1);
        $job_id = "$tmp_dir/multimer";
        $out = open_chain_file($job_id, $chain_id);
    }

    my $has_model_record = 0;
    my $has_endmdl_record = 0;
    my $model_number = 0;

    # <> reads stdin when no file names remain on the command line.
    while (my $line = <>) {
        if ($line =~ /^ENDMDL/) {
            # Pass ENDMDL through: the /^END/ test below would otherwise
            # drop it, and the flag set here suppresses the synthesized
            # ENDMDL at end of input, leaving the model unterminated.
            $has_model_record = 0;
            $has_endmdl_record = 1;
            print {$out} $line;
            next;
        }
        next if ($line =~ /^TER/ || $line =~ /^END/);

        if ($line !~ /^ATOM/) {
            print {$out} $line;
            if ($line =~ /^MODEL\s+(\d+)/) {
                $has_model_record = 1;
                $model_number = $1;
                # A newly opened model still needs its own ENDMDL.
                $has_endmdl_record = 0;
            }
            next;
        }

        $has_endmdl_record = 0;
        if (!$has_model_record) {
            printf {$out} "MODEL %8d\n", ++$model_number;
            $has_model_record = 1;
        }

        # Characters 22..26 (0-based) are compared to detect a change of
        # residue between consecutive ATOM lines.
        my $res_string = substr($line, 22, 5);
        if ($res_string ne $old_res_string) {
            # starting a new residue
            $res_num++;
            $old_res_string = $res_string;
            if ($res_num > $length) {
                # Starting a new chain.  Chain ids simply advance through
                # ASCII from "A", so more than 26 pieces yields ids past "Z".
                $res_num = 1;
                $chain_id = chr(ord($chain_id) + 1);
                if ($superpose) {
                    close($out) or die "Error closing chain file: $!\n";
                    $out = open_chain_file($job_id, $chain_id);
                }
            }
        }

        if ($print_all_chains || defined($included_chains{$chain_id})) {
            # Rewrite the chain id and residue number, keep everything else.
            printf {$out} "%s%s%4d %s",
                substr($line, 0, 21), $chain_id, $res_num, substr($line, 27);
        }
    }
    print {$out} "ENDMDL\n" if (!$has_endmdl_record);

    if ($superpose) {
        close($out) or die "Error closing chain file: $!\n";
        create_superpose_under($job_id, ord($chain_id) - ord("A") + 1);

        my $log_file = "$job_id.undertaker.log";
        if (system("$undertaker < $job_id.under > $log_file 2>&1") != 0) {
            fail_keeping_temp_files("undertaker failed (status $?); see $log_file\n");
        }

        my $super_file = "$job_id.super.pdb";
        open(my $super, "<", $super_file)
            or fail_keeping_temp_files("Cannot read $super_file: $!; see $log_file\n");
        while (my $super_line = <$super>) {
            print $super_line;
        }
        close($super);
    }
}

# Open the temporary PDB file for one chain and return its filehandle.
#   $job_id    path prefix shared by all temporary files of this run
#   $chain_id  single-character chain id used in the file name
# Dies if the file cannot be created.
sub open_chain_file
{
    my ($job_id, $chain_id) = @_;
    my $chain_file = "$job_id-$chain_id.pdb";
    open(my $fh, ">", $chain_file) or die "Cannot write $chain_file: $!\n";
    return $fh;
}

# Die with $message, but first tell File::Temp not to remove the temporary
# directory, so that the undertaker script and log remain for inspection.
sub fail_keeping_temp_files
{
    my ($message) = @_;
    $File::Temp::KEEP_ALL = 1;
    die $message;
}

# Create an undertaker command file for superimposing the $num_chains monomers.
#   $job_id      path prefix; the script is written to "$job_id.under" and
#                refers to the chain files "$job_id-A.pdb", "$job_id-B.pdb", ...
#   $num_chains  number of chain files, lettered consecutively from "A"
# The script asks undertaker to write its result to "$job_id.super.pdb".
# Dies if the command file cannot be written.
sub create_superpose_under($$)
{
    my ($job_id, $num_chains) = @_;

    my $under_file = "$job_id.under";
    open(my $under, ">", $under_file) or die "Cannot write $under_file: $!\n";

    print {$under} <<"END_HEADER";
InFilePrefix /projects/compbio/experiments/undertaker/atoms-inputs/
ReadTrainingAtoms monomeric-50pc.atoms
ReadRotamerLibrary dunbrack-1332.rot
InFilePrefix /projects/compbio/experiments/undertaker/spots/
ReadAtomType exp-pdb.types
ReadClashTable exp-pdb-1332-2symm.clash
SetClashDefinition exp-pdb-2symm
InFilePrefix
END_HEADER

    print {$under} "ReadTargetPDB $job_id-A.pdb\n";
    print {$under} "PopConform\n";
    for my $chain (0 .. $num_chains - 1) {
        print {$under} "ReadConformPDB $job_id-" . chr($chain + ord("A")) . ".pdb\n";
    }
    print {$under} "PrintAllConformPDB $job_id.super.pdb superpose\n";
    print {$under} "quit\n";

    close($under) or die "Error closing $under_file: $!\n";
    return;
}

__END__

=pod

=head1 NAME

unpack-multimer

=head1 SYNOPSIS

unpack-multimer -length 208 < joined-n-mer.pdb > nmer.pdb

takes a long chain from stdin and chops it up into
equal-length pieces, renumbering the residues in each piece and
making them have different chain ids.

=head1 OPTIONS

=over 4

=item B<-help>

Prints a brief help message and exits.

=item B<-man>

Prints the manual page and exits.

=item B<-length> 208

Required parameter (no default).
Specifies the length of the pieces to break the chain into.
Must be a positive integer.

=item B<-superpose>

Optional boolean parameter.
If set, then the monomers that the chain is broken into are
output to separate temporary files and superimposed using undertaker.
The temporary files are deleted on completion; if undertaker fails
they are kept so that its log can be inspected.

=item B<-only> ABCD

Optional string parameter.
If set, then only chains whose chain id letter is part of the provided
parameter are output.  This option is useful for extracting particular
monomers from the multimer---the default is to output all the monomers.

=back

=head1 DESCRIPTION

unpack-multimer takes a long chain from stdin and chops it up into
equal-length pieces, renumbering the residues in each piece and
making them have different chain ids.

=cut