# This package is for handling ids files, and the
# files distributed below /projects/compbio/experiments/models.97/pdb/

package ChainIds;

use English;
use FileHandle;
use Exporter;

@ISA    = qw(Exporter);
@EXPORT = qw(
    ReadIDs
    ReadIDsFH
    PrintIDsStdout
    dir_path_for_id
    pretty_dir_path_for_id
    subdir_for_alignspec
    get_suffix
    full_existing_filename
    pdb_file
);

use vars qw(
    $models97
    $indexesdir
    $models97_subdir
    $chain_subdir
    %SUBDIR
    %override_suffix
);

# Each location comes from the environment when the variable holds a true
# value, and otherwise falls back to the hard-coded default.
$models97   = $ENV{MODELS97}  || "/projects/compbio/experiments/models.97";
$indexesdir = $ENV{INDEXES97} || "/projects/compbio/experiments/models.97/indexes";

#########################################################################
# Here is the section that specifies the location of .a2m files.
# The keys to %SUBDIR are the legal values for -a2m.
# Even if the alignment files are gzipped, specify $a2m_suffix
# without the ".gz".
#########################################################################

# Specs whose files are kept in the "struct-align" subdirectory.
foreach my $spec (qw(
    fnok
    fssp
    fssp-t98
    hssp
    fssp-thin20.pdbnums.rdb
    fssp-thin20-chi1-3.rdb
    fssp-thin20-abs_sa-CB-burial-12-7.rdb
    fssp-thin20-length-4.rdb
    fssp-thin20-near-backbone-7.rdb
    fssp-thin20-near-backbone-11.rdb
    fssp-thin20-way-back-7.rdb
    fssp-thin20-way-back-11.rdb
    fssp-thin20-CB-burial-16-7.rdb
    fssp-thin20-CB-burial-14-7.rdb
    fssp-thin20-CB-burial-12-7.rdb
    fssp-thin20-CB-burial-10-7.rdb
    fssp-thin20-CB-burial-8-7.rdb
    fssp-thin20-CB-burial-6.5-7.rdb
    fssp-thin20-CA-burial-14-7.rdb
    fssp-thin20-CA-burial-12-7.rdb
    fssp-thin20-CA-burial-10-7.rdb
    fssp-thin20-CA-burial-8-7.rdb
    fssp-thin20-CA-burial-6.5-7.rdb
    fssp-thin20-wet-burial-12-7.rdb
    fssp-thin20-wet-burial-10-7.rdb
    fssp-thin20-wet-burial-8-7.rdb
    fssp-thin20-wet-burial-6.5-7.rdb
    fssp-thin20-dry-burial-14-7.rdb
    fssp-thin20-dry-burial-12-7.rdb
    fssp-thin20-dry-burial-10-7.rdb
    fssp-thin20-dry-burial-8-7.rdb
    fssp-thin20-dry-burial-6.5-7.rdb
    fssp-thin20-burial-12-7.rdb
    fssp-thin20-burial-10-7.rdb
    fssp-thin20-burial-8-7.rdb
    fssp-thin20-burial-9-7.rdb
    fssp-thin20-burial-6.5-7.rdb
    fssp-thin20-burial-6.5-9.rdb
    fssp-thin20-rel_sa10.rdb
    fssp-thin20-rel_sa7.rdb
    fssp-thin20-rel_sa3.rdb
    fssp-thin20-rel_sa2.rdb
    fssp-thin20-abs_sa7.rdb
    fssp-thin20.tco.rdb
    fssp-thin20.str.rdb
    fssp-thin20.str2.rdb
    fssp-thin20.akp.rdb
    fssp-thin20.alpha.rdb
    fssp-thin20.pmt.rdb
    fssp-thin20.seq.rdb
    fssp-thin20.2d.rdb
    fssp-thin20.stride.rdb
    fssp-thin20.ang.rdb
    fssp-thin20.co.rdb
    fssp-thin20.pb.rdb
    fssp-thin20.gamma.rdb
    fssp-thin20.kappa.rdb
    fssp-thin30.tco.rdb
    fssp-thin30.seq.rdb
    fssp-thin30.2d.rdb
    fssp-thin30.stride.rdb
    fssp-thin40.tco.rdb
    fssp-thin40.seq.rdb
    fssp-thin40.2d.rdb
    fssp-thin40.stride.rdb
)) {
    $SUBDIR{$spec} = "struct-align";
}

# Specs whose files are kept in the "nostruct-align" subdirectory.
foreach my $spec (qw(
    target96
    target97
    target98
    target98_1
    target98_2
    target98_3
    target98-pdb
    target98-mixed
    target98-dssp
    t99
    t99-thin90
    t2k
    t2k-thin90
    t2k-thin62
    t04
    t04-thin90
    t04-thin62
    t06
    t06-thin90
    t06-thin62
    t99-w0.5.mod
    t2k-w0.5.mod
    t04-w0.5.mod
    t06-w0.5.mod
    t99-ebghtl.mod
    t2k-ebghtl.mod
    t04-ebghtl.mod
    t99-str2.mod
    t2k-str2.mod
    t04-str2.mod
    t99-pb.mod
    t2k-pb.mod
    t04-pb.mod
    t99-2d.mod
    t2k-2d.mod
)) {
    $SUBDIR{$spec} = "nostruct-align";
}
# More specs whose files are kept in the "nostruct-align" subdirectory.
foreach my $spec (qw(
    t04-2d.mod
    t2k-ebghtl-logo.eps
    t2k-test-100-30-rel_sa7-db.mlib
)) {
    $SUBDIR{$spec} = "nostruct-align";
}

$SUBDIR{"hssp"} = "struct-align";

# Specs whose files are kept in the "info" subdirectory.
foreach my $spec (qw(
    dssp
    stride
    dssp-mixed
    stride-mixed
    guide
    domainseq
    pdb.seq
    dssp.seq
    stride.seq
    dssp-mixed.seq
    stride-mixed.seq
    hssp.guide
    dssp.asa
    dssp.pmt
    dssp.int
    pdb.2d
    dssp.2d
    stride.2d
    dssp-mixed.2d
    stride-mixed.2d
    stride-mixed.dssp
    hssp.2d
    dssp.ang
    stride-mixed.ang
    stride-mixed.bys
    dssp.co
    stride-mixed.co
    dssp.rot
    stride-mixed.rot
    dssp.ppi
    stride-mixed.ppi
    dssp.tco
    stride-mixed.tco
    dssp.alpha
    stride-mixed.alpha
    dssp.pb
    stride-mixed.pb
    dssp.gamma
    stride-mixed.gamma
    dssp.kappa
    stride-mixed.kappa
    dssp.str
    stride-mixed.str
    dssp.str2
    stride-mixed.str2
    dssp.akp
    stride-mixed.akp
    dssp.ehl2-stride
    stride-mixed.ehl2-stride
    dssp.ehl2-dssp
    stride-mixed.ehl2-dssp
    stride-mixed.chi1-3
    stride-mixed.abs_sa-CB-burial-12-7
)) {
    $SUBDIR{$spec} = "info";
}
# More specs whose files are kept in the "info" subdirectory.
foreach my $spec (qw(
    stride-mixed.length-4
    stride-mixed.CB-burial-16-7
    stride-mixed.CB-burial-14-7
    stride-mixed.near-backbone-7
    stride-mixed.near-backbone-11
    stride-mixed.way-back-7
    stride-mixed.way-back-11
    stride-mixed.CB-burial-12-7
    stride-mixed.CB-burial-10-7
    stride-mixed.CB-burial-8-7
    stride-mixed.CB-burial-6.5-7
    stride-mixed.CA-burial-14-7
    stride-mixed.CA-burial-12-7
    stride-mixed.CA-burial-10-7
    stride-mixed.CA-burial-8-7
    stride-mixed.CA-burial-6.5-7
    stride-mixed.wet-burial-12-7
    stride-mixed.wet-burial-10-7
    stride-mixed.wet-burial-8-7
    stride-mixed.wet-burial-6.5-7
    stride-mixed.dry-burial-14-7
    stride-mixed.dry-burial-12-7
    stride-mixed.dry-burial-10-7
    stride-mixed.dry-burial-8-7
    stride-mixed.dry-burial-6.5-7
    stride-mixed.burial-12-7
    stride-mixed.burial-10-7
    stride-mixed.burial-9-7
    stride-mixed.burial-8-7
    stride-mixed.burial-6.5-7
    stride-mixed.burial-6.5-9
    stride-mixed.abs_sa8
    stride-mixed.abs_sa7
    stride-mixed.rel_sa2
    stride-mixed.rel_sa3
    stride-mixed.rel_sa7
    stride-mixed.rel_sa10
)) {
    $SUBDIR{$spec} = "info";
}

#extracted properties files
foreach my $spec (qw(
    dihedral.rdb
    dssp.rdb
    gen6.5.burial.rdb
    gen8.burial.rdb
    gen9.burial.rdb
    gen10.burial.rdb
    gen12.burial.rdb
    wet6.5.burial.rdb
    wet8.burial.rdb
    wet10.burial.rdb
    wet12.burial.rdb
    dry6.5.burial.rdb
    dry8.burial.rdb
    dry10.burial.rdb
    dry12.burial.rdb
    CB6.5.burial.rdb
    CB8.burial.rdb
    CB10.burial.rdb
    CB12.burial.rdb
    CA6.5.burial.rdb
    CA8.burial.rdb
    CA10.burial.rdb
    CA12.burial.rdb
)) {
    $SUBDIR{$spec} = "info";
}

#the master
$SUBDIR{"pdb-rdb"} = "info";

# rename the suffix for the following abbreviations
# NOTE(review): the "guide" value starts with a period while every other
# suffix returned by get_suffix does not; full_existing_filename joins id and
# suffix with its own ".", so this looks like it yields "<id>..hssp.guide".
# Left unchanged here -- confirm against the files on disk before editing.
$override_suffix{"guide"}   = ".hssp.guide";
$override_suffix{"pdb-rdb"} = "pdb-rdb";

# --------------------------------------------------
# Extract the id from one line of an ids file.
#
# Returns the first whitespace-delimited token of the line.  Returns
# undef (empty list in list context) for lines whose first non-blank
# characters are "#" or "//" (comments), for lines with no token at all,
# and for a token that Perl treats as false (the string "0").
sub _id_from_line {
    my ($line) = @_;

    return if $line =~ /^\s*#/;       # # is comment
    return if $line =~ /^\s*\/\//;    # // is also comment
    return unless $line =~ /^\s*(\S+)/;
    return $1 ? $1 : ();
}

# --------------------------------------------------
# Read ids from file into a hash table.
# Using a hash means that duplicate IDs are automatically merged.
#
#   $file  name of the ids file to read
#   %hash  (received as a reference because of the \% prototype) hash that
#          gets $hash{$id} = 1 for every id found
#
# Calls fatal() if the file cannot be opened.
sub ReadIDs($\%) {
    my ($file, $href) = @_;

    open(my $fh, '<', $file)
        || fatal("Can't open file $file for reading IDs");

    # Read the opened file line by line.  (The loop used to have an empty
    # condition, which never read anything and never terminated.)
    while (defined(my $line = <$fh>)) {
        my $id = _id_from_line($line);
        next unless defined $id;
        $$href{$id} = 1;
    }
    close $fh;
}

# Read ids from FileHandle into a hash table.
# Using a hash means that duplicate IDs are automatically merged.
#
#   $fileref  handle to read from, until end of file; it is not closed here
#   %hash     (received as a reference because of the \% prototype) hash
#             that gets $hash{$id} = 1 for every id found
sub ReadIDsFH(*\%) {
    my ($fileref, $href) = @_;

    while (defined(my $line = <$fileref>)) {
        my $id = _id_from_line($line);
        next unless defined $id;
        $$href{$id} = 1;
    }
}

# --------------------------------------------------
# Read ids from file into an array.
#
#   $file   name of the ids file to read
#   @array  (received as a reference because of the \@ prototype) array that
#           each id is appended to, in file order; duplicates are kept
#
# Lines whose first non-blank characters are "#" or "//" are comments.
# Otherwise the id is the first whitespace-delimited token of the line;
# lines without a token, or whose token is false in Perl terms ("0"),
# are skipped.  Calls fatal() if the file cannot be opened.
sub ReadIDsToArray($\@) {
    my ($file, $aref) = @_;

    open(my $fh, '<', $file)
        || fatal("Can't open file $file for reading IDs");

    # Read the opened file line by line.  (The loop used to have an empty
    # condition, which never read anything and never terminated.)
    while (defined(my $line = <$fh>)) {
        next if $line =~ /^\s*#/;       # # is comment
        next if $line =~ /^\s*\/\//;    # // is also comment
        my ($id) = $line =~ /^\s*(\S+)/;
        next unless $id;
        push @$aref, $id;
    }
    close $fh;
}

#print the keys of the hash (reference) to STDOUT
#
# The body dereferences its first argument as a hash, so pass a hash
# reference; the (%) prototype does not create one for the caller.
# Keys are printed one per line, in the order keys() returns them.
sub PrintIDsStdout(%) {
    use strict;
    my ($my_ids) = @_;

    foreach my $key (keys(%$my_ids)) {
        print "$key\n";
    }
}

# return the subdirectory for a specified alignment
#
#   $alignspec  a key of %SUBDIR
#
# For an unknown spec, lists every legal spec on STDERR and calls fatal().
sub subdir_for_alignspec($) {
    my ($alignspec) = @_;

    if (!defined $SUBDIR{$alignspec}) {
        print STDERR "ERROR: \"$alignspec\" unrecognized.\n";
        print STDERR "legal specifiers are:\n";
        foreach my $legal (sort keys(%SUBDIR)) {
            print STDERR "\t$legal\n";
        }
        fatal(" \"$alignspec\" unrecognized.\n");
    }
    return $SUBDIR{$alignspec};
}

# return the correct file suffix for the specified alignment spec
#
# Resolution order:
#   1. an entry in %override_suffix for the spec, returned as is;
#   2. a spec that already contains a period, returned unchanged;
#   3. otherwise the spec with ".a2m" appended.
sub get_suffix($) {
    my ($spec) = @_;

    return $override_suffix{$spec} if defined($override_suffix{$spec});
    return $spec                   if $spec =~ /[.]/;
    return $spec . ".a2m";
}

# Return the full path for the DIRECTORY containing all
# information about a specified id.
# UGLY--relies on $ChainIds::models97_subdir and $ChainIds::chain_subdir being set.
#
# The path is $models97/$models97_subdir/<first two characters of id>/<id>/$chain_subdir.
sub dir_path_for_id($) {
    my ($id) = @_;

    my $prefix = substr($id, 0, 2);
    return "$models97/$models97_subdir/" . $prefix . "/$id/$chain_subdir";
}

#version of above that has models97_subdir and chain_subdir passed
#in as arguments
sub pretty_dir_path_for_id($$$) {
    my ($id, $m97_subdir, $c_subdir) = @_;

    my $prefix = substr($id, 0, 2);
    return "$models97/$m97_subdir/" . $prefix . "/$id/$c_subdir";
}

# Construct the full filename for given id and alignment spec,
# searching array of possible subdirectories (usually just ("pdb") ).
# Return the full file name if file is found, otherwise return undef.
# Example: full_existing_filename("1kcqA", "stride.seq", "pdb")
# returns /projects/compbio/experiments/models.97/pdb/1k/1kcqA/info/1kcqA.stride.seq
#
#   $id           chain id; its first two characters name the parent directory
#   $spec         alignment spec, resolved through get_suffix() and
#                 subdir_for_alignspec()
#   @subdir_list  subdirectories of $models97 to try, in order
#
# In each subdirectory the ".gz" name is tried before the plain name.
sub full_existing_filename($$@) {
    my ($id, $spec, @subdir_list) = @_;
    my $suffix       = get_suffix($spec);
    my $chain_subdir = subdir_for_alignspec($spec);

    # We have to find the appropriate subdirectory:
    foreach my $subdir (@subdir_list) {
        my $full_subdir = "$models97/$subdir/" . substr($id, 0, 2) . "/$id/$chain_subdir";
        my $path_nogz   = "$full_subdir/$id.$suffix";

        foreach my $path ($path_nogz . ".gz", $path_nogz) {
            return $path if (-e $path);
        }
    }
    # Explicit undef (not a bare return) so list-context callers still
    # receive a single undef element, as before.
    return undef;
}

# Variant of full_existing_filename that looks only below
# $models97/removed-templates/ (no two-character prefix directory).
# Return the full file name if file is found, otherwise return undef.
#
# @subdir_list is accepted for signature compatibility but is not used.
# The built path contains a doubled "/" after "removed-templates"; it is
# kept so the returned strings stay exactly as they were.
sub full_existing_obsolete_filename($$@) {
    my ($id, $spec, @subdir_list) = @_;
    my $suffix       = get_suffix($spec);
    my $chain_subdir = subdir_for_alignspec($spec);

    my $full_subdir = $models97 . "/removed-templates/" . "/$id/$chain_subdir";
    my $path_nogz   = "$full_subdir/$id.$suffix";

    foreach my $path ($path_nogz . ".gz", $path_nogz) {
        return $path if (-e $path);
    }
    return undef;
}

# calls pdb-get to get the full file name for a pdb chain
#
# Returns everything pdb-get wrote to standard output, with one trailing
# newline removed, or undef if the program could not be started.
# The id is passed as a single argument without going through a shell,
# so shell metacharacters in it are not interpreted.
sub pdb_file($) {
    my ($id) = @_;

    open(my $pipe, '-|', '/projects/compbio/bin/pdb-get', $id)
        or return undef;
    my $file = do { local $/; <$pipe> };    # slurp the whole output
    close $pipe;

    chomp $file if defined $file;
    return $file;
}

#
# fatal(msg)
#
# Generate a fatal error message.
#
# Dies with "*** Error: <program name>: <msg>"; never returns.
sub fatal($) {
    my ($msg) = @_;

    # $0 is the variable that English.pm names $PROGRAM_NAME.
    die("*** Error: $0: $msg\n");
}

#
# run_prog(cmd, [errmsg])
#
# Run a command using system.  Exit if the command fails.  An optional
# error message for failure maybe specified or one will be generated.
#
# The command is echoed to STDERR first when $::verbose is true.
sub run_prog($;$) {
    my ($cmd, $errmsg) = @_;

    if ($::verbose) {
        print STDERR "\n\n@@@@ $cmd\n";
    }

    return if system($cmd) == 0;

    $errmsg = "command failed: $cmd" if !defined($errmsg);
    fatal($errmsg);
}

# packages must end with a true value---a restriction that seems
# to be poorly documented.
1;