// TrainingCommands.cc // copyright Kevin Karplus // 30 March 1998 // Commands for controlling the training of neural networks // Note: there is no .h file for this .cc file, since the commands // are automatically entered in the command table when they // are declared. #include #include // included for atoi #include #include "Command/Command.h" #include "Input/Input.h" #include "Utilities/IOSmacros.h" #include "Utilities/Random.h" #include "LearningParam.h" #include "Globals.h" #include "Filenames/Filenames.h" #include "InterfaceDescription.h" #include "TrainSet.h" #include "NeuralNet.h" #include "NetActivation.h" #include "ActivationRecord.h" #include "OneChain.h" #include "NetQuality.h" static int PredReadParams(istream& in, Command* self, ostream&logfile) { if (!Globals::nn()) { logfile << "# Error: need to read neural net before " << self->name() << "\n" << flush; } char filename[500]; filename[0]=0; get_word(in, filename, '\n'); if (!filename[0]) { logfile << "# Error: No file specified for learning parameters\n"; return 1; } gzifstream *net_in = Filenames::open_input(filename); if (!net_in) return 0; Globals::nn()->set_learning_params(LearningParam::read_new(*net_in)); logfile << "# Learning params read from " << filename << "\n"; delete net_in; return 1; } static Command ReadLearningParams("ReadLearningParams", PredReadParams, "takes one argument, the filename to read a set of learning parameters\n\ from.\n\ "); static Command ReadParams("ReadParams", &ReadLearningParams); static int GetOutputFile(ofstream *&File, istream& in, Command* self, ostream&logfile) { char filename[500]; get_word(in, filename, '\n'); if (File) { File->close(); delete File; File=0; } if (!filename[0]) { logfile << "# " << self->name() << " given with no output file.\n"; return 1; } ofstream *nqdata_out = Filenames::open_output(filename); if (!nqdata_out) return 0; File=nqdata_out; logfile << "# " << self->name() << " " << filename << " ok.\n"; return 1; } static int PredReportUnitUsage(istream &in, Command*self, ostream& logfile) { return GetOutputFile(Globals::ReportUnitUsageTo, in, self, logfile); } static Command ReportUnitUsageCommand("ReportUnitUsageTo", PredReportUnitUsage, "takes one optional argument, the filename to which unit usage data should\n\ be reported after each TrainNet epoch (including epoch 0).\n"); static int PredReportQvsPhat(istream &in, Command*self, ostream& logfile) { return GetOutputFile(Globals::ReportQvsPhatTo, in, self, logfile); } static Command ReportQvsPhatCommand("ReportQvsPhatTo", PredReportQvsPhat, "takes one optional argument, the filename to which RDB-formatted\n\ data for fraction correct as a function of predicted probability should\n\ be reported after each TrainNet epoch (including epoch 0).\n"); static int PredReportNetQualityForCrossTrainTo(istream& in, Command* self, ostream&logfile) { return GetOutputFile(Globals::ReportNetQualityForCrossTrainTo, in, self, logfile); } static Command ReportNetQualityForCrossTrainCommand( "ReportNetQualityForCrossTrainTo", PredReportNetQualityForCrossTrainTo, "takes one optional argument, the filename to which net quality data \n\ for the cross-training set should be reported to after each epoch.\n\ With no argument, turns off reporting of cross-training results.\n"); static int PredReportNetQualityForTrainTo(istream& in, Command* self, ostream&logfile) { return GetOutputFile(Globals::ReportNetQualityForTrainTo, in, self, logfile); } static Command ReportNetQualityForTrainCommand("ReportNetQualityForTrainTo", PredReportNetQualityForTrainTo, "takes one optional argument, the filename to which net quality data \n\ for the training set should be reported to after each epoch.\n\ With no argument, turns off reporting of training results.\n"); static int PredReportNetQualityForTestTo(istream& in, Command* self, ostream&logfile) { return GetOutputFile(Globals::ReportNetQualityForTestTo, in, self, logfile); } static Command ReportNetQualityForTestCommand("ReportNetQualityForTestTo", PredReportNetQualityForTestTo, "takes one optional argument, the filename to which net quality data \n\ for the testing set should be reported to after each epoch.\n\ With no argument, turns off reporting of testing results.\n"); static int PredReportNetQualityForCrossTrainChainTo(istream& in, Command* self, ostream&logfile) { return GetOutputFile(Globals::ReportNetQualityForCrossTrainChainTo, in, self, logfile); } static Command ReportNetQualityForCrossTrainChainCommand( "ReportCrossTrainByChain", PredReportNetQualityForCrossTrainChainTo, "takes one optional argument, the filename to which net quality data \n\ for the cross-training set should be reported to after each individual\n\ chain.\n\ With no argument, turns off reporting of cross-training results.\n"); static int PredReportNetQualityForTrainChainTo(istream& in, Command* self, ostream&logfile) { return GetOutputFile(Globals::ReportNetQualityForTrainChainTo, in, self, logfile); } static Command ReportNetQualityForTrainChainCommand( "ReportTrainByChain", PredReportNetQualityForTrainChainTo, "takes one optional argument, the filename to which net quality data \n\ for the training set should be reported to after each inidividual chain.\n\ With no argument, turns off reporting of training results.\n"); static int PredReportNetQualityForTestChainTo(istream& in, Command* self, ostream&logfile) { return GetOutputFile(Globals::ReportNetQualityForTestChainTo, in, self, logfile); } static Command ReportNetQualityForTestChainCommand( "ReportTestByChain", PredReportNetQualityForTestChainTo, "takes one optional argument, the filename to which net quality data \n\ for the testing set should be reported to after each inidividual chain.\n\ With no argument, turns off reporting of testing results.\n"); static int PredSaveNeuralNet(istream& in, Command* self, ostream &logfile) { ofstream *net_out=0; if (!GetOutputFile(net_out, in,self,logfile) || net_out==NULL) { logfile << "# Error: No neural network output file opened.\n" << "# Not reporting network\n"; return 1; } if (!Globals::nn()) { logfile << "# No neural network to print.\n"; return 1; } else { // Initialize the weights, bias, pseudo, or gain parameters if they // have not been set Globals::nn()->initialize_net(logfile, Globals::training); Globals::nn()->write(*net_out); logfile << "# Neural network printed.\n"; } net_out->close(); delete net_out; return 1; } static Command SaveNeuralNet("SaveNeuralNet", PredSaveNeuralNet, "takes one required argument, the filename to which a neural network\n\ is to be written.\n"); static Command ReportNeuralNet("ReportNeuralNet", &SaveNeuralNet); static int PredCenterWeights(istream& in, Command* self, ostream &logfile) { NeuralNet* n = Globals::nn(); if (n==NULL) { logfile << "# No neural network to center.\n"; return 1; } else { // Initialize the weights, bias, pseudo, or gain parameters if they // have not been set Globals::nn()->initialize_net(logfile, Globals::training); for (int lay=n->num_layers()-1; lay>=0; lay--) { NeuralLayer *l = n->layer(lay); l->normalize(); l->center_weights_input_1(); l->center_weights(); l->center_biases(); } } return 1; } static Command CenterWeights("CenterWeights", PredCenterWeights, "centers the weights of the neural net, approximately preserving meaning.\n\ The meaning should be preserved if pseudocounts are all 0.\n\ "); // This is the command that begins the estimatation process for the net. // It should be called after everything has been set up, // and it can be called repeatedly. static int PredTrainNet(istream& in, Command* self, ostream &logfile) { int numiter; // The minimal assertion assert(Globals::nn() != NULL); in >> numiter; const int MIN_ITER=0; if (numiter < MIN_ITER) { logfile << "# Error: Minimun number of training iterations is " << MIN_ITER << ", " << numiter << " were specified.\n" << flush; numiter = MIN_ITER; } logfile << "# Training Net with " << numiter << " estimation cycles.\n" << flush; // Initialize the weights, bias, pseudo, or gain parameters if they // have not been set Globals::nn()->initialize_net(logfile, Globals::training); Globals::nn()->learning_loop(numiter, Globals::training, Globals::ReportNetQualityForTrainTo, Globals::ReportNetQualityForTrainChainTo, Globals::cross_training, Globals::ReportNetQualityForCrossTrainTo, Globals::ReportNetQualityForCrossTrainChainTo, Globals::ReportUnitUsageTo, Globals::ReportQvsPhatTo ); return 1; } static Command BeginTraining("TrainNet", PredTrainNet, "takes one required argument, the number of iterations that should\n\ be used to estimate the neural network.\n\ "); static int PredTestNet(istream& in, Command* self, ostream &logfile) { // The minimal assertion assert(Globals::nn() != NULL); logfile << "# Testing net " << Globals::nn()->name() << "\n" << flush; // Initialize the weights, bias, pseudo, or gain parameters if they // have not been set Globals::nn()->initialize_net(logfile, Globals::training); NetQuality *NQ = new NetQuality(Globals::nn()); if (Globals::training) { if (Globals::ReportNetQualityForTrainTo) NQ->print_data_header(* Globals::ReportNetQualityForTrainTo); Globals::nn()->test(Globals::training, NQ, Globals::ReportNetQualityForTrainTo, Globals::ReportNetQualityForTrainChainTo); if (Globals::ReportUnitUsageTo) NQ->print_unit_usage(* Globals::ReportUnitUsageTo,0); if (Globals::ReportQvsPhatTo) NQ->print_Q_vs_Phat(* Globals::ReportQvsPhatTo,0); } if (Globals::cross_training) { if (Globals::ReportNetQualityForCrossTrainTo) NQ->print_data_header(*Globals::ReportNetQualityForCrossTrainTo); Globals::nn()->test(Globals::cross_training, NQ, Globals::ReportNetQualityForCrossTrainTo, Globals::ReportNetQualityForCrossTrainChainTo); if (Globals::ReportUnitUsageTo) NQ->print_unit_usage(* Globals::ReportUnitUsageTo,0); if (Globals::ReportQvsPhatTo) NQ->print_Q_vs_Phat(* Globals::ReportQvsPhatTo,0); } if (Globals::testing) { if (Globals::ReportNetQualityForTestTo) NQ->print_data_header(*Globals::ReportNetQualityForTestTo); Globals::nn()->test(Globals::testing, NQ, Globals::ReportNetQualityForTestTo, Globals::ReportNetQualityForTestChainTo); if (Globals::ReportUnitUsageTo) NQ->print_unit_usage(* Globals::ReportUnitUsageTo,0); if (Globals::ReportQvsPhatTo) NQ->print_Q_vs_Phat(* Globals::ReportQvsPhatTo,0); } delete NQ; return 1; } static Command TestOnce("TestNet", PredTestNet, "no arguments.\n\ Runs the neural net on the training, cross-training, and test sets,\n\ reporting as specified by Report... commands.\n\ "); static int PredEquilibrate(istream& in, Command* self, ostream&logfile) { if (!Globals::training) { logfile << "Can't equilibrate---no training set\n" << flush; return 1; } logfile << "Equilibrating " << Globals::nn()->name() << "\n" <print_data_header(* Globals::ReportNetQualityForTrainTo); Globals::nn()->test(Globals::training, NQ, Globals::ReportNetQualityForTrainTo, Globals::ReportNetQualityForTrainChainTo); if (Globals::ReportUnitUsageTo) NQ->print_unit_usage(* Globals::ReportUnitUsageTo,0); if (Globals::ReportQvsPhatTo) NQ->print_Q_vs_Phat(* Globals::ReportQvsPhatTo,0); Globals::nn()->equilibrate(NQ); delete NQ; return 1; } static Command EquilibrateCommand("Equilibrate", PredEquilibrate, "no arguments.\n\ Runs the neural net on the training set,\n\ then adjusts biases and weights to try to get roughly equal usage of\n\ all hidden units.\n\ Weights of frozen layers are not adjusted, so the preceding layer is\n\ not rebalanced.\n\ "); static int PredSetOverhang(istream& in, Command* self, ostream&logfile) { int OverhangLimit=9999; char word[50]; get_word(in,word, '\n'); if (word[0]) OverhangLimit=atoi(word); if (OverhangLimit<0) { logfile << "# Error: Overhang must be non-negative, not " << OverhangLimit << "\n" << "# Overhangs not changed\n"; return 1; } if (!Globals::nn()) { logfile << "Error: can't set the overhangs until a network exists\n"; return 1; } NeuralNet *nn = Globals::nn(); int numlayers = nn->num_layers(); int lay; // counter for layers nn->remove_activation(); // set overhang based on earlier layers, but only for hidden layers int prev_overhang=0; for (lay=0; layoverhang(lay), lay++) { if (! nn->is_layer_hidden(lay)) continue; int over = prev_overhang + (nn->layer(lay)->num_wind() /2); if (over >OverhangLimit) over=OverhangLimit; nn->layer(lay)->set_overhang(over); } // set overhang based on later layers, but only for hidden layers for (lay=numlayers-2; lay>0; lay--) { if (! nn->is_layer_hidden(lay)) continue; int over = nn->overhang(lay+1) + (nn->layer(lay+1)->num_wind() /2); if (over >OverhangLimit) over=OverhangLimit; if (over < nn->layer(lay)->overhang()) nn->layer(lay)->set_overhang(over); } nn->initialize_net(logfile, Globals::training); return 1; } static Command SetOverhang("SetOverhang", PredSetOverhang, "takes one optional argument, the maximum overhang to use on any layer.\n\ The overhang is set to the maximum usable overhang, limited by the\n\ optional argument.\n\ "); static int PredFreezeLayer(istream& in, Command* self, ostream&logfile) { int lay; in >> lay; if (lay<0 || lay >= Globals::nn()->num_layers()) { logfile << "Error: illegal layer number " << lay << ", not between 0 and " << Globals::nn()->num_layers()-1 << "\n"; return 1; } Globals::nn()->layer(lay)->freeze(); return 1; } static Command FreezeLayer("Freeze", PredFreezeLayer, "takes one argument, a layer number starting with layer 0.\n\ That layer will not be modified in subsequent training (until unfrozen).\n\ "); static int PredUnfreezeLayer(istream& in, Command* self, ostream&logfile) { int lay; in >> lay; if (lay<0 || lay >= Globals::nn()->num_layers()) { logfile << "Error: illegal layer number " << lay << ", not between 0 and " << Globals::nn()->num_layers()-1 << "\n"; return 1; } Globals::nn()->layer(lay)->unfreeze(); return 1; } static Command UnfreezeLayer("Unfreeze", PredUnfreezeLayer, "takes one argument, a layer number starting with layer 0.\n\ That layer will once again be modified in subsequent training.\n\ "); static int PredHideLayer(istream& in, Command* self, ostream&logfile) { int lay; in >> lay; if (lay<0 || lay >= Globals::nn()->num_layers()) { logfile << "Error: illegal layer number " << lay << ", not between 0 and " << Globals::nn()->num_layers()-1 << "\n"; return 1; } Globals::nn()->hide_interface(lay+1); logfile << "In " << Globals::nn()->name() << endl; for (int i=1; i<=Globals::nn()->num_layers(); i++) { logfile << " layer " << i-1 << " is " << (Globals::nn()->interface(i)->is_hidden()? "" : "not ") << "hidden\n"; } return 1; } static Command HideLayer("Hide", PredHideLayer, "takes one argument, a layer number starting with 0 and going up\n\ to the number of layers-1.\n\ That layer will no longer be trained to any training data that was\n\ input, but will be treated as a hidden layer.\n\ "); static int PredUnhideLayer(istream& in, Command* self, ostream&logfile) { int lay; in >> lay; if (lay<0 || lay >= Globals::nn()->num_layers()) { logfile << "Error: illegal layer number " << lay << ", not between 0 and " << Globals::nn()->num_layers()-1 << "\n"; return 1; } Globals::nn()->unhide_interface(lay+1); logfile << "In " << Globals::nn()->name() << endl; for (int i=1; i<=Globals::nn()->num_layers(); i++) { logfile << " layer " << i-1 << " is " << (Globals::nn()->interface(i)->is_hidden()? "" : "not ") << "hidden\n"; } return 1; } static Command UnhideLayer("Unhide", PredUnhideLayer, "takes one argument, a layer number starting with 0 and going up\n\ to the number of layers-1.\n\ That layer will once again be trained to any training data that\n\ was input for it.\n\ "); static int PredPrintPrediction(istream &in, Command*self, ostream& logfile) { char filename[500]; char CASP3_author_id[300]; char separator = get_word(in, filename, '\n',0); if (separator == '\n') { CASP3_author_id[0]='\0'; } else { get_word(in, CASP3_author_id, '\n'); } if (!filename[0]) { logfile << "# ERROR: No file specified for printing prediction.\n"; return 1; } if (!Globals::predicting) { logfile << "# ERROR: Cannot make Prediction. The predicting TrainSet " << " has not been allocated\n"; return 1; } if (Globals::predicting->num_chains() < 1) { logfile << "# ERROR: Cannot make prediction. The predicting TrainSet " << " is empty.\n"; return 1; } if (Globals::nn()==NULL) { logfile << "# ERROR: The neural net has not been defined.\n"; return 1; } Globals::nn()->initialize_net(logfile, Globals::training); NetActivation * netact = Globals::nn()->net_activation(); if (netact==NULL) { logfile << "# ERROR: The neural net has not been initialized. " << "The NetActivation structure is NULL\n"; return 1; } ofstream *predict_out = Filenames::open_output(filename); if (predict_out==NULL) { logfile << "# ERROR: Opening " << filename << " for prediction output.\n"; return 1; } logfile << "# Printing prediction to " << filename << "\n"; const OneChain *curr_chain= Globals::predicting->get_chain(0); int num_layers = Globals::nn()->num_layers(); int lastlayer = num_layers-1; *predict_out << "PFRMAT SS\n" << "TARGET " << curr_chain->get_ChainID() << endl; if (CASP3_author_id[0]) { *predict_out<< "AUTHOR " << CASP3_author_id << endl; } *predict_out<< "METHOD Using neural net " << Globals::nn()->name() <layer(layer); *predict_out << "METHOD\t" << setw(5) << nl->num_wind() << '\t' << setw(5) << nl->num_out(); if (nl->output_interface()->Alpha != NULL) { *predict_out << " (" << *(nl->output_interface()->Alpha) << ")"; } *predict_out << "\n" << flush; } *predict_out << "METHOD The input amino acid frequencies were determined from\n"; *predict_out << "METHOD alignment " << curr_chain->filename() << "\n" << flush; const InterfaceDescription *input_id =Globals::nn()->interface(0); assert (input_id != NULL); if (input_id->SequenceWeighter != NULL) { *predict_out << "METHOD with weighted counts, using " << input_id->SequenceWeighter->name() << "(" << input_id->SequenceWeightBitsToSave << " bits/column, " << input_id->SequenceWeightParam << ")\n"; if (input_id->WeightingRegularizer != NULL) { *predict_out << "METHOD The weighting was determined by the posterior distribution\n" << "METHOD after regularizing with " << input_id->WeightingRegularizer->name() << ".\n" ; } *predict_out << flush; } else { *predict_out << "METHOD with unweighted counts.\n"; } if (input_id->ReRegularizer != NULL) { *predict_out << "METHOD Counts were regularized to probabilities using\n" << "METHOD " << input_id->ReRegularizer->name() << "\n"; } *predict_out << "METHOD Total sequence weight for alignment was " << curr_chain->total_sequence_weight() << "\n" << flush; *predict_out << "METHOD\n"; AlphabetTuple *alphtup= Globals::nn()->interface(num_layers)->Alpha; *predict_out << IOSFIXED << setprecision(3); for (int j=0; jnum_chains(); j++) { *predict_out << "MODEL " << j+1 << "\n"; curr_chain = Globals::predicting->get_chain(j); int length = curr_chain->num_cols(); netact->assign_chain(curr_chain); netact->activate(); for (int i=0; irecord(lastlayer,i); int psec = ar->highest_prob_output(); *predict_out << curr_chain->guide_sequence_ifd0_char(i) << " " ; alphtup->print_unindex(*predict_out, psec); *predict_out << ' ' << setw(5) << ar->probs()[psec] << '\n'; } } *predict_out << "END\n"; predict_out->close(); delete predict_out; return 1; } static Command PrintPredictionCommand("PrintPrediction", PredPrintPrediction, "takes two arguments:\n\ \t(REQUIRED) the file to which predictions are to be reported.\n\ \t(OPTIONAL) the CASP3 identifier for the predictor.\n\ Prediction data is printed for all alignments that have been added to\n\ the predicting TrainSet. For adding to this set, see the command ReadForPredict.\n\ To clear the predicting TrainSet, see the ClearPrediction command.\n\ "); static int PredPrintRDB(istream &in, Command*self, ostream& logfile) { char filename[500]; bool use_old_format = 1; char separator = get_word(in, filename, '\n',0); if (separator != '\n') { verify_word(in, "correct"); use_old_format =0; } if (!filename[0]) { logfile << "# ERROR: No file specified for printing prediction in RDB format.\n"; return 1; } const TrainSet *print_for = Globals::predicting; if (!print_for && Globals::testing) { logfile << "# No TrainSet for prediction, using testing set\n"; print_for = Globals::testing; } if (!print_for && Globals::cross_training) { logfile << "# No TrainSet for prediction, using cross_training set\n"; print_for = Globals::cross_training; } if (!print_for && Globals::training) { logfile << "# No TrainSet for prediction, using training set\n"; print_for = Globals::training; } if (!print_for) { logfile << "# ERROR: Cannot make Prediction. The predicting TrainSet " << " has not been allocated\n"; return 1; } if (print_for->num_chains() < 1) { logfile << "# ERROR: Cannot make prediction. The TrainSet is empty.\n"; return 1; } if (Globals::nn()==NULL) { logfile << "# ERROR: The neural net has not been defined.\n"; return 1; } Globals::nn()->initialize_net(logfile, Globals::training); NetActivation * netact = Globals::nn()->net_activation(); if (netact==NULL) { logfile << "# ERROR: The neural net has not been initialized. " << "The NetActivation structure is NULL\n"; return 1; } ofstream *predict_out = Filenames::open_output(filename); if (predict_out==NULL) { logfile << "# ERROR: Opening " << filename << " for prediction output.\n"; return 1; } logfile << "# Printing prediction to " << filename << "\n"; const OneChain *curr_chain= print_for->get_chain(0); int num_layers = Globals::nn()->num_layers(); int lastlayer = num_layers-1; *predict_out << "# TARGET " << curr_chain->get_ChainID() << endl; *predict_out<< "# Using neural net " << Globals::nn()->name() <layer(layer); *predict_out << "#\t" << setw(5) << nl->num_wind() << '\t' << setw(5) << nl->num_out(); if (nl->output_interface()->Alpha != NULL) { *predict_out << " (" << *(nl->output_interface()->Alpha) << ")"; int start = nl->output_interface()->TupleStart; int stop = nl->output_interface()->TupleStop; if (start || stop) { *predict_out << " positions " << start << " .. " << stop; } } *predict_out << "\n" << flush; } *predict_out << "# The input amino acid frequencies were determined from\n"; *predict_out << "# alignment " << curr_chain->filename() << "\n" << flush; const InterfaceDescription *input_id =Globals::nn()->interface(0); assert (input_id != NULL); if (input_id->SequenceWeighter != NULL) { *predict_out << "# with weighted counts, using " << input_id->SequenceWeighter->name() << "(" << input_id->SequenceWeightBitsToSave << " bits/column, " << input_id->SequenceWeightParam << ")\n"; if (input_id->WeightingRegularizer != NULL) { *predict_out << "# The weighting was determined by the posterior distribution\n" << "# after regularizing with " << input_id->WeightingRegularizer->name() << ".\n" ; } *predict_out << flush; } else { *predict_out << "# with unweighted counts.\n"; } if (input_id->ReRegularizer != NULL) { *predict_out << "# Counts were regularized to probabilities using\n" << "# " << input_id->ReRegularizer->name() << "\n"; } *predict_out << "# Total sequence weight for alignment was " << curr_chain->total_sequence_weight() << "\n" << flush; *predict_out << "#\n"; const InterfaceDescription* out_ifd = Globals::nn()->interface(num_layers); int out_size = out_ifd->num_units(); *predict_out << (use_old_format? "Pos\tAA" :"Name\tPos\tin"); if (!use_old_format && curr_chain->structure(num_layers)) { *predict_out << "\tcorrect"; } for (int o=0; ounit_name(o); } *predict_out << (use_old_format? "\n10N\t1S" : "\n10S\t10N\t1S"); if (!use_old_format && curr_chain->structure(num_layers)) { *predict_out << "\t1S"; } for (int o=0; onum_chains(); j++) { // *predict_out << "# CHAIN " << j+1 << "\n"; curr_chain = print_for->get_chain(j); int length = curr_chain->num_cols(); netact->assign_chain(curr_chain); netact->activate(); for (int i=0; irecord(lastlayer,i); if (use_old_format) { *predict_out << i+1 <<"\t" << curr_chain->guide_sequence_ifd0_char(i); } else { *predict_out << curr_chain->name() << "\t" << i+1 <<"\t" << curr_chain->guide_sequence_ifd0_char(i); if (curr_chain->structure(num_layers)) { *predict_out << "\t" << out_ifd->unit_name(curr_chain->structure(num_layers,i)); } } for (int o=0; oprobs()[o]; // refuse to print out 0 or 1---truncate at 0.001 0.999 x = x<0.001? 0.001: x>0.999? 0.999: x; *predict_out << "\t" << setw(5) << x; } *predict_out << "\n"; } } predict_out->close(); delete predict_out; return 1; } static Command PrintRDBCommand("PrintRDB", PredPrintRDB, "takes one required argument:\n\ \t the file to which predictions are to be reported\n\ and one optional argument:\n\ \t correct\n\ which indicates that the new RDB format should be used.\n\ Prediction data is printed for all alignments that have been added to\n\ the predicting TrainSet. For adding to this set, see the command ReadForPredict.\n\ To clear the predicting TrainSet, see the ClearPrediction command.\n\ \n\ If there is no predicting TrainSet, the Testing, CrossTraining, or\n\ Training set will be used (in that order of preference).\n\ \n\ If the 'correct' keyword has been added after the filename and the\n\ TrainSet used has training data, it will be output as the\n\ 'correct' column of the RDB file. Furthermore, the 'AA' column will\n\ be renamed the 'in' column and the chain name will be output.\n\ \n\ Original column headers:Pos, AA, letters of alphabet\n\ with 'correct':Name, Pos, in, correct, letters of alphabet\n\ "); static int PredPrintPredictionFasta(istream &in, Command*self, ostream& logfile) { char filename[500]; // char aa_c; int length; const OneChain *curr_chain=NULL; get_word(in, filename, '\n'); if (!filename[0]) { logfile << "# ERROR: No file specified for printing FASTA-formatted prediction."; logfile << endl; return 0; } if (!Globals::predicting) { logfile << "# WARNING: Cannot make prediction. The predicting TrainSet " << " has not been allocated\n"; return 1; } if (Globals::predicting->num_chains() < 1) { logfile << "# WARNING: Cannot make prediction. The predicting TrainSet " << " is empty.\n"; return 1; } if (Globals::nn()==NULL) { logfile << "# ERROR: The neural net has not been defined.\n"; return 0; } Globals::nn()->initialize_net(logfile, Globals::training); NetActivation *netact = Globals::nn()->net_activation(); if (netact==NULL) { logfile << "# ERROR: The neural net has not been initialized. " << "The NetActivation structure is NULL\n"; return 0; } ofstream *predict_out = Filenames::open_output(filename); if (predict_out==NULL) { logfile << "# ERROR: Opening " << filename << " for prediction output.\n"; return 0; } logfile << "# Printing prediction in FASTA format to " << filename << "\n"; int lastlayer = netact->num_layers()-1; AlphabetTuple *alphtup= Globals::nn()->interface(lastlayer+1)->Alpha; for (int j=0; jnum_chains(); j++) { curr_chain = Globals::predicting->get_chain(j); length = curr_chain->num_cols(); netact->assign_chain(curr_chain); netact->activate(); #ifdef PRINT_CHAIN // First print out the amino acid sequence *predict_out << ">" << curr_chain->get_ChainID() << endl; for (int aa_index=0; aa_indexguide_sequence_ifd0_char(aa_index); *predict_out << aa_c; if (aa_index%50==0 && aa_index>0) {*predict_out << endl;} } *predict_out << endl; #endif // and now the predicted structure *predict_out << ">" << curr_chain->get_ChainID() << endl; for (int secondary_index=0; secondary_indexrecord(lastlayer,secondary_index)->highest_prob_output(); alphtup->print_unindex(*predict_out, psec); if (secondary_index%50==0 && secondary_index>0) {*predict_out << endl;} } *predict_out << endl; } predict_out->close(); return 1; } static Command PrintPredictionFastaCommand("PrintPredictionFasta", PredPrintPredictionFasta, "takes one argument, the file to which predictions are to be reported.\n\ Output is the amino acid sequence and secondary structure in FASTA format.\n\ Prediction data is printed for all alignments that have been added to\n\ the predicting TrainSet. For adding to this set, see the command ReadForPredict.\n\ To clear the predicting TrainSet, see the ClearPrediction command.\n\ "); static int PredClearPrediction(istream &in, Command*self, ostream& logfile) { if (!Globals::predicting) { logfile << "# WARNING: Cannot clear prediction data. The predicting TrainSet " << " has not been allocated\n"; return 1; } logfile << "# Clearing " << Globals::predicting->num_chains() << " chain(s) from " << "the predicting TrainSet.\n"; Globals::predicting->clear(); return 1; } static Command ClearPredictionCommand("ClearPrediction", PredClearPrediction, "takes no arguments. Removes all chains that have been added to the\n\ predicting TrainSet. See the command ReadForPredict for adding to this TrainSet.\n\ "); //CHANGE LOG: // 30 March 1998 Kevin Karplus // Created by moving stuff from ReadCommands.cc // Added commands for freezing and unfreezing layers. // 13 April 1998 Kevin Karplus // Improved output to include all interface descriptions. // 3 May 1998 Kevin Karplus // Added ReadParam command // 28 May 1998 Kevin Karplus // Moved output and usage commands incorrectly placed in ReadCommands.cc // 8 June 1998 Kevin Karplus // Modified PredPrintPrediction to use new CASP3 format // 22 June 1998 Kevin Karplus // Improved output of "methods" for CASP3 format. // This required some changes to OneChain and alignment to // propagate names. // 20 July 1998 Kevin Karplus // Added GetOutputFile to share more code for specifying output files. // Added individual chain output options for network quality. // 21 July 1998 Kevin Karplus // Added network initialization for prediction routines. // 15 September 1999 Sugato Basu // Added PredPrintRDB to print predictions in RDB format // 22 November 2001 kevin Karplus // Added PredCenterWeights // 18 August 2003 George Shackelford // Replaced deprecated form(...) function // 30 March 2004 Sol Katzman // Deprecated ReadInput,ReadA2m in favor of ReadForPredict // 21 April 2004 Sol Katzman // Renamed guide_sequence_char to guide_sequence_ifd0_char, since it // returns a char from the Interface 0 guide sequence // Mon Jun 13 04:40:33 PDT 2005 Kevin Karplus // Picked up Jes Frellsen's additions // PredPrint1HotFasta // design1st_alg // PredDesign1st // PredDesign1stDbg // and fixed indenting to match my style. // Mon Jun 13 06:12:47 PDT 2005 Kevin Karplus // Added decaying to learning rate in design1st // Mon Jun 13 09:07:12 PDT 2005 Kevin Karplus // started switch over to using QualityRecord // Mon Jun 13 09:17:49 PDT 2005 Kevin Karplus // using QualityRecord instead of Jes's CorrectPredictedLetters // Mon Jun 13 09:39:30 PDT 2005 Kevin Karplus // eliminated PredPrint1HotFasta // in favor of print_guide_sequence_0 // Mon Jun 13 10:21:22 PDT 2005 Kevin Karplus // Added forward sampling in Design commands. // Mon Jun 13 18:43:37 PDT 2005 Kevin Karplus // Eliminated PredDesign1stDbg // by making PredDesign1st take keyword arguments // Mon Jun 13 20:12:23 PDT 2005 Kevin Karplus // Added comments to top of quality out file for Design1st // Mon Jun 13 21:21:27 PDT 2005 Kevin Karplus // Added probabilistic forward sampling // Tue Jun 14 15:35:08 PDT 2005 Kevin Karplus // Used set_guide() to restore saved guide sequence. // Tue Jun 14 17:11:40 PDT 2005 Kevin Karplus // Added timing to quality report for Design1st // Tue Jun 14 18:36:13 PDT 2005 Kevin Karplus // Modified Design1st to print report the best sequence of each run, // rather than the last sequence. // Wed Jun 15 12:37:12 PDT 2005 Kevin Karplus // Separated out forward_sampling code for greater clarity. // Moved some of the error-checking code up to Design1st. // Wed Jun 15 12:49:13 PDT 2005 Kevin Karplus // Removed netact parameter from forward_sampling and forward_prop_one_design // Wed Jun 15 13:58:26 PDT 2005 Kevin Karplus // Added a second round, that starts with a profile built from the // sequences of the first round. // Fri Jun 17 15:36:36 PDT 2005 Kevin Karplus // Added output of residue recovery to Design1st // Fri Jun 17 15:58:31 PDT 2005 Kevin Karplus // Added print_quality() // 24 June 2005 Kevin Karplus // Added output of input encoding cost as measure of profile progress in // design. // Fri Jun 24 04:36:31 PDT 2005 Kevin Karplus // Fixed timer to use CLOCKS_PER_SEC for better portability // Fri Jun 24 20:44:10 PDT 2005 Kevin Karplus // Replaced Globals::NN with Globals::nn() // Thu Jul 7 04:54:11 PDT 2005 Kevin Karplus // Moved close before test for empty file name in GetOutputFile // Fri Jul 8 10:28:39 PDT 2005 Kevin Karplus // Added expected % sequence recovery to design quality reports. // Sat Jul 9 04:55:32 PDT 2005 Kevin Karplus // Changed Globals::design_chain to Globals::design_chains[0] // Sat Jul 9 08:13:12 PDT 2005 Kevin Karplus // Implemented multiple networks for design. // Sat Jul 9 16:01:50 PDT 2005 Kevin Karplus // Added most-probable input sequence option to design1st sampling // Sun Jul 10 16:55:46 PDT 2005 Kevin Karplus // Moved design routines to new file DesignCommands.cc // Mon Dec 27 14:33:24 PST 2010 Kevin Karplus // Modified PrintRDB to switch to testing, cross-training, or // training if prediction set not specified. // Added name of chain. // Added correct output if TrainSet used has a single correct output. // Mon Dec 27 16:00:22 PST 2010 Kevin Karplus // Fixed off-by-one bug for detecting single correct output // Fri Jan 7 08:26:38 PST 2011 Kevin Karplus // Made new format (with chain name and correct output) optional // PrintRDB, to maintain backward compatibility.