// GribskovReg.cc // Kevin Karplus // 1 November 1994 // Substitution Matrix using Gribskov average score method as a Regularizer #include #include #include // for bzero #include // for log and exp #include "GribskovReg.h" #include "Utilities/abs.h" // for abs // To do: // Make background counts be part of parameters to optimize! static NamedClass* create_GribskovReg(void) {return new GribskovReg;} IdObject GribskovReg::ID("GribskovReg", create_GribskovReg, Regularizer::init_is_a, "a regularizer based on score matrix (a log-probability substituion \nmatrix) using Gribskov's average score method." ); NameToPtr * GribskovReg::CommandTable=0; Regularizer* GribskovReg::copy(void) const { GribskovReg* c=new GribskovReg(alphabet_tuple(),name()); c->set_log_base(log_base()); for(int col=alphabet_size()-1; col>=0; col--) { c->background(col) = background(col); for (int row=alphabet_size()-1; row>=0; row--) c->element(row, col) = element(row,col); } return c; } // allocate and initialize arrays // Background is set to flat distribution // score matrix is set to nearly diagonal matrix // (AlphabetSize-1 on diagonal, -1 off diagonal) void GribskovReg::alloc(void) { delete [] Gribskov; delete [] cache_probs; delete [] Background; int AlphabetSize = alphabet_size(); Gribskov = new float[AlphabetSize*AlphabetSize]; cache_probs = new float[AlphabetSize]; Background = new float[AlphabetSize]; float share = 1./AlphabetSize; for (int i=AlphabetSize-1; i>=0; i--) { Background[i] = share; float *row = Gribskov+i*AlphabetSize; for (int j=AlphabetSize-1; j>=0; j--) row[j] = -1; row[i] = AlphabetSize-1; } } static int ReadLogBase(istream &in, Regularizer *change, RegInputCommand* self) { double LogBase; in >> LogBase; dynamic_cast(change)->set_log_base(LogBase); return 1; } static int ReadBase(istream &in, Regularizer *change, RegInputCommand* self) { double base_for_log; in >> base_for_log; dynamic_cast(change)->set_log_base(log(base_for_log)); return 1; } static int ReadScores(istream &in, Regularizer *change, RegInputCommand* self) { GribskovReg *g =dynamic_cast(change); g->alloc_if_needed(); const int *order = g->input_order(); double LogBase = g->log_base(); for (int i=0; ialphabet_size(); i++) { for (int j=0; jalphabet_size(); j++) { double d; in >> d; if (order) g->element(order[i], order[j]) = d*LogBase; else g->element( i, j ) = d*LogBase; } } g->normalize(); return 1; } static int ReadBackground(istream &in, Regularizer *change, RegInputCommand* self) { GribskovReg *g =dynamic_cast(change); g->alloc_if_needed(); const int *order = g->input_order(); for (int i=0; ialphabet_size(); i++) { double d; in >> d; g->background(order? order[i]: i) = d; } g->normalize(); return 1; } void GribskovReg::init_command_table() { assert(!CommandTable); CommandTable = new NameToPtr(7); CommandTable->ignore_case(); CommandTable->AddName(new RegInputCommand("Scores", ReadScores)); CommandTable->AddName(new RegInputCommand("Background", ReadBackground)); CommandTable->AddName(new RegInputCommand("LogBase", ReadLogBase)); CommandTable->AddName(new RegInputCommand("Base", ReadBase)); } void GribskovReg::write_knowing_type(ostream &out) const { Regularizer::write_knowing_type(out); out << "Base = " << exp(log_base()) << "\n"; out << "Background = \n" ; int i; for (i=0; i=0; i--) sum_counts += TrainCounts[i]; if (sum_counts<=0.) { for (i=alphabet_size()-1; i>=0; i--) probs[i] = Background[i]; return; } // do matrix mult for (i=alphabet_size()-1; i>=0; i--) probs[i] = 0.; for (int j=alphabet_size()-1; j>=0; j--) { if (TrainCounts[j]<=0.) continue; for (i=alphabet_size()-1; i>=0; i--) probs[i] += element(i,j) *TrainCounts[j]; } // get average score, expontentiate, and multiply by background for (i=alphabet_size()-1; i>=0; i--) cache_probs[i] = probs[i] = exp(probs[i]/sum_counts) *Background[i]; } void GribskovReg::partials1(float *part1, int i, const float* Counts) { bzero(part1, num_parameters()*sizeof(float)); if (sum_counts<=0.) return; for (register int j=alphabet_size()-1; j>=0; j--) { part1[alphabet_size()*i+j] = cache_probs[i]*Counts[j]/sum_counts; } } void GribskovReg::partials2(float *part1, float* part2, int i, const float* Counts) { bzero(part1, num_parameters()*sizeof(float)); bzero(part2, num_parameters()*sizeof(float)); if (sum_counts<=0.) return; for (register int j=alphabet_size()-1; j>=0; j--) { register float tmp=Counts[j]/sum_counts; part1[alphabet_size()*i+j] = cache_probs[i]*tmp; part2[alphabet_size()*i+j] = cache_probs[i]*tmp*tmp; } } // CHANGE LOG: // 18 Dec 1995 Kevin Karplus // Changed output of LogBase to output of Base (2.71828, instead // of 1.0 for natural logarithm), both accepted on input. // Flipped meaning of log_base, so that it is ln(base) used for // logarithm. // Added Background probabilities and modified GribskovReg definition // so that ln p(i|j)/p(i) is optimal for sample size 1. // Moved alloc() from .h to .cc file, and added initialization. // 1 Feb 1996 Kevin Karplus // Fixed bug in copy---forgot to copy background. // 15 March 2004 Kevin Karplus // Fixed old-style casts // control information for gnuemacs to simplify compilation // Local Variables: // compile-command: "rsh apache '(cd dna/src/Regularizer; make GribskovReg.o; ar ruv ../../lib/libultimate.a GribskovReg.o; ranlib ../../lib/libultimate.a)'" // End: