// Substitution Matrix method as a Regularizer // Gribskov method (averaging log scores) #ifndef GribskovReg_H #define GribskovReg_H #include "Regularizer.h" #include class GribskovReg : public Regularizer { float *Gribskov; // square matrix: score(i,j) float *Background; // assumed background probability p(i) //caches computed by get_modified_counts // and used in partials1 and partials2 float *cache_probs; // cache of probs double sum_counts; // cache of |Counts| double LogBase; // note: LogBase is the natural logarithm of the base used // for the scores in the files. // common ones are 1.0 (scores in file already natural logs) // M_LN2 (scores in file are in bits) // M_LN2/2 (scores are in half bits) // M_LN10 (scores in file are log base 10) // M_LN10/10 (scores in file are 10 log base 10) static IdObject ID; static NameToPtr* CommandTable; void write_knowing_type(ostream &out) const; void init_command_table(void); NameToPtr *command_table(void) {return CommandTable;} public: GribskovReg(void) : Regularizer() { Gribskov=0; cache_probs=0; Background=0; LogBase=1.0; } GribskovReg(const Alphabet *a, istream &in, const char *nm, double l_base=1.0) : Regularizer(a, nm) { Gribskov=0; cache_probs=0; Background=0; LogBase = l_base; alloc(); read_knowing_type(in); } GribskovReg(const Alphabet *a, const char* nm=0) : Regularizer(a, nm) { Gribskov=0; cache_probs=0; Background=0; LogBase = 1.0; alloc(); } GribskovReg(const AlphabetTuple *a, const char* nm=0) : Regularizer(a, nm) { Gribskov=0; cache_probs=0; Background=0; LogBase = 1.0; alloc(); } Regularizer *copy(void) const; ~GribskovReg() { delete [] Gribskov; delete [] cache_probs; delete [] Background; } static IdObject* classID(void) {return &ID;} virtual IdObject* type(void) const {return &ID;} void set_log_base(double b) {LogBase = b;} double log_base(void) const { return LogBase;} void alloc_if_needed(void) { if (!Gribskov) alloc(); } // allocate and initialize arrays // Background is set to flat distribution // score matrix is set to nearly diagonal matrix // (AlphabetSize-1 on diagonal, -1 off diagonal) void alloc(void) ; void get_modified_counts( const float* TrainCounts, // what you use as counts float* probs); // you fill this in with probs. // unique to GribskovReg: float &element(int i, int j) {return Gribskov[alphabet_size()*i+j];} float element(int i, int j) const {return Gribskov[alphabet_size()*i+j];} float &background(int i) {return Background[i];} float background(int i) const {return Background[i];} void normalize(void) // make matrix sum to 0. background sum to 1 { double sum=0., sum_back=0; int i; for (i=alphabet_size()-1; i>=0; i--) { sum_back+= Background[i]; for (register int j=alphabet_size()-1; j>=0; j--) sum += element(i,j); } for (i=alphabet_size()-1; i>=0; i--) Background[i] /= sum_back; sum/= alphabet_size()*alphabet_size(); for (i=num_parameters()-1; i>=0; i--) Gribskov[i] -=sum; } protected: int zero_second_deriv(void) const {return 0;} int num_parameters(void) const {return alphabet_size()*alphabet_size();} int use_log(void) const {return 0;} // don't use log(param), since parameters can be negative float parameter(int i) const {return Gribskov[i];} void set_parameter(int i, float p) {Gribskov[i] = p;} float min_parameter(int i) const {return -1.e4;} float max_parameter(int i) const {return 1.e4;} void partials1(float *part1, int i, const float* Counts); void partials2(float *part1, float *part2, int i, const float* Counts); }; //CHANGE LOG: // 30 March 2004 Kevin Karplus // Fixed some parameters that shadowed members. #endif