// Substitution Matrix [plus Pseudocounts] [plus scaled counts] // as a Regularizer #ifndef SUBSTPSEUDOReg_H #define SUBSTPSEUDOReg_H #include "Regularizer.h" #include class SubstPseudoReg : public Regularizer { int FreezeCols; // set if optimization should only adjust // sums of columns and not whole matrix int FreezePseudocounts; // set if pseudocounts should not be adjusted float *SumCols; // sums of the columns // (plus one if AddScaledCounts and in // the substitution matrix) float *subst; // non-square matrix (alphabet_size * alphabet_size+1) // posterior_counts = subst * cat(counts,1) // that is, last column used as pseudocounts int AddPseudocounts; // if set, use pseudocounts int AddScaledCounts; // if set, posterior_counts = subst * cat(counts,1) + |counts|counts double SumCount; // cache of sum of training counts // computed in get_modified_counts // used there and in partials1 static IdObject ID; static NameToPtr* CommandTable; void write_knowing_type(ostream &out) const; void init_command_table(void); NameToPtr *command_table(void) {return CommandTable;} int row_size_for_matrix(void) const {return alphabet_size()+1;} int row_size_for_parameters(void) const {return alphabet_size()+ ((AddPseudocounts && !FreezePseudocounts)? 1: 0);} void clear(void); // set deletable pointers to 0 public: SubstPseudoReg(void) {clear();} SubstPseudoReg(const Alphabet *a, istream &in, const char *nm) : Regularizer(a,nm) { clear(); alloc(); read_knowing_type(in); } SubstPseudoReg(const Alphabet *a, const char *nm=0) : Regularizer(a,nm) { clear(); alloc(); } SubstPseudoReg(const AlphabetTuple *a, const char *nm=0) : Regularizer(a,nm) { clear(); alloc(); } Regularizer* copy(void) const; void delete_all(void) // delete { delete [] subst; delete [] SumCols; } ~SubstPseudoReg() {delete_all();} void alloc(void); // alloc arrays as needed static IdObject* classID(void) {return &ID;} virtual IdObject* type(void) const {return &ID;} int num_columns(void) const {return row_size_for_parameters();} // translate a word for the variant of substitution matrix desired // (subst_only, pseudocounts, scaled_counts, pseudocounts+scaled_counts) // into the correct setting of use_scaled_counts and use_pseudocounts int translate_option(const char*word); int use_scaled_counts(void) const {return AddScaledCounts;} void use_scaled_counts(int i) { if (SumCols && AddScaledCounts!=i) { float ChangeToSum = i? 1 : -1; for (int col=alphabet_size()-1; col>=0; col--) SumCols[col] += ChangeToSum; } AddScaledCounts=i; } int use_pseudocounts(void) const {return AddPseudocounts;} void use_pseudocounts(int i) {AddPseudocounts=i;} void freeze_columns(void) {FreezeCols=1;} void unfreeze_columns(void) {FreezeCols=0;} void freeze_pseudocounts(void) {FreezePseudocounts=1;} void unfreeze_pseudocounts(void) {FreezePseudocounts=0;} void print_info(ostream &out) const { Regularizer::print_info(out); out << "Subst matrix" << (AddPseudocounts? "+pseudocounts" : "") << (AddScaledCounts? "+scaled counts: " : ": ") ; } void get_modified_counts( const float* TrainCounts, // what you use as counts float* probs); // you fill this in with probs. // unique to SubstPseudoReg: // column alphabet_size() used for pseudocounts float element(int row, int col) const { return subst[row_size_for_matrix()*row+col]; } void set_element(int row, int col , float val); // Smallest legal element for each position in matrix. float min_element(int row, int col) const; float sum_col(int col) const {return SumCols[col];} void normalize(void); protected: float& element_ref(int i, int j) const { return subst[row_size_for_matrix()*i+j]; } int num_parameters(void) const { return FreezeCols? row_size_for_parameters() : (alphabet_size()* row_size_for_parameters()); } float min_parameter(int i) const; float parameter(int i) const; void set_parameter(int i, float p); void partials1(float *part1, int i, const float* Counts); void compute_sum_cols(int col); }; // CHANGE LOG: // 15 Mar 1995 Kevin Karplus // Fixed normalization bug. // 26 May 1995 Kevin Karplus // Added freeze_columns() feature, and moved parameter routines // to .cc file // Eliminated reference access to element(i,j)---now need to use // set_element(i,j,value) // 31 May 195 Kevin Karplus // Added freeze_pseudocounts #endif