// BackgroundProbs.h // Fri Jun 17 04:33:40 PDT 2005 Kevin Karplus #ifndef BackgroundProbs_H #define BackgroundProbs_H #include "NamedClass/NamedClass.h" #include "AlphabetTuple/AlphabetTuple.h" #include #include class BackgroundProbs; // forward declaration class Regularizer; // class for keywords that can be used in BackgroundProbs input class BackgroundInputCommand: public NamedObject { typedef int (*fcn)(istream &in, BackgroundProbs *change, BackgroundInputCommand* self); fcn CommandFunction; // function to execute when keyword found. // Reading from "in" into "change". // Pass this down to function as 3rd arg, // so it can report error using self->name(). // Return 1 if input should continue, 0 if error or end of input. public: BackgroundInputCommand(const char *nm, fcn c=NULL, const char *hlp=NULL) : NamedObject(nm,hlp) { CommandFunction=c; } int execute(istream &in, BackgroundProbs *change) { return (*CommandFunction)(in, change, this); } }; int ReadComment(istream &in, BackgroundProbs *change, BackgroundInputCommand* self); // function for BackgroundInputCommand that treats keyword as a comment // and skips to end of line. // abstract class for providing background probability distributions class BackgroundProbs : public NamedClass, public NamedObject { AlphabetTuple *alph; static IdObject ID; static NameToPtr *CommandTable; int *InputOrder; // order of symbols for input (from read_order) float *Probs; // array of probabilities static NamedClass *create_BackgroundProbs(void); protected: virtual void write_knowing_type(ostream &out) const; int read_knowing_type(istream &in); virtual NameToPtr * command_table(void) {return CommandTable;} virtual void init_command_table(void); public: BackgroundProbs(void) {alph=NULL; InputOrder=NULL; Probs=NULL;} BackgroundProbs(const Alphabet *a, const char* nm=NULL, const char *h=NULL) : NamedObject(nm,h) { alph=new AlphabetTuple(a); InputOrder=NULL; Probs = new float[alph->num_normal()]; } BackgroundProbs(const AlphabetTuple *a, const char* nm=NULL, const char *h=NULL) : NamedObject(nm,h) { alph=new AlphabetTuple(*a); InputOrder=NULL; Probs = new float[alph->num_normal()]; } // constructor from regularizer // (regularize all-zero count vector to get background prob) BackgroundProbs(Regularizer *r, const char* nm=NULL, const char *h=NULL); virtual ~BackgroundProbs(void) { delete [] InputOrder; delete [] Probs; delete alph; }; virtual IdObject* type(void) const {return &BackgroundProbs::ID;} static IdObject* classID(void) {return &BackgroundProbs::ID;} inline const AlphabetTuple* alphabet_tuple() const {return alph;} inline int alphabet_size() const {return alph->num_normal();} // Note: this doesn't copy the tuple, but the BackgroundProbs now owns it. // set_alphabet_tuple should only be called with // set_alphabet_tuple(new AlphabetTuple ...) inline void set_alphabet_tuple(AlphabetTuple* a) { if (InputOrder) { cerr << "Can't change alphabet after InputOrder set\n" << flush; return; } delete alph; alph= a; delete Probs; Probs = new float[alphabet_size()]; } inline void set_alphabet(const Alphabet *a=Alphabet::ret_default()) { set_alphabet_tuple(new AlphabetTuple(a)); } void print_order(ostream&out) const; // print the order of the alphabet // (used in several "print" routines) // Note: this is NOT (at the moment) the same as the InputOrder. inline const int *input_order(void) const {return InputOrder;} void read_order(istream& in); // read an order for the alphabet and put it in InputOrder virtual void print_info(ostream& out) const { out << type()->name() << " " << name(); } // print some sort of useful message about your distrubution, // like the name and parameters. This will probably be used // to identify what distribution was used in a given experiment // so make it useful. static BackgroundProbs* read_new(istream& in, IdObject* required_type=&BackgroundProbs::ID); static BackgroundProbs* read_new(const char* filename, IdObject* required_type=&BackgroundProbs::ID); // make sure the probabilities sum to 1 inline void normalize(void) { double sum=0; for (int i=alphabet_size()-1; i>=0; i--) { assert(Probs[i]>=0); sum+=Probs[i]; } assert(sum>0); for (int i=alphabet_size()-1; i>=0; i--) { Probs[i] /= sum; } } // access to the probabilities inline const float* probs(void) const {return Probs;} inline float* probs(void) {return Probs;} inline float operator[](int i) const {return Probs[i];} inline float& operator[](int i) {return Probs[i];} double encoding_cost(const float *counts) const; // return cost of encoding an ordered sequence of characters, // given the counts of each character type in bits, inline double entropy(void) const { return encoding_cost(Probs); } }; //CHANGE LOG: // Fri Jun 17 04:33:53 PDT 2005 Kevin Karplus // BackgroundProbs header file derived by simplifying Regularizer // header file. // Fri Jun 17 10:13:09 PDT 2005 Kevin Karplus // Added constructor from Regularizer, // converted Probs array to float from double. // Sat Jun 18 20:34:12 PDT 2005 Kevin Karplus // Added Probs reallocation in set_alphabet_tuple // Sat Jun 18 21:36:31 PDT 2005 Kevin Karplus // Added [] operator. // Sat Jun 18 23:10:47 PDT 2005 Kevin Karplus // Added entropy() member function #endif