// TrainSet.h // 21 July 1997 // Kevin Karplus #ifndef TrainSet_H #define TrainSet_H #include #include #include #include #include "NeuralNet.h" // Forward References: class OneChain; class alignment; class DirichletReg; class TrainSet { int NumColumns; int NumChains, AllocChains; OneChain **Chains; NeuralNet *NN; // neural net that this is set up for // used to determine number of layers // and size of accumulate_counts layers float **accumulate_counts; int *training_order; public: TrainSet(void); ~TrainSet(void); void clear(void); void shuffle_training_order(void) const; // The following functions are associated with accumulating counts of // the correct output values void initialize_accumulation(NeuralNet *nn); inline void add_count(int layer, int output, float prob=1) { assert (layer>=0); if (output >= 0) accumulate_counts[layer][output]+= prob; } inline int layer_needs_counts(int layer) const { return accumulate_counts[layer]!=NULL; } inline double sum_counts(int layer) const { double sum=0; assert(layer>=0 && layernum_layers()); assert(accumulate_counts[layer] != NULL); for (int i=0; ilayer(layer)->num_out(); ++i) sum += accumulate_counts[layer][i]; return sum; } inline float output_count(int layer, int op) const { float count=0; assert(layer>=0 && layernum_layers()); assert(accumulate_counts[layer] != NULL); assert(op>=0 && oplayer(layer)->num_out()); count = accumulate_counts[layer][op]; return count; } // return average cost (in nats/position) of outputs, given null model // that matches observed distribution of outputs. inline double null_cost(int layer) const { if (! layer_needs_counts(layer)) return 0.; double sum=sum_counts(layer); if (sum<=0.) return 0.; double ent=0; for (int i=0; ilayer(layer)->num_out(); ++i) { double cnt = accumulate_counts[layer][i]; if (cnt>0) ent -= cnt * log( cnt/sum); } return ent/sum; } inline int num_chains(void) const {return NumChains;} inline int num_cols(void) const {return NumColumns;} void ReAlloc(int new_num_chains); void add_chain(OneChain *chain); const OneChain *get_chain(int k) const; const OneChain *get_shuffled_chain(int k) const; }; // CHANGE LOG: // 8 Jan 2001 Kevin Karplus // Added null_cost() and made sum_counts() return double. // 9 Oct 2001 Kevin Karplus // Added test for output<0 (wildcard) in add_count // 24 May 2004 Kevin Karplus // inlined simple functions #endif