// ActivationRecord.h // copyright 28 July 1997 Kevin Karplus #ifndef ActivationRecord_H #define ActivationRecord_H #include "NeuralLayer.h" class ActivationRecord { // In a NetActivation, there is one ActivationRecord for each // column of each layer. The number of columns of a layer equals // the overhangs for the layer plus the number of columns of // the OneChain being activated. This is called the layer length. // // Each activation record has the vector of output probabilities // for the given layer and column. When it computes these probabilities // it uses a window of inputs, which is a (small) subset of the // layer length of the previous layer. // For a more complete description of the architecture of a layer, // including the calculation of the output vector, see NeuralLayer.h const float **In; // what are the inputs (not owned by this) float *Probs; // normalized output probabilities // owned by this const NeuralLayer * Layer; // activation record for which layer? int MyPosition; // position in the range [0, layer length) double *Sums; // sum of gain*weight*in + bias for each output double *ExpSums; // exp(Sums)+Pseudo double InvSumExpSums; // 1.0 / sum of ExpSums // The following parameters are set when the cost of desired outputs // is computed. double NullCost; // encoding cost in nats of outputs, // given BackgroundProbs: sum out_weights[o] * log(BackgroundProbs[o]) double Cost; // encoding cost in nats of outputs, // given Probs: sum out_weights[o] * log(Probs[o]) double OutSum; // sum of weights of all outputs int MostProbable; // which output has highest predicted probability double PhatMostProbable; // predicted probability of MostProbable double MostOutWeight; // weight in output vector for most probable // single character // The following are used for backpropagation. double *Partials; // partials of error function with respect to Probs[o] double *Share; // partials of error function with respect to Pseudo[o] void clear(void); // clear previous uses, but leave allocated public: ActivationRecord(const NeuralLayer *lay); ~ActivationRecord(void); inline void tell_position(int pos) {MyPosition = pos;} inline int position(void) const {return MyPosition;} inline const NeuralLayer * layer(void) const {return Layer;} inline const float **in(void) const {return In;} inline const float * in(int wind) const {return In[wind];} inline const float in(int wind, int input) const {return In[wind][input];} // apply the layer to a set of inputs // caching probs, sums, and exp(sums)+pseudo for later use. void compute_probs(const float** inputs); // what would layer return if all inputs were 0? void compute_dummy_probs(void); int highest_prob_output(void) const; // return cached arrays. inline const float* probs(void) const {return Probs;} inline const double* sums(void) const {return Sums;} inline const double* exp_sums(void) const {return ExpSums;} // partials is the partial derivatives of the error function w.r.t. // Probs[o]---it needs to be set by set_share_from_entropy // or set_partials or add_partials_from_next inline const double* partials(void) const {return Partials;} // share is the partial derivatives of the error function with respect // to Pseudo[o] inline const double* share(void) const {return Share;} // exp_share(o) is the partial derivative of the error function // with respect to Sums[o] (as long as Pseudo hasn't been updated yet) inline const double exp_share(int o) const { return Share[o] * (ExpSums[o] - Layer->pseudo(o)); } inline void clear_cost(void) { Cost=0; NullCost=0; OutSum=0; MostOutWeight=0; MostProbable=0; PhatMostProbable = 0; } inline void clear_partials(void) { for (int o=Layer->num_out()-1; o>=0; o--) Share[o] = Partials[o] = 0; } // warning: need to set_share_from_partials after this inline void set_partials(const double *parts) { for (int o=Layer->num_out()-1; o>=0; o--) Partials[o] = parts[o]; } // The encoding cost in nats of the desired output // (set by either cost function that has an argument) inline double cost(void) const { return Cost; } inline double null_cost(void) const { return NullCost; } inline double out_sum(void) const { return OutSum; } inline double most_out_weight(void) const { return MostOutWeight;} inline int most_probable(void) const {return MostProbable;} inline double phat_most_probable(void) const {return PhatMostProbable;} // set Cost to the encoding cost of the counts in out_weights // (also set OutSum and MostOutWeight and NullCost) double cost(const float *out_weights); // compute the cost of encoding the counts in out_weights // with respect to the probs, and return the value double identity_cost(const float * probs, const float * out_weights, int num_out) const; // set Cost to the encoding cost of the single count at correct_out // (also set OutSum and MostOutWeight and NullCost) double cost(int correct_out); // compute the cost of encoding the single count at correct_out // with respect to the probs, and return the value double identity_cost(const float * probs, int correct_out) const; // set Cost to precomputed cost c inline void set_cost(double c){Cost = c;} // set OutSum to precomputed value sum inline void set_outsum(double sum){OutSum = sum;} // set Partials and Share // to the partial derivatives of Cost (times the multiplier) // with respect to outputs and Pseudo[i] void set_share_from_entropy(const float *out_weights, double multiplier=1.0); void set_share_from_entropy(int correct_out, double multiplier=1.0); // set Partials and Share // to the partial derivatives of // for non-output layers, // Given next (one of the fanouts of this) that has already had its // Partials and Share computed, add to the partial derivatives of // the error function with respect to the outputs. // // w tells which of the input sets of next this connects to. // // Note: do clear_partials before accumulating from fanouts. // When all contributions from the fanouts have been gathered, // do set_share_from_partials. void add_partials_from_next(const ActivationRecord *next, const int w); // Set to Share to the partial derivatives of the error // function with respect to Pseudo[o] // from already set partial derivatives of the error function // with respect to the outputs. void set_share_from_partials(void); inline void set_share_from_partials(const double *parts) { set_partials(parts); set_share_from_partials(); } }; #endif // CHANGE LOG: // 25 July 1998 Kevin Karplus // added MostProbable and PhatMostProbable // 20 Apr 2004 Sol Katzman // Make ActivationRecord data member Layer private,uppercase; add public layer() function. // inline a few existing functions without functional change. // 24 May 2004 Sol Katzman // inline numerous simple functions // 25 May 2004 Kevin Karplus // inlined remaining simple functions