// ActivationRecord.cc
// copyright Kevin Karplus
// 28 July 1997

#include <assert.h>

#define __USE_ISOC99 1	// for isfinite() in math.h
#include <math.h>

#include "Regularizer/BackgroundProbs.h"
#include "ActivationRecord.h"
#include "LearningParam.h"
#include "NeuralNet.h"
#include "Globals.h"

ActivationRecord::ActivationRecord(const NeuralLayer *lay)
{
    int size = lay->num_out();
    Layer = lay;
    In = 0;
    Probs = new float[size];
    Sums = new double[size];
    ExpSums = new double[size];
    Partials = new double[size];
    Share = new double[size];
    clear();
}

// zero all the per-output arrays
void ActivationRecord::clear(void)
{
    int size = Layer->num_out();
    for (int i=0; i<size; i++)
    {
        Probs[i] = 0;
        Sums[i] = 0;
        ExpSums[i] = 0;
        Partials[i] = 0;
        Share[i] = 0;
    }
}

// compute Sums, ExpSums, and Probs from the inputs
// (the name and parameter type shown here are assumed;
//  they must match the declaration in ActivationRecord.h)
void ActivationRecord::compute_probs(const float* const* inputs)
{
    int numwind = Layer->num_wind();
    int numout = Layer->num_out();
    int numin = Layer->num_in();
    In = inputs;	// save for the record, usable as in(),in(w),in(w,i)
    assert(in() != NULL);

    int o;	// counter for outputs
    for (o=numout-1; o>=0; o--)
    {
        double sum = 0;
        for (int w=numwind-1; w>=0; w--)
        {
            assert(in(w) != NULL);
            for (int i=numin-1; i>=0; i--)
            {
                assert(-999 < in(w,i));
                assert(in(w,i) < 999);
                sum += Layer->weight(i,w,o) * in(w,i);
                assert(-999999 < sum);
                assert(sum < 99999);
            }
        }
        sum *= Layer->gain(o);
        sum += Layer->bias(o);
        assert(-999999 < sum);
        assert(sum < 99999);
        Sums[o] = static_cast<double>(sum);
    }

    double SumExpSums=0.0;
    for (o=numout-1; o>=0; o--)
    {
        ExpSums[o] = exp(Sums[o]) + Layer->pseudo(o);
#ifdef DEBUG
        assert(isfinite(ExpSums[o]));
#endif
        SumExpSums += ExpSums[o];
    }
    InvSumExpSums = 1.0/SumExpSums;
    for (o=numout-1; o>=0; o--)
        Probs[o] = static_cast<float>(ExpSums[o] * InvSumExpSums);
}

int ActivationRecord::highest_prob_output(void) const
{
    int best=0;
    float highest_prob=Probs[best];
    int numout = Layer->num_out();
    for (int i=numout-1; i>0; --i)
    {
        if (Probs[i] > highest_prob)
        {
            best = i;
            highest_prob = Probs[best];
        }
    }
    return best;
}

// compute the activation of the network for all-zero inputs
void ActivationRecord::compute_dummy_probs(void)
{
    int o;	// counter for outputs
    int numout = Layer->num_out();
    for (o=numout-1; o>=0; o--)
        Sums[o] = static_cast<double>(Layer->bias(o));

    double SumExpSums=0;
    for (o=numout-1; o>=0; o--)
    {
        ExpSums[o] = exp(Sums[o]) + Layer->pseudo(o);
#ifdef DEBUG
        assert(isfinite(ExpSums[o]));
#endif
        SumExpSums += ExpSums[o];
    }
    InvSumExpSums = 1.0/SumExpSums;
    for (o=numout-1; o>=0; o--)
        Probs[o] = static_cast<float>(ExpSums[o] * InvSumExpSums);
}

// set Cost to the encoding cost of the counts in out_weights
double ActivationRecord::cost(const float *out_weights)
{
    Cost=0;
    NullCost=0;
    OutSum=0;
    MostOutWeight=0;
    MostProbable=0;
    PhatMostProbable = Probs[MostProbable];

    const InterfaceDescription* ifd = Layer->output_interface();
    const BackgroundProbs* NullProbs = Globals::background_probs(ifd->Alpha);

    int numout = Layer->num_out();
    for (int o=numout-1; o>=0; o--)
    {
        if (Probs[o] > PhatMostProbable)
        {
            MostProbable = o;
            PhatMostProbable = Probs[MostProbable];
        }
        if (out_weights[o])
        {
            NullCost -= out_weights[o] * log((*NullProbs)[o]);
            Cost -= out_weights[o] * log(Probs[o]);
            OutSum += out_weights[o];
        }
    }
    MostOutWeight = out_weights[MostProbable];
    return Cost;
}

// compute the cost of encoding the counts in out_weights
// with respect to the probs, and return the value
double ActivationRecord::identity_cost(const float * probs,
        const float * out_weights, int num_out) const
{
    double cost = 0;
    for (int o=0; o<num_out; o++)
    {
        if (out_weights[o])
            cost -= out_weights[o] * log(probs[o]);
    }
    return cost;
}

// set Cost to the encoding cost of the single count at correct_out
// (correct_out<0 is a wild card and costs nothing)
double ActivationRecord::cost(int correct_out)
{
    // (the initializations below are reconstructed to parallel
    //  cost(const float*) above)
    Cost = correct_out<0 ? 0 : -log(Probs[correct_out]);
    NullCost = 0;
    OutSum = correct_out<0 ? 0 : 1;
    MostProbable = correct_out<0 ? 0 : correct_out;
    MostOutWeight = correct_out<0 ? 0 : 1;
    if (0 <= correct_out)
    {
        const InterfaceDescription* ifd = Layer->output_interface();
        const BackgroundProbs* NullProbs = Globals::background_probs(ifd->Alpha);
        NullCost = - log((*NullProbs)[correct_out]);
    }
    PhatMostProbable = Probs[MostProbable];
    int numout = Layer->num_out();
    for (int o=numout-1; o>=0; o--)
    {
        if (Probs[o] > PhatMostProbable)
        {
            MostOutWeight = 0;
            MostProbable = o;
            PhatMostProbable = Probs[MostProbable];
        }
    }
    return Cost;
}

// compute the cost of encoding the single count at correct_out
// with respect to the probs, and return the value
double ActivationRecord::identity_cost(const float * probs, int correct_out) const
{
    return correct_out<0 ? 0 : -log(probs[correct_out]);
}

// Set Share to the partial derivatives of the error
// function with respect to Pseudo[o],
// computed from the partial derivatives of the error function
// with respect to the outputs.
void ActivationRecord::set_share_from_partials(void)
{
    double sum_weighted_partials=0.;
    int o;	// counter for outputs
    int numout = Layer->num_out();
    for (o=numout-1; o>=0; o--)
        sum_weighted_partials += Probs[o] * Partials[o];
    for (o=numout-1; o>=0; o--)
        Share[o] = Probs[o] * Partials[o] / ExpSums[o]
            - sum_weighted_partials*InvSumExpSums;
}

// set Partials and Share
// to the partial derivatives of the encoding cost of the counts in
// out_weights with respect to the outputs and Pseudo[i].
void ActivationRecord::set_share_from_entropy(const float *out_weights,
        double multiplier)
{
    double sum_weight=0;	// sum of out_weights
    int o;	// counter for outputs
    int numout = Layer->num_out();
    for (o=numout-1; o>=0; o--)
    {
        sum_weight += out_weights[o];
    }
    for (o=numout-1; o>=0; o--)
    {
        Share[o] = multiplier * (sum_weight*InvSumExpSums - out_weights[o]/ExpSums[o]);
        Partials[o] = -multiplier * out_weights[o] / Probs[o];
    }
}

// set Partials and Share to the partial derivatives of the encoding
// cost of the single count at correct_out with respect to the outputs and
// Pseudo[i]
void ActivationRecord::set_share_from_entropy(int correct_out, double multiplier)
{
    assert(correct_out >= 0);
    int numout = Layer->num_out();
    for (int o=numout-1; o>=0; o--)
    {
        Share[o] = multiplier*InvSumExpSums;
        Partials[o] = 0.0;
    }
    Share[correct_out] -= multiplier/ExpSums[correct_out];
    double p = Probs[correct_out];
    Partials[correct_out] = p>0 ? -multiplier/p : 99999.0;
}

// Given next (one of the fanouts of this) that has already had its
// Partials and Share computed, add to the partial derivatives of
// the error function with respect to the outputs.
//
// w tells which of the input sets of next this connects to.
//
// Note: do clear_partials before accumulating from fanouts.
// When all contributions from the fanouts have been gathered, do
// set_share_from_partials.
void ActivationRecord::add_partials_from_next(const ActivationRecord *next,
        const int w)
{
    assert(next != NULL);
    assert(next->Layer != NULL);
    const NeuralLayer *nl = next->Layer;
    assert(nl->num_in() == Layer->num_out());

    // The following kluge is an attempt to
    // get neural nets to concentrate the correction on the
    // relevant outputs, rather than spreading it through all
    // outputs.  This is unlikely to work well when some
    // output has too low a probability (it will learn only slowly),
    // but should help when several of the outputs are essentially
    // OR'ed at later levels of the net--only the relevant one gets
    // much correction, not all of them.
    // kluge=0 gives standard behavior; kluge=1 makes the back-propagated
    // signal proportional to probability.
    double kluge = nl->owner()->learning_params()->ActPartialKluge;
    if (kluge==0)
    {
        for (int o=nl->num_out()-1; o>=0; o--)
        {
            double in_share = nl->gain(o) * next->exp_share(o);
            for (int i=Layer->num_out()-1; i>=0; i--)
                Partials[i] += in_share * nl->weight(i,w,o);
        }
    }
    else
    {
        for (int o=nl->num_out()-1; o>=0; o--)
        {
            double in_share = nl->gain(o) * next->exp_share(o);
            for (int i=Layer->num_out()-1; i>=0; i--)
                Partials[i] += in_share * nl->weight(i,w,o) * ::pow(Probs[i], kluge);
        }
    }
}

// CHANGE LOG:
// 23 April 1998 Kevin Karplus
//	moved learning parameters into LearningParam
// 24 April 1998 Kevin Karplus
//	added kluge to add_partials_from_next
// 30 April 1998 Kevin Karplus
//	added multiplier to set_share_from_entropy routines
// 25 July 1998 Kevin Karplus
//	added MostProbable and PhatMostProbable
// 15 September 1999 Sugato Basu
//	Cost changed to the difference of the actual encoding cost
//	and the optimal encoding cost, reported in nats
// 15 September 1999 Sugato Basu
//	Added functions to compute IdentityCost for a single correct
//	output or an output probability vector, where IdentityCost is
//	the cost (in nats) of the true outputs given the inputs to the
//	first layer of the net, defined only if the number of outputs of
//	the layer equals the number of inputs of the first layer
// 9 October 2001 Kevin Karplus
//	Added tests for correct_out<0 (wild-card on correct out)
// 10 October 2001 Kevin Karplus
//	Fixed definition of cost and identity_cost for vector output,
//	to be just the encoding cost, not the difference in encoding cost.
// 20 Apr 2004 Sol Katzman
//	Made ActivationRecord data member Layer private, uppercase; added
//	public layer() function.
//	Tried to improve documentation and make some code more uniform
//	without functional change.
// 25 May 2004 Sol Katzman
//	Tried to improve performance of add_partials_from_next.
// 26 May 2004 Kevin Karplus
//	Added isfinite assertions, protected by ifdef DEBUG
// Sat Jun 18 21:47:41 PDT 2005 Kevin Karplus
//	Added computation of NullCost to cost() routines.
//	Globals *must* have BackgroundProbs for the alphabet.