// NetActivation.cc
// copyright 6 August 1997 Kevin Karplus

#include <iostream>     // for cerr, endl, flush
#include <math.h>       // for M_LN2, isfinite
#include <assert.h>     // for assert
using namespace std;

#include "Alphabet/Alphabet.h"
#include "AlphabetTuple/AlphabetTuple.h"
#include "Regularizer/BackgroundProbs.h"
#include "NetActivation.h"
#include "ActivationRecord.h"
#include "OneChain.h"
#include "InterfaceDescription.h"
#include "LearningParam.h"
#include "Globals.h"

//SUGATO : 7/1/99
#include "PredictMix.h"

NetActivation::NetActivation(const NeuralNet *n, int init_length)
{
    Net=n;
    Chain=0;
    Records = 0;
    Dummies = 0;
    Inputs = 0;
    ReAlloc(init_length);
}

void NetActivation::DeleteAll(void)
{
    if (!Records) return;
    Length=AllocLength;
    for (int lay=Net->num_layers()-1; lay>=0; lay--)
    {
        for (int j=layer_length(lay)-1; j>=0; j--)
            delete Records[lay][j];
        delete [] Records[lay];
        delete Dummies[lay];
        delete [] Inputs[lay];
    }
    delete [] Records;
    delete [] Dummies;
    delete [] Inputs;
    Records=0;
    AllocLength = Length = 0;
}

void NetActivation::ReAlloc(int new_length)
{
    DeleteAll();
    NumLayers = Net->num_layers();

    typedef ActivationRecord* arp;
    typedef arp* arpp;
    Dummies = new arp[NumLayers];
    Records = new arpp[NumLayers];

    Length=AllocLength=new_length;
    for (int lay=NumLayers-1; lay>=0; lay--)
    {
        Dummies[lay] = new ActivationRecord(Net->layer(lay));
        Records[lay] = new arp[layer_length(lay)];
        for (int j=layer_length(lay)-1; j>=0; j--)
        {
            Records[lay][j] = new ActivationRecord(Net->layer(lay));
            Records[lay][j]->tell_position(j);
        }
    }

    // Set up the Inputs array of arrays of arrays.
    // Each layer has length padded_layer_length(lay-1).
    Inputs = new const float** [NumLayers];
    for (int iolay=0; iolay<NumLayers; iolay++)
    {
        Inputs[iolay] = new const float* [padded_layer_length(iolay-1)];
    }
}

void NetActivation::set_chain(const OneChain *newchain)
{
    int chainlength = newchain->num_cols();
    if (chainlength > AllocLength)
    {
        ReAlloc(chainlength);
    }
    Chain = newchain;
    Length = chainlength;

    // Fill in each layer with pointers to the correct arrays in
    // the input chain or the records array.
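    // Layout note: for layer iolay, Inputs[iolay] holds
    // padded_layer_length(iolay-1) pointers.  The first
    // num_dummies = (size-len)/2 entries and every entry from
    // len+num_dummies onward all point at one shared "dummy" probability
    // vector (the chain's out-of-range column for the first layer,
    // Dummies[iolay-1]->probs() for the others), so windows that
    // overhang the ends of the chain still read valid vectors;
    // the len entries in between point at the real columns.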
    for (int iolay=0; iolay<NumLayers; iolay++)
    {
        int size = padded_layer_length(iolay-1);
        int len = layer_length(iolay-1);
        const float* dummy = iolay==0 ?
            Chain->probs_for_layer(0,-1) : Dummies[iolay-1]->probs();
        int num_dummies = (size-len)/2;
        int i;
        for (i=0; i<num_dummies; i++)
            Inputs[iolay][i] = dummy;
        for (i=size-1; i>=len+num_dummies; i--)
            Inputs[iolay][i] = dummy;
        // now fill in the real positions
        for (i=0; i<len; i++)
            Inputs[iolay][i+num_dummies] = iolay==0 ?
                Chain->probs_for_layer(0,i) : Records[iolay-1][i]->probs();
    }
}

ActivationRecord* NetActivation::record(int lay, int pos) const
{
    assert(lay>=0);
    int subscr = pos + Net->overhang(lay);
    if (subscr<0 || subscr>=layer_length(lay)) return Dummies[lay];
    return Records[lay][subscr];
}

void NetActivation::activate()
{
    assert(NumLayers == Net->num_layers());
    int lay;        // counter for layers
    // int w;       // counter for elements of window
    typedef const float* cfloatp;

    // activate one layer at a time, starting at primary inputs
    for (lay=0; lay<NumLayers; lay++)
    {
        int overhang=Net->overhang(lay);
        // activate one record at each position in the layer,
        // passing each such record its own window of inputs.
        // The first input window is Inputs[lay][0..windowsize-1].
        const float ** window = Inputs[lay];
        for (int j=-overhang; j<Length+overhang; j++)
        {
            record(lay,j)->compute_probs(window++);
        }
        Dummies[lay]->compute_dummy_probs();
    }
}

void NetActivation::test_outputs()
{
    // go backwards through layers
    for (int lay=NumLayers-1; lay>=0; lay--)
    {
        int overhang=Net->overhang(lay);
        const InterfaceDescription* ifd=Net->interface(lay+1);
        //const AlphabetTuple *A = ifd->Alpha;

        if (ifd->is_TrainTo() && !ifd->is_hidden()
            && Chain->structure(lay+1)==NULL
            && Chain->probs_for_layer(lay+1)==NULL)
        {
            // Chain has no training data, so set outputs to background
            cerr << "DEBUG: chain has no training data\n" << flush;
            const BackgroundProbs* NullProbs =
                Globals::background_probs(ifd->Alpha, ZeroIfNew);
            if (NullProbs == NULL)
            {
                cerr << "ERROR: need BackgroundProbs for "
                     << ifd->Alpha->name()
                     << " for chain with no training data\n";
                assert(NullProbs);
            }
            const float * null_probs = NullProbs->probs();
            for (int j=-overhang; j<Length+overhang; j++)
            {
                record(lay,j)->cost(null_probs);
            }
            cerr << "DEBUG: Cost is " << record(lay,0)->cost() << endl << flush;
        }
        else if (ifd->train_to_unique() && !ifd->is_hidden())
        {
            assert(overhang==0);
            for (int j=-overhang; j<Length+overhang; j++)
            {
                if (ifd->NetRegularizer)
                {
                    //int correct_out = Chain->correct_value(lay+1, j);
                    //const float * mix = record(lay,j)->probs();
                    //const float * in_counts = Chain->counts_for_layer(0,j);

                    // BAD CODE: The routine for computing cost_mix_output
                    // for a single correct output has to be added to
                    // PredictMix.cc
                    // double c = cost_mix_output(mix,ifd->NetRegularizer,in_counts,correct_out);
                    // record(lay,j)->set_cost(c*M_LN2);
                    // cost_mix_output returns the cost in bits, which has
                    // to be converted to nats
                    // record(lay,j)->set_outsum(1);
                }
                else
                {
                    int correct_out = Chain->correct_value(lay+1, j);
                    // If the correct output is less than zero, it signifies
                    // that at that structure position there is a gap rather
                    // than one correct output value.  If there is a gap
                    // column, we want to skip it rather than training or
                    // testing with it.  If there is a non-gap column, then
                    // we want to continue training.
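                    // Example (alphabet chosen only for illustration):
                    // with a three-state secondary-structure alphabet such
                    // as E/H/L, correct_value() returns an index 0..2 for a
                    // real column and a negative value for an all-gap
                    // column, so only non-gap columns contribute a cost.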
                    if (correct_out >= 0)
                    {
                        record(lay,j)->cost(correct_out);
                    }
                }
            }
        }
        else if (ifd->is_TrainTo() && !ifd->is_hidden())
        {
            for (int j=-overhang; j<Length+overhang; j++)
            {
                if (ifd->NetRegularizer)
                {
                    // cout << "lay: " << lay << endl << flush;
                    const float * out_probs = Chain->probs_for_layer(lay+1,j);
                    const float * in_counts = Chain->counts_for_layer(0,j);
                    const float * mix = record(lay,j)->probs();

                    //cerr << "Mixture Probs:" << endl << flush;
                    //for(int i=0; i<record(lay,j)->layer->num_out(); i++)
                    //    cerr << i << ": " << mix[i] << endl << flush;

                    //cerr << "Input Counts:" << endl << flush;
                    //for(int i=0; i<record(lay,j)->layer->input_interface()->num_units(); i++)
                    //    cerr << i << ": " << in_counts[i] << endl << flush;

                    //cerr << "Output Probs:" << endl << flush;
                    //for(int i=0; i<record(lay,j)->layer->input_interface()->num_units(); i++)
                    //    cerr << i << ": " << out_probs[i] << endl << flush;

                    double sum=0;
                    for (int i=0; i<record(lay,j)->layer()->input_interface()->num_units(); i++)
                        sum += out_probs[i];
                    double c = cost_mix_output(mix, ifd->NetRegularizer,
                                               in_counts, out_probs);
                    record(lay,j)->set_cost(c*M_LN2);
                    // cost_mix_output returns the cost in bits, which has
                    // to be converted to nats
                    record(lay,j)->set_outsum(sum);
                    //cerr << "Cost is :" << c << endl << endl << flush;
                    //cerr << "OutSum is :" << sum << endl << endl << flush;
                }
                else
                {
                    const float * correct_probs = Chain->probs_for_layer(lay+1,j);
                    record(lay,j)->cost(correct_probs);
                    //cerr << "Output Counts:" << endl << flush;
                    //for(int i=0; i<record(lay,j)->layer->output_interface()->num_units(); i++)
                    //    cerr << correct_probs[i] << endl << flush;
                    //cerr << "Cost is:" << record(lay,j)->cost() << endl << flush;
                }
            }
        }
    }
}

void NetActivation::back_propagate(double weight)
{
    const AlphabetTuple *A;
    int correct_out;
    int some_layer_trained=0;

    //cerr << Chain->name() << " weight= " << weight << "\n" << flush;

    // go backwards through layers
    for (int lay=NumLayers-1; lay>=0; lay--)
    {
        int overhang=Net->overhang(lay);
        const InterfaceDescription* ifd=Net->interface(lay+1);
        A = ifd->Alpha;

        if (ifd->train_to_unique() && !ifd->is_hidden())
        {
            assert(overhang==0);
            int prev_correct_out, next_correct_out;
            prev_correct_out = correct_out = next_correct_out =
                Chain->correct_value(lay+1, -overhang);
            for (int j=-overhang; j<Length+overhang; j++)
            {
                prev_correct_out = correct_out;
                correct_out = next_correct_out;
                if (j+1 < Length+overhang)
                {
                    next_correct_out = Chain->correct_value(lay+1, j+1);
                }
                double multiplier = weight
                    * ((correct_out==next_correct_out && correct_out==prev_correct_out)?
                        1.0 : Net->learning_params()->NetActChangeCorrectWeight)
                    * (record(lay,j)->most_out_weight()==0?
                        Net->learning_params()->NetActWrongWeight : 1.0);
                if (correct_out >= 0)
                {
                    // not a gap position, train normally
                    record(lay,j)->set_share_from_entropy(correct_out, multiplier);
                }
                else
                {
                    // GAP in training data, propagate nothing back
                    record(lay,j)->clear_partials();
                }
            }
            some_layer_trained=1;
        }
        else if (ifd->is_TrainTo() && !ifd->is_hidden())
        {
            for (int j=-overhang; j<Length+overhang; j++)
            {
                if (ifd->NetRegularizer)
                {
                    double * partials =
                        new double[ifd->NetRegularizer->num_components()];
                    const float * mix = record(lay,j)->probs();
                    const float * in_counts = Chain->counts_for_layer(0,j);
                    const float * out_probs = Chain->probs_for_layer(lay+1,j);
                    partials_of_cost_wrt_mix(partials, mix, ifd->NetRegularizer,
                                             in_counts, out_probs);
                    record(lay,j)->set_share_from_partials(partials);
                    delete [] partials;
                }
                else
                {
                    const float * correct_probs = Chain->probs_for_layer(lay+1,j);
                    record(lay,j)->set_share_from_entropy(correct_probs, 1);
                }
            }
            some_layer_trained=1;
        }
        else if (some_layer_trained)
        {
            // propagate from next layer

            // clear the partials for this layer
            for (int pos=-overhang; pos<Length+overhang; pos++)
                record(lay,pos)->clear_partials();

            // for each record in the next layer,
            // add to the partials for the inputs for this layer
            int next_overhang=Net->overhang(lay+1);
            int next_wind=Net->layer(lay+1)->num_wind();
            for (int next_pos= -next_overhang; next_pos<Length+next_overhang; next_pos++)
            {
                ActivationRecord *next_rec = record(lay+1, next_pos);
                for (int w=next_wind-1; w>=0; w--)
                {
                    int pos = next_pos + w - (next_wind-1)/2;
                    record(lay,pos)->add_partials_from_next(next_rec, w);
                }
            }

            // now use the partials that were computed to set the shares.
            for (int pos=-overhang; pos<Length+overhang; pos++)
                record(lay,pos)->set_share_from_partials();
        }
    }
}

// Added by Jes Frellsen
// For an already back-propagated set of activation records,
// calculate the partial derivatives with respect to the inputs
void NetActivation::add_InputPartials(double **partials) const
{
    // This layer is -1
    int next_lay = 0;
    const InterfaceDescription* next_ifd = Net->interface(next_lay);
    const NeuralLayer* next_layer = Net->layer(next_lay);

    int num_units = next_ifd->num_units();
    assert(num_units == next_layer->num_in());

    // Look up overhang and window size for the next layer
    int next_overhang=Net->overhang(next_lay);
    int next_wind=Net->layer(next_lay)->num_wind();

    // For each record in the next layer,
    // add to the partials for the inputs for this layer
    for (int next_pos = -next_overhang; next_pos < Length+next_overhang; next_pos++)
    {
        ActivationRecord* next_rec=record(next_lay, next_pos);

        // Add to the partials that the window covers in this layer
        for (int w=next_wind-1; w>=0; w--)
        {
            int pos = w + next_pos-(next_wind-1)/2;

            // Don't add partials for positions in the overhang
            if (0 <= pos && pos < Length)
            {
                // Set the partials for the inputs in this column
                int num_out = next_layer->num_out();
                for (int o=num_out-1; o>=0; o--)
                {
                    double in_share = next_layer->gain(o) * next_rec->exp_share(o);
                    assert(isfinite(in_share));
                    for (int i=num_units-1; i>=0; i--)
                    {
                        partials[pos][i] += in_share * next_layer->weight(i,w,o);
                        assert(isfinite(partials[pos][i]));
                    }
                }
            }
        }
    }
}

short int *NetActivation::psec(int layer) const
{
    short int *str = new short int[Length];
    for (int i=0; i<Length; i++)
    {
        str[i] = record(layer,i)->highest_prob_output();
    }
    /* str[Length]=0; */
    return str;
}

//Change Log
// 21 March 1998 Kevin Karplus
//    Restored allocation to original intent (I hope),
//    and clarified documentation in .h files.
// 30 April 1998 Kevin Karplus
//    Added NetActChangeCorrectWeight
//    Moved ActWrongWeight to NetActWrongWeight
// 5 May 1998 Melissa Cline
//    Added functionality in back_propagate to skip over any columns
//    in which there's no correct answer - in which the correct output
//    is a gap character.
// 11 May 1998 Melissa Cline
//    Debugged functionality to skip over any columns in which
//    there's no correct answer.
// 28 July 1998 Kevin Karplus
//    Added weight to back_propagate, removed cost computation from
//    back_propagate.
// 15 September 1999 Sugato Basu
//    Added code to compute cost for ALIGNMENTS output format
// 15 September 1999 Sugato Basu
//    Added code to compute cost when NetRegularizer is used to predict
//    mixture components
// 12 October 2001 Kevin Karplus
//    Added missing "some_layer_trained" in back_propagate for
//    TrainTo but not train_to_unique.
// 20 Apr 2004 Sol Katzman
//    Make ActivationRecord data member Layer private, uppercase;
//    add public layer() function.
// Mon Jun 13 03:31:29 PDT 2005 Kevin Karplus
//    Picked up Jes Frellsen's additions of
//    calc_InputPartials and CorrectPredictedLetters
//    Modified indenting style to match my code.
// Mon Jun 13 05:37:58 PDT 2005 Kevin Karplus
//    Added isfinite assertions to calc_InputPartials
// Mon Jun 13 09:19:11 PDT 2005 Kevin Karplus
//    Eliminated CorrectPredictedLetters (using QualityRecords now)
// Sat Jul 9 07:39:49 PDT 2005 Kevin Karplus
//    Split calc_InputPartials into clear_InputPartials and
//    add_InputPartials
// Fri Jul 22 14:41:29 PDT 2005 Kevin Karplus
//    Added handling of chain without training data to test_outputs
//    (assumes background probabilities desired)
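
// Usage sketch (an assumption inferred from the methods defined above,
// not something specified by this file): a single training pass over
// one chain presumably looks like
//
//     NetActivation act(net, chain->num_cols());   // net is a const NeuralNet*
//     act.set_chain(chain);         // point Inputs at the chain's columns
//     act.activate();               // forward pass, layer by layer
//     act.test_outputs();           // record per-position costs
//     act.back_propagate(weight);   // push error shares back toward the inputs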