// NeuralNet.cc
// Kevin Karplus and Christian Barrett
// 14 Nov. 1997

#include <iostream>     // assumed; original system include not recoverable
#include <math.h>       // assumed; original system include not recoverable
#include "Utilities/IOSmacros.h"
#include "Utilities/Random.h"
#include "Regularizer/BackgroundProbs.h"
#include "NeuralNet.h"
#include "NeuralLayer.h"
#include "ActivationRecord.h"
#include "InterfaceDescription.h"
#include "TrainSet.h"
#include "NetActivation.h"
#include "NetQuality.h"
#include "OneChain.h"
#include "LearningParam.h"
#include "Input/Input.h"
#include "EqualStrings/EqualStrings.h"

// information for the IdObject of NamedClass NeuralNet
static NamedClass *create_neural_net(void) {return new NeuralNet;}

IdObject NeuralNet::ID("NeuralNet", create_neural_net, 0,
        "NeuralNet is an arbitrary-layer neural network.\n");

NameToPtr* NeuralNet::CommandTable = 0;

NeuralNet::NeuralNet(void)
{
    NumLayers = AllocLayers = 0;
    layer_counter = ifd_counter = 0;
    MyNetActivation = NULL;
    Layers = NULL;
    Interfaces = NULL;
    EpochCounter = 0;
    LearningParams = 0;
    set_learning_params(new LearningParam);
}

// Copy everything EXCEPT MyNetActivation
NeuralNet::NeuralNet(const NeuralNet *old)
{
    NumLayers = AllocLayers = 0;
    MyNetActivation = NULL;
    Layers = NULL;
    Interfaces = NULL;
    Alloc(old->NumLayers);
    layer_counter = old->layer_counter;
    ifd_counter = old->ifd_counter;
    EpochCounter = old->EpochCounter;
    CenterWeight = old->CenterWeight;
    RangeWeight = old->RangeWeight;
    LearningParams = new LearningParam;
    (*LearningParams) = (*(old->LearningParams));
    NumLayers = old->NumLayers;
    for (int lay=0; lay < NumLayers; lay++)
    {   Layers[lay] = new NeuralLayer(old->Layers[lay]);
    }
    for (int i=0; i < NumLayers+1; i++)
    {   Interfaces[i] = new InterfaceDescription(old->Interfaces[i]);
    }
}

void NeuralNet::Alloc(int num)
{
    AllocLayers = num;
    assert(AllocLayers > 0);
    typedef NeuralLayer* nlp;
    Layers = new nlp[AllocLayers];
    typedef InterfaceDescription *nid;
    Interfaces = new nid[AllocLayers+1];
    typedef unsigned int ui;
    // (reconstructed loop body: clear the newly allocated arrays to NULL)
    for (int i=0; i < AllocLayers+1; i++)
    {   Interfaces[i] = NULL;
        if (i < AllocLayers) Layers[i] = NULL;
    }
}

// Copy weights, biases, pseudocounts, and gains from another network
// with the same architecture.
// (Signature and loop header reconstructed; the original declaration was garbled.)
void NeuralNet::copy_weights_from(const NeuralNet *old_net)
{
    assert(NumLayers == old_net->NumLayers);
    for (int lay=0; lay < NumLayers; lay++)
    {   NeuralLayer *layer = Layers[lay];
        const NeuralLayer *old = old_net->Layers[lay];
        assert(layer->num_in() == old->num_in());
        assert(layer->num_wind() == old->num_wind());
        assert(layer->num_out() == old->num_out());
        layer->copy_weights(old);
        for (int o=layer->num_out()-1; o>=0; o--)
        {   layer->set_bias(o, old->bias(o));
            layer->set_pseudo(o, old->pseudo(o));
            layer->set_gain(o, old->gain(o));
        }
    }
}

// Remove MyNetActivation, so that network can be safely
// resized (say, changing the overhangs).
// Need to call initialize_net() again after resizing
void NeuralNet::remove_activation(void)
{
    delete MyNetActivation;
    MyNetActivation = NULL;
}
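// A minimal resize sketch (hypothetical driver code, not part of this file;
// only remove_activation() and initialize_net() are real members here):
//
//     net->remove_activation();                // drop the stale NetActivation
//     // ... change window overhangs or other layer sizes here ...
//     net->initialize_net(logfile, training);  // rebuilds MyNetActivation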
// Initializes any of the network parameters that *have not* been set.
// These are currently the weights, pseudo, bias, and gain values.
void NeuralNet::initialize_net(ostream &logfile, const TrainSet *training)
{
    NeuralLayer *curr_lay;
    // int windsize, numin;

    // Reallocate and define the NetActivation.
    delete MyNetActivation;
    MyNetActivation = new NetActivation(this);
    assert(MyNetActivation != NULL);

    // For each layer, if they aren't set, initialize weights, bias,
    // gain, and pseudo.
    for (int i=NumLayers-1; i>=0; --i)
    {   curr_lay = Layers[i];
        curr_lay->initialize_weights(logfile);
        curr_lay->initialize_pseudo(logfile, training);
        curr_lay->initialize_bias(logfile);
        curr_lay->initialize_gain(logfile);

        const InterfaceDescription* ifd = curr_lay->output_interface();
        if (ifd->Alpha && !Globals::background_probs(ifd->Alpha, ZeroIfNew))
        {   BackgroundProbs *bp = new BackgroundProbs(ifd->Alpha);
            cerr << "WARNING: BackgroundProbs for " << ifd->Alpha->name()
                 << " not read in before initializing network.\n";
            const TrainSet* use_for_background = NULL;
            if (Globals::training && Globals::training->num_cols()>0)
            {   use_for_background = Globals::training;
                cerr << "Setting BackgroundProbs for " << ifd->Alpha->name()
                     << " based on training set\n";
            }
            else if (Globals::cross_training && Globals::cross_training->num_cols()>0)
            {   use_for_background = Globals::cross_training;
                cerr << "Setting BackgroundProbs for " << ifd->Alpha->name()
                     << " based on cross-training set\n";
            }
            else if (Globals::testing && Globals::testing->num_cols()>0)
            {   use_for_background = Globals::testing;
                cerr << "Setting BackgroundProbs for " << ifd->Alpha->name()
                     << " based on testing set\n";
            }
            if (use_for_background)
            {   for (int out=0; out < curr_lay->num_out(); ++out)
                {   double cnt = use_for_background->output_count(i,out);
                    (*bp)[out] = cnt;
                }
                bp->normalize();
                bp->set_name(ifd->Alpha->name());
                Globals::BackgroundProbsByName.AddName(bp);
            }
        }
    }
    logfile << "# Network initialization done\n";
}
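// A minimal sketch of how a driver might use the routines in this file
// (hypothetical code: the file name, stream setup, and TrainSet loading are
// assumptions; only read_new(), initialize_net(), and learning_loop() come
// from this file):
//
//     ifstream netin("start.net");
//     NeuralNet *net = NeuralNet::read_new(netin);
//     net->initialize_net(cout, training);          // training: TrainSet*
//     net->learning_loop(100, training,
//             &train_report, 0,                     // training summary / per-chain
//             cross_training, &cross_report, 0,     // cross-training reports
//             0, 0);                                // unit-usage, Q-vs-Phat off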
// This is the function that starts the show.  Since I don't know what
// to use for convergence/stop criterion, the net just cycles through
// num_iterations estimation cycles.
void NeuralNet::learning_loop(int num_iterations,
        TrainSet *training,
        ofstream *report_training,
        ofstream *report_training_indiv,
        TrainSet *cross_training,
        ofstream *report_cross_training,
        ofstream *report_cross_training_indiv,
        ofstream *report_unit_usage,
        ofstream *report_Q_vs_Phat)
{
    int lay;            // frequently used counter for layers.

    NetQuality *TrainNQ = 0;
    NetQuality *NewNQ = new NetQuality(this);
    NetQuality *OldNQ = new NetQuality(this);

    if (!cross_training || cross_training->num_cols()==0)
        cross_training = training;
    assert(training != NULL || (num_iterations==0 && cross_training!=NULL));
    assert(layer_counter == NumLayers);

    if (EpochCounter==0 && num_iterations>0)
    {   int distance_to_training=0;     // how far to next layer that
                                        // has output to train to?
        for (lay=num_layers()-1; lay>=0; lay--)
        {   distance_to_training = is_layer_hidden(lay)? distance_to_training+1 : 0;
            Layers[lay]->initialize_learning_rates(training->num_chains(),
                    distance_to_training);
        }
    }

    // Initialize, and see where we have to improve from
    for (lay=NumLayers-1; lay>=0; lay--)
    {   Layers[lay]->normalize();
        // Layers[lay]->center_weights();
        Layers[lay]->center_biases();
    }

    if (report_cross_training && cross_training
            && cross_training->num_cols()>0 && num_iterations>0)
    {   NewNQ->print_data_header(*report_cross_training);
    }
    test(cross_training, NewNQ, report_cross_training, report_cross_training_indiv);

    if (report_unit_usage)
    {   NewNQ->print_unit_usage(*report_unit_usage, EpochCounter);
    }
    if (report_Q_vs_Phat)
    {   NewNQ->print_Q_vs_Phat(*report_Q_vs_Phat, EpochCounter);
    }

    if (num_iterations<=0)
    {   delete NewNQ;
        delete OldNQ;
        return;
    }

    NeuralNet *BestNN = new NeuralNet(this);
    NetQuality *BestNQ = new NetQuality(NewNQ);
    double BestObjective = NewNQ->objective(NumLayers-1);
    int best_last_epoch = EpochCounter;

    if (training && training->num_cols()>0)
    {   TrainNQ = training==cross_training?
                new NetQuality(NewNQ) : new NetQuality(this);
        if (report_training)
        {   TrainNQ->print_data_header(*report_training);
            (*report_training) << flush;
        }
    }

    bool do_shuffle = 1;
    double PrevObjective = BestObjective;

    for (int i=1; i<=num_iterations; ++i)
    {   EpochCounter++;
        cerr << "Epoch: " << EpochCounter << " " << flush;

        // Save the current point, in case we decide NOT
        // to accept the training epoch.
        NeuralNet *PrevNN = new NeuralNet(this);

        train(training, TrainNQ, report_training, report_training_indiv, do_shuffle);

        for (lay=NumLayers-1; lay>=0; lay--)
        {   Layers[lay]->normalize();
            Layers[lay]->center_weights();
            Layers[lay]->center_biases();
        }

        // Cost changed to be reported as bits instead of nats
        if (TrainNQ != NULL)
        {   float cost_in_bits =
                TrainNQ->record(NumLayers-1)->encoding_cost() * M_LOG2E;
            cerr << cost_in_bits;
        }

        NetQuality *switch_tmp = OldNQ;
        OldNQ = NewNQ;
        NewNQ = switch_tmp;

        test(cross_training, NewNQ, report_cross_training, report_cross_training_indiv);

        if (report_unit_usage)
        {   NewNQ->print_unit_usage(*report_unit_usage, EpochCounter);
        }
        if (report_Q_vs_Phat)
        {   NewNQ->print_Q_vs_Phat(*report_Q_vs_Phat, EpochCounter);
        }

        update_rates(OldNQ, NewNQ);

        double Objective = NewNQ->objective(NumLayers-1);
        cerr << " cross_bits="
             << NewNQ->record(NumLayers-1)->encoding_cost() * M_LOG2E
             << " objective=" << Objective;

        if (Objective > BestObjective)
        {   // save the new best
            delete BestNQ;      BestNQ = new NetQuality(NewNQ);
            delete BestNN;      BestNN = new NeuralNet(this);
            best_last_epoch = EpochCounter;
            BestObjective = Objective;
            // shuffle 10% of time, if new best
            do_shuffle = (drandom() <= 0.10);
            cerr << " new best" << (do_shuffle? ", reshuffling": "");
        }
        else if (EpochCounter-best_last_epoch >= learning_params()->NetResetAfter
                || i==num_iterations)
        {   // reset from best so far.
            best_last_epoch = EpochCounter;
            delete NewNQ;
            NewNQ = new NetQuality(BestNQ);
            copy_weights_from(BestNN);
            if (report_cross_training)
            {   NewNQ->print_data(*report_cross_training);
                (*report_cross_training) << flush;
            }
            do_shuffle = 1;
            Objective = BestObjective;
            cerr << " reset to old best (" << Objective << "), reshuffling";
        }
        else if (Objective - PrevObjective
                < learning_params()->NetTemperature * log(drandom()))
        {   // Reject this change and go back to previous network.
            // That is if change is negative, accept with probability
            // exp(change/temperature).
            delete NewNQ;
            NewNQ = new NetQuality(OldNQ);
            copy_weights_from(PrevNN);
            Objective = PrevObjective;
            do_shuffle = 1;
            cerr << " worse network rejected, reshuffling";
        }
        else
        {   do_shuffle = (Objective <= PrevObjective)? 1
                : (drandom() <= 0.4);       // shuffle 40% of time if better
            cerr << (Objective>PrevObjective? " improvement" : " worse network")
                 << " accepted"
                 << (do_shuffle? ", reshuffling": "");
        }
        cerr << "\n" << flush;

        CenterWeight *= LearningParams->NetCenterDecay;
        RangeWeight  *= LearningParams->NetRangeDecay;

        PrevObjective = Objective;
        delete PrevNN;
    }

    delete OldNQ;
    delete NewNQ;
    delete BestNQ;
    delete BestNN;
    delete TrainNQ;
}
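// The accept/reject test in learning_loop() above is the standard Metropolis
// rule written in log form.  With D = Objective - PrevObjective,
// T = NetTemperature, and u = drandom() uniform on (0,1), the epoch is
// rejected when D < T*log(u), i.e. when u > exp(D/T).  An equivalent way to
// write the acceptance test (a sketch only; the code does not use this form):
//
//     bool accept = drandom() <= exp((Objective - PrevObjective)
//                                    / learning_params()->NetTemperature);
//
// so an improvement (D >= 0) is always kept, and a worse epoch survives
// with probability exp(D/T).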
void NeuralNet::test_or_train(bool do_training,
        const TrainSet *test_set,
        NetQuality *nq,
        ofstream* report_summary,
        ofstream* report_individually,
        bool shuffle)
{
    assert(nq != NULL);

    // Note: individual chain reporting and SOV computations are
    // currently done only for the final output layer.
    // They should probably be done for all layers that have training data.

    for (int lay=NumLayers-1; lay>=0; --lay)
    {   // Make certain the quality records are zero'd out
        nq->record(lay)->clear();
        //  // reset null cost
        //  nq->record(lay)->set_average_null_cost(test_set->null_cost(lay));
        if (do_training)
        {   // reset weight and gain so all gains are 1.
            Layers[lay]->normalize();
        }
    }

    if (do_training && shuffle)
        test_set->shuffle_training_order();

    QualityRecord last_qr(layer(NumLayers-1));
    // last_qr is the quality record for the last layer of the current chain.
    // It is used for setting the chain weight and for individual chain
    // reporting.
    // last_qr.set_average_null_cost(test_set->null_cost(NumLayers-1));

    if (report_individually)
    {   (*report_individually) << "# " << name()
            << " (" << degrees_freedom() << " degrees of freedom)" << "\n"
            << "# chainID ";
        last_qr.print_header(*report_individually);
    }

    int numchains = test_set->num_chains();
    for (int chain_ind=0; chain_ind < numchains; chain_ind++)
    {   // (type of curr_chain assumed: const OneChain*)
        const OneChain *curr_chain = (do_training && shuffle)?
                test_set->get_shuffled_chain(chain_ind) :
                test_set->get_chain(chain_ind);

        MyNetActivation->assign_chain(curr_chain);
        MyNetActivation->activate();
        MyNetActivation->test_outputs();

        if (report_individually || do_training)
            last_qr.clear();
        // last_qr.set_average_null_cost(test_set->null_cost(NumLayers-1));

        for (int la=NumLayers-1; la>=0; --la)
        {   QualityRecord *qualrecord = nq->record(la);
            qualrecord->compile_unit_usage_data(MyNetActivation, curr_chain);
            int num_actrecords = MyNetActivation->layer_length(la);
            for (int n=0; n < num_actrecords; n++)
            {   qualrecord->add(MyNetActivation->record(la,n));
            }
            if (interface(la+1)->train_to_unique())
            {   const short int *osec = curr_chain->osec(la);
                short int *psec = MyNetActivation->psec(la);
                qualrecord->addSOV(curr_chain->num_cols(), osec, psec);
                if (report_individually || do_training)
                {   for (int n=0; n < num_actrecords; n++)
                    {   last_qr.add(MyNetActivation->record(la,n));
                    }
                    last_qr.addSOV(curr_chain->num_cols(), osec, psec);
                }
                delete [] psec;
            }
        }

        if (report_individually)
        {   (*report_individually) << IOSLEFT << std::setw(20)
                << curr_chain->name() << IOSRIGHT
                << " " << last_qr << flush;
        }

        if (do_training)
        {   MyNetActivation->back_propagate(
                pow(1.0-last_qr.q(), LearningParams->NetFractionWrongExponent));
            reest_weights(MyNetActivation);
        }
    }

    if (report_summary)
    {   nq->print_data(*report_summary);
        (*report_summary) << flush;
    }
}

// Changes the learning rates of the net based on its improvement,
// or lack of it, in quality.
void NeuralNet::update_rates(const NetQuality *old_nq, const NetQuality *new_nq)
{
    double old_cost, new_cost;
    double old_rms, new_rms;
    double old_rmsMDes2, new_rmsMDes2;

    // These are output layer values that are used to effect changes in
    // all of the layers.  It will have to be reworked if more than one
    // output layer exists in the network.
    int op = NumLayers-1;
    old_cost = old_nq->encoding_cost(op);
    new_cost = new_nq->encoding_cost(op);

    for (int k=NumLayers-1; k>=0; --k)
    {   old_rms = old_nq->rms_sum(k);
        new_rms = new_nq->rms_sum(k);
        old_rmsMDes2 = old_nq->rms_sum2_minus_des(k);
        new_rmsMDes2 = new_nq->rms_sum2_minus_des(k);

        NeuralLayer *l = Layers[k];
        if (l->is_frozen()) continue;
        l->change_WeightRate(old_cost, new_cost);
        l->change_BiasRate(old_cost, new_cost, old_rms, new_rms);
        l->change_GainRate(old_cost, new_cost, old_rmsMDes2, new_rmsMDes2);
        l->change_PseudoRate(old_cost, new_cost);
        l->normalize();
    }
}

NeuralLayer* NeuralNet::next_TrainTo_layer(int start) const
{
    assert(start>=0 && start < NumLayers);
    for (int i=start; i < NumLayers; i++)
    {   if (Layers[i]->is_TrainTo())
            return Layers[i];
    }
    return NULL;
}
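// Note on the chain weighting used by test_or_train() above: each chain's
// error signal is scaled before back-propagation by
//
//     pow(1.0 - last_qr.q(), LearningParams->NetFractionWrongExponent)
//
// where last_qr.q() is the chain's fraction-correct score for the output
// layer (so 1-q is the fraction wrong, matching the parameter name).
// For example (illustrative numbers only): q = 0.75 with exponent 2 gives
// weight 0.25^2 = 0.0625, so poorly predicted chains dominate the updates.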
// For a NetActivation record that has already had both
// activate and back_propagate done, update the weights in the network.
void NeuralNet::reest_weights(const NetActivation *app)
{
    int k, m;
    int num_units;
    NeuralLayer *nl;
    ActivationRecord *curr_record;

    int numlayers = app->num_layers();
    assert(numlayers == this->NumLayers);

    // Update weights, pseudo, gain, and bias parameters in the neural
    // net, beginning with the output layer
    for (k=numlayers-1; k>=0; --k)
    {   nl = Layers[k];
        num_units = app->layer_length(k);
        for (m=num_units-1; m>=0; --m)
        {   curr_record = app->record(k,m);
            nl->update_weights(curr_record);
            nl->update_pseudo(curr_record);
            nl->update_gain(curr_record);
            nl->update_bias(curr_record);
        }
    }
}

// Reads the NeuralNet basic structure, then assumes that the individual
// layers' NeuralLayer and InterfaceDescription definitions follow.
// These are also read in and added to the structure.
// As a side effect, this function also creates the global NetActivation
// structure.
NeuralNet* NeuralNet::read_new(istream& in)
{
    NeuralNet *retnet;
    // NamedClass *q;
    // int input_error=0;
    // int assign_error=0;
    int layers_in_net;
    int lay;

    NamedClass *p = NamedClass::read_new(in);
    if (!p) return 0;
    if (!p->is_a(NeuralNet::classID()))
    {   cerr << "Error while attempting to read a NeuralNet.\n "
             << "Found " << p->type()->name() << " instead.\n";
        return 0;
    }
    retnet = dynamic_cast<NeuralNet*>(p);
    layers_in_net = retnet->NumLayers;

    // This routine now checks for consistency between the
    // specified layers and interfaces.
    if (retnet->layer_counter != retnet->NumLayers)
    {   cerr << "Error: inconsistent number of layers "
             << retnet->layer_counter << " and " << retnet->NumLayers
             << " reading " << retnet->name() << endl;
        delete retnet;
        return 0;
    }
    if (retnet->ifd_counter != retnet->NumLayers+1)
    {   cerr << "Error: inconsistent number of interfaces ("
             << retnet->ifd_counter << ") and layers ("
             << retnet->NumLayers << ") reading " << retnet->name() << endl;
        delete retnet;
        return 0;
    }
    for (lay=0; lay < retnet->NumLayers; lay++)
    {   NeuralLayer *layer = retnet->Layers[lay];
        InterfaceDescription *before = retnet->Interfaces[lay];
        InterfaceDescription *after = retnet->Interfaces[lay+1];
        if (layer->num_in() != before->num_units())
        {   cerr << "Error: inconsistent number of units "
                 << " at input to layer " << lay
                 << " (" << before->num_units() << " and "
                 << layer->num_in() << ")\n";
            delete retnet;
            return 0;
        }
        if (layer->num_out() != after->num_units())
        {   cerr << "Error: inconsistent number of units "
                 << " at output to layer " << lay
                 << " (" << layer->num_out() << " and "
                 << after->num_units() << ")\n";
            delete retnet;
            return 0;
        }
    }
    return retnet;
}

// Returns 1 on error, 0 otherwise
int NeuralNet::add_interface(InterfaceDescription *descr)
{
    assert(ifd_counter < AllocLayers+1);
    if (!Interfaces)
    {   cerr << "Error adding " << descr->type() << " to\n"
             << type() << name()
             << ". Must first allocate for member \"Interfaces\".\n";
        return 1;
    }
    Interfaces[ifd_counter++] = descr;
    return 0;
}

// Returns 1 on error, 0 otherwise
int NeuralNet::add_layer(NeuralLayer *layer)
{
    assert(layer_counter < AllocLayers);
    if (!Layers)
    {   cerr << "Error adding " << layer->type() << " to\n"
             << type() << name()
             << ". Must first allocate for member \"Layers\".\n";
        return 1;
    }
    layer->set_owner(this);
    layer->set_layer_number(layer_counter);
    Layers[layer_counter++] = layer;
    return 0;
}
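// Layout reminder for read_new(), add_interface(), and add_layer() above:
// a net with NumLayers layers always carries NumLayers+1 interface
// descriptions,
//
//     Interfaces[0] -> Layers[0] -> Interfaces[1] -> ... -> Layers[N-1] -> Interfaces[N]
//
// so layer i must have num_in() == Interfaces[i]->num_units() and
// num_out() == Interfaces[i+1]->num_units(), which is what read_new() checks.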
// General purpose function for reading integer values for NeuralNet
// member variables
int ReadIntParam(istream &in, int &param, NeuralNet *change, NNInputCommand* self)
{
    if (change->Layers)
    {   cerr << "ERROR: Can't change " << self->name()
             << " after allocation has been done---\n"
             << "    specify NumLayers first\n";
        return 1;
    }
    int tmp;
    in >> tmp;
    if (tmp <= 0)
    {   cerr << "ERROR: must have " << self->name() << " >0\n";
        return 0;
    }
    param = tmp;
    return 1;
}

int ReadComment(istream &in, NeuralNet *change, NNInputCommand* self)
{
    SkipSeparators(in, 1, '\n');
    return 1;
}

int ReadName(istream &in, NeuralNet *chg, NNInputCommand *self)
{
    assert(chg!=NULL);
    char word[500];
    get_word(in, word);
    chg->set_name(word);
    return 1;
}

int ReadNumLayers(istream &in, NeuralNet *chg, NNInputCommand *self)
{
    assert(chg!=NULL);
    if (ReadIntParam(in, chg->NumLayers, chg, self))
    {   assert(chg->NumLayers>0);
        chg->Alloc(chg->NumLayers);
        return 1;
    }
    else
    {   return 0;
    }
}

int ReadLayerOrInterface(istream &in, NeuralNet *chg, NNInputCommand *self)
{
    assert(chg!=NULL);
    NamedClass *q = NamedClass::read_new_after_classname(in);
    if (q->is_a(InterfaceDescription::classID()))
        return ! chg->add_interface(dynamic_cast<InterfaceDescription*>(q));
    else if (q->is_a(NeuralLayer::classID()))
        return ! chg->add_layer(dynamic_cast<NeuralLayer*>(q));

    cerr << "Unrecognized object in NeuralNet: " << q->type()->name() << "\n";
    return 0;
}

int VerifyClassName(istream &in, NeuralNet *change, NNInputCommand *self)
{
    assert(change!=NULL);
    char word[100];
    get_word(in, word);
    const IdObject *end_id = IdObject::id(word);
    if (end_id != change->type())
    {   cerr << "Warning: " << self->name() << " " << word
             << " doesn't match " << change->type()->name()
             << endl << flush;
    }
    // continue if "ClassName", stop if "EndClassName"
    return EqualStrings(self->name(), "ClassName", 1);
}

void NeuralNet::init_command_table()
{
    assert(!CommandTable);
    CommandTable = new NameToPtr(10);
    CommandTable->ignore_case();
    CommandTable->AddName(new NNInputCommand("Name", ReadName));
    CommandTable->AddName(new NNInputCommand("Comment", ReadComment));
    CommandTable->AddName(new NNInputCommand("NumLayers", ReadNumLayers));
    CommandTable->AddName(new NNInputCommand("ClassName", ReadLayerOrInterface));
    CommandTable->AddName(new NNInputCommand("EndClassName", VerifyClassName));
}

int NeuralNet::read_knowing_type(istream &in)
{
    if (! command_table())
    {   init_command_table();
    }
    char word[300];
    while (in.good())
    {   get_word(in, word, '=');
        NNInputCommand *comm = dynamic_cast<NNInputCommand*>
                (command_table()->FindOldName(word, ZeroIfNew));
        if (comm)
        {   if (!comm->execute(in, this))
                return 1;
        }
        else
        {   cerr << "Unrecognized keyword: " << word
                 << " for type " << type()->name()
                 << " " << name() << endl;
        }
    }
    return 0;
}

void NeuralNet::write_knowing_type(ostream &out) const
{
    out << "Name = " << name() << endl
        << "NumLayers = " << NumLayers << endl
        << endl;
    int numlayers = num_layers();
    for (int i=0; i < numlayers; i++)
    {   interface(i)->write(out);
        out << endl;
        layer(i)->write(out);
        out << endl;
    }
    interface(numlayers)->write(out);
    out << endl;
}
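// A rough sketch of the file layout implied by write_knowing_type() above
// and by the command table (Name, Comment, NumLayers, ClassName/EndClassName).
// The network name and the "..." bodies are placeholders only; the real
// contents of each interface and layer block are written and parsed by
// InterfaceDescription and NeuralLayer themselves, not by this file:
//
//     Name = my_net
//     NumLayers = 2
//
//     ClassName = InterfaceDescription
//     ...                                  (interface 0)
//     ClassName = NeuralLayer
//     ...                                  (layer 0)
//     ...                                  (interface 1, layer 1, interface 2)
//     EndClassName = NeuralNet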
// Using the usage statistics in NQ, adjust the biases of each layer
// and the weights of the next layer to get approximately equal usage of
// each hidden unit.
// Do not adjust the biases for layers with training data for the
// outputs, nor for the layer before a frozen layer.
void NeuralNet::equilibrate(NetQuality *NQ)
{
    for (int i=0; i < NumLayers-1; i++)
    {   NeuralLayer *lay = layer(i);
        if (!lay->output_interface()->is_hidden() || layer(i+1)->is_frozen())
            continue;   // skip layers that can't be equilibrated.
        NeuralLayer *next_lay = layer(i+1);
        int next_num_w = next_lay->num_wind();
        int next_num_o = next_lay->num_out();
        int num_out = lay->num_out();
        for (int o=num_out-1; o>=0; o--)
        {   double phat = NQ->record(i)->phati(o);  // usage of unit o
            double scale = phat * num_out;          // how much to shrink unit
            double log_scale = (scale <= 0.01)? -4.60517 : log(scale);
            lay->set_bias(o, lay->bias(o) - log_scale);
            for (int w=0; w < next_num_w; w++)
            {   for (int no=0; no < next_num_o; no++)
                {   next_lay->set_weight(o, w, no,
                        const_cast<const NeuralLayer*>(next_lay)->weight(o,w,no)
                            * scale);
                }
            }
        }
    }
}

// CHANGE LOG:
// 12 March 1998 Kevin Karplus
//      Added initialize_learning_rate to learning_loop
// 13 April 1998 Kevin Karplus
//      Improved output to include Name of network.
// 14 April 1998 Kevin Karplus
//      Added keeping best and restarting from it after 30 worse epochs.
// 18 May 1998 Kevin Karplus
//      Added normalize and center_weights to training loop.
// 9 June 1998 Kevin Karplus
//      Modified learning_loop so that cross_training set not
//      required (uses training set to choose best if cross_training missing).
// 20 July 1998 Kevin Karplus
//      Added individual sequence reporting option to test() and train()
//      Moved summary printing from learning_loop to test() and train().
//      Merged test() and train() into test_or_train()
//      Fixed bug in set_learning_params which caused CenterWeight and
//      RangeWeight not to be reset properly.
//      Added comment to NetQuality file about what layers are frozen.
// 25 July 1998 Kevin Karplus
//      Added print_Q_vs_Phat to learning_loop
// 15 September 1999 Sugato Basu
//      Added code to handle output format ALIGNMENT
// 5 Nov 1999 Kevin Karplus
//      Changed format of networks to have interfaces and layers
//      inside the NeuralNet instead of outside.
// 10 Dec 1999 Kevin Karplus
//      Added equilibrate()
// 7 Jan 2000 Kevin Karplus
//      Added center_biases after each center_weights
// 28 Feb 2000 Kevin Karplus
//      Modified learning_loop so that training set not shuffled after new
//      best and only shuffled 30% of time after an improvement.
// 8 Jan 2001 Kevin Karplus
//      Added null_cost initialization and negated objective function.
// 22 Nov 2001 Kevin Karplus
//      Commented out center_weights in learning_loop---now need to use
//      explicit CenterWeights command.
// 12 Dec 2001 Kevin Karplus
//      Added NetTemperature and copy_weights_from()
//      Added extra info to log file on training iterations.
// 16 Aug 2003 George Shackelford
//      Replaced deprecated 'form(...)' with proper formatting
// 01 June 2004 Sol Katzman
//      Replaced drand48() with utilities from ultimate library.
// Sat Jun 18 22:13:17 PDT 2005 Kevin Karplus
//      Added initialization of BackgroundProbs from cross_training in learning_loop
// Sun Jun 19 07:16:21 PDT 2005 Kevin Karplus
//      Moved initialization of BackgroundProbs to initialize_net
// Thu Jul  7 04:44:17 PDT 2005 Kevin Karplus
//      Removed a couple of spaces before print_header()
// Thu Jul  7 13:45:46 PDT 2005 Kevin Karplus
//      Added check for empty training/cross-training sets,
//      to existing checks for null pointers.
// Sat Jul 23 17:32:14 PDT 2005 Kevin Karplus
//      Fixed do_training and shuffle parameters of test_or_train to
//      be bool (not int).