// LearningParam.h
// copyright 21 April 1998 Kevin Karplus

#ifndef LearningParam_H
#define LearningParam_H

#include "NamedClass/NamedObject.h"
#include "NamedClass/NamedClass.h"

// LearningParam holds lots of parameters for
// controlling the neural-net learning technique.
// [As of 21 April 1998, this class is not used anywhere---it should
// be integrated soon.]

// All constants that are related to the learning parameters
// should be collected here!

// Members starting with 
//	"Net" are used in NeuralNet.cc
//	"NetAct" are used in NetActivation.cc
//	"QR" are used in QualityRecord.cc
//	"Act" are used in ActivationRecord.cc
//	"Lay" are used in NeuralLayer.cc

// Forward declaration of the keyword-command class used for LearningParam
// input; the full definition appears later in this header.
class LPInputCommand;	// forward declaration of input commands for class
	
class LearningParam: public NamedClass
{
    // Bag of tunable parameters controlling the neural-net learning
    // technique.  All parameters are plain public data members; the
    // name prefix (Net, NetAct, QR, Act, Lay) indicates which part of
    // the code uses the parameter.

private:
    // ---- NamedClass plumbing ----

    // Type identifier handed out by classID() and type().
    static IdObject ID;
    virtual int read_knowing_type(istream &in);
    virtual void write_knowing_type(ostream &out) const;

    // Keyword table for reading input; exposed read-only through
    // command_table().
    static NameToPtr *CommandTable;

public:
    // Read-only access to the input keyword table (returns CommandTable
    // as-is, whatever its current value).
    static const NameToPtr *command_table()
    {
        return CommandTable;
    }

    // Defined elsewhere.  NOTE(review): presumably populates CommandTable
    // -- confirm in the implementation file.
    static void init_command_table();

    // Defined elsewhere.  NOTE(review): presumably builds a new
    // LearningParam from "in" -- confirm ownership of the result.
    static LearningParam *read_new(istream &in);

    // ---- Net: network-level parameters ----

    // Initial relative weights for parts of the objective function.
    double NetInitCenterWeight;     // keep sums centered on 0
    double NetInitRangeWeight;      // scale weights to get desired
                                    // variance on sums

    // Multipliers to reduce CenterWeight and RangeWeight on each epoch.
    double NetCenterDecay;
    double NetRangeDecay;

    // Reset network to best after this many epochs with no improvement.
    int NetResetAfter;

    // Reject change to network if
    //      delta-objective < -NetTemperature*log(random)
    // That is, if change is negative, accept with probability
    //      exp(change/temperature).
    // NOTE(review): the two statements above appear to disagree in sign
    // (the second implies rejecting when change < +T*log(random));
    // confirm against the implementation.
    double NetTemperature;

    // When back propagating, weight chains by
    //      (1-q) ^ NetFractionWrongExponent
    double NetFractionWrongExponent;

    // ---- NetAct ----

    // Multiplier for update for incorrect weights.
    double NetActWrongWeight;

    // Multiplier for update for transitions in what is correct
    //      (like hhhhHEeee ).
    double NetActChangeCorrectWeight;

    // ---- QR ----

    // Multipliers for relative importance of encoding cost and % correct
    // when choosing best; minimize
    //       QRBestCostMult * encoding_cost(last layer)
    //      -QRBestQMult    * q(last layer)
    //      -QRBestSOVMult  * SOVall(last layer)
    double QRBestCostMult;
    double QRBestQMult;
    double QRBestSOVMult;

    // ---- Act ----

    // ActPartialKluge changes the partials in the backpropagation,
    // multiplying them by prob^kluge (so 0 gives standard back
    // propagation).  The intent is to try to handle OR functionality
    // better, by concentrating the correction on the relevant input.
    double ActPartialKluge;

    // ---- Lay: per-layer parameters ----

    // Parameters for initializing rates using
    // initialize_learning_rates().
    // A "BaseRate" is set inversely proportional to number of sequences.
    double LayBaseTimesSeq;
    double LayMaxBaseRate;

    // Learning rates are set proportional to BaseRate, but the constant
    // may depend on whether multiplicative updates are being done.
    double LayWeightOverBase;
    double LayWeightOverBaseForMult;
    double LayPseudoOverBase;
    double LayPseudoOverBaseForMult;
    double LayBiasOverBase;
    double LayBiasOverBaseForMult;
    double LayGainOverBase;
    double LayGainOverBaseForMult;

    // Variance of sums should grow with the number of weights.
    double LayDesiredSqOverNumWeights;

    // Extinction per chain update is exp(-WeightRate*constant).
    // Here are the constants (may depend on whether multiplicative
    // updates are done).
    // [Note: eliminate "extinction_weight" from NeuralNet]
    double LayGainExtConst;
    double LayBiasExtConst;
    double LayPseudoExtConst;
    double LayGainExtConstForMult;
    double LayBiasExtConstForMult;
    double LayPseudoExtConstForMult;

    // Weight decay is faster as you get further from the center of the
    // window.  Use GainExtinction^(LayWindowShape*distance) to shape the
    // decays.  Note that this gives no decay at the center, but the Gain
    // decay (controlled by GainExtinction) affects all weights.
    // NOTE(review): the formula says "LayWindowShape" but the member is
    // named LayWindowShapeExp -- presumably the same quantity; confirm.
    double LayWindowShapeExp;

    // Updates are limited by bounds on the parameter being updated
    // and by maximum change.
    double LayMaxWeight;
    double LayMaxWeightChange;
    double LayMaxBias;
    double LayMaxBiasChange;
    double LayMaxPseudo;
    double LayMaxPseudoChange;
    double LayMaxGain;
    double LayMaxGainChange;

    // For updates, we can change the relative importance of the main
    // (log_loss) objective for different parameter types, possibly
    // dependent on whether multiplicative updates are done.
    double LayMainObjWeight;
    double LayMainObjWeightForMult;
    double LayMainObjBias;
    double LayMainObjBiasForMult;
    double LayMainObjGain;
    double LayMainObjGainForMult;
    double LayMainObjPseudo;
    double LayMainObjPseudoForMult;

    // Learning rates are not allowed to grow too fast.
    double LayMaxWeightRateFactor;
    double LayMaxBiasRateFactor;
    double LayMaxPseudoRateFactor;
    double LayMaxGainRateFactor;

    // Learning rates slow down on each epoch, using these multipliers.
    // The rate may depend on whether the network got better or worse
    // on the previous epoch.
    double LayWeightRateDecay;
    double LayPseudoRateDecay;
    double LayBiasRateDecayForLowerCost;
    double LayBiasRateDecayForHigherCost;
    double LayGainRateDecayForLowerCost;
    double LayGainRateDecayForHigherCost;

    // If the weights have to be set randomly, what range is used?
    //      set range= LayRangeMult(LayRangeDenom+distance_from_center)
    //  for additive update, use (-range,+range)
    //  for multiplicative update, use (LayRangeMinForMult*range, range)
    // NOTE(review): the operator between LayRangeMult and the
    // parenthesis is missing in the formula above; confirm against the
    // implementation.
    double LayRangeMult;
    double LayRangeDenom;
    double LayRangeMinForMult;

    // If PseudoCounts have to be set, what is their initial sum?
    double LayTotalInitialPseudo;

    // Constructor is defined elsewhere.
    LearningParam();

    // Necessary NamedClass functions: both return the address of the
    // class-wide ID object.
    static IdObject* classID() { return &ID; }
    virtual IdObject* type() const { return &ID; }
};

// class for keywords that can be used in LearningParam input
class LPInputCommand: public NamedObject
{
	// Signature of the function to execute when the keyword is found.
	//	Reads from "in" into "change".
	//	The command itself is passed down as the 3rd arg ("self"),
	//		so the function can report errors using self->name().
	//  Returns 1 if input should continue, 0 if error or end of input.
	typedef     int (*fcn)(istream &in, 
		    LearningParam *change,
		    LPInputCommand* self);

	// Handler for this keyword.  May be 0, since the constructor's
	// default leaves a command without a handler.
	fcn CommandFunction;

    public:
	// nm:  keyword name, forwarded to NamedObject
	// c:   handler to run for this keyword (default: none)
	// use: second NamedObject constructor argument (default: "")
	LPInputCommand(const char *nm, fcn c=0, const char *use="")
		: NamedObject(nm,use), CommandFunction(c)
	{
	}
	
	// Run this command's handler on "in", modifying "change".
	// Returns the handler's result (1 to continue, 0 on error or end of
	// input).  A command constructed without a handler cannot be
	// executed: that case is reported as an error (0) rather than
	// calling through a null function pointer.
	inline int execute(istream &in, LearningParam *change)
	{    if (!CommandFunction) return 0;
	     return (*CommandFunction)(in, change, this);
	}
};

#endif

// CHANGE LOG:
// 30 April 1998 Kevin Karplus
//	Added NetResetAfter as parameter (was constant before)
//	Added NetActChangeCorrectWeight
//	Moved ActWrongWeight to NetActWrongWeight
// 3 May 1998 Kevin Karplus
//	Added input for LearningParam
// 28 July 1998 Kevin Karplus
//	Added NetFractionWrongExponent
// 12 Dec 2001 Kevin Karplus
//	Added NetTemperature
// 24 May 2004 Kevin Karplus
//	inlined simple functions