// LearningParam.h // copyright 21 April 1998 Kevin Karplus #ifndef LearningParam_H #define LearningParam_H #include "NamedClass/NamedObject.h" #include "NamedClass/NamedClass.h" // LearningParam holds lots of parameters for // controlling the neural-net learning technique. // [As of 21 April 1998, this class is not used anywhere---it should // be integrated soon.] // All constants that are related to the learning parameters // should be collected here! // Members starting with // "Net" are used in NeuralNet.cc // "NetAct" are use in NetActivation.cc // "QR" are used in QualityRecord.cc // "Act" are used in ActivationRecord.cc // "Lay" are used in NerualLayer.cc class LPInputCommand; // forward declaration of input commands for class class LearningParam: public NamedClass { private: // For NamedClass static IdObject ID; virtual int read_knowing_type(istream &in); virtual void write_knowing_type(ostream &out) const; static NameToPtr *CommandTable; // for reading input public: static const NameToPtr *command_table(void) { return CommandTable; } static void init_command_table(void); static LearningParam *read_new(istream &in); // initial relative weights for parts of the objective function double NetInitCenterWeight; // keep sums centered on 0 double NetInitRangeWeight; // scale weights to get desired variance on sums // multipliers to reduce CenterWeight and RangeWeight on each epoch double NetCenterDecay; double NetRangeDecay; int NetResetAfter; // reset network to best after this many epochs with no // improvement // reject change to network if // delta-objective < -NetTemperature*log(random) // That is if change is negative, accept with probability // exp(change/temperature). double NetTemperature; // when back propagating, weight chains by (1-q) ^ NetFractionWrongExponent double NetFractionWrongExponent; // multiplier for update for incorrect weights double NetActWrongWeight; //multiplier for update for transitions in what is correct // (like hhhhHEeee ) double NetActChangeCorrectWeight; // multipliers for relative importance of encoding cost and % correct // when choosing best (minimize // QRBestCostMult*encoding_cost(last layer) // -QRBesQMult *q(last layer) // -QRBestSOVMult *SOVall(last layer) double QRBestCostMult, QRBestQMult, QRBestSOVMult; // ActPartialKluge changes the partials in the backpropagation, // multiplying them by prob^kluge (so 0 gives standard // back propagation). // The intent is to try to handle OR functionality better, by // concentrating the correction on the relevant input. double ActPartialKluge; // These are the parameters for initializing rates using // initialize_learning_rates(). // A "BaseRate" is set inversely proportional to number of sequences. double LayBaseTimesSeq; double LayMaxBaseRate; // learning rates are set proportional to BaseRate, // but constant may depend on whether multiplicative updates are // being done. double LayWeightOverBase, LayWeightOverBaseForMult; double LayPseudoOverBase, LayPseudoOverBaseForMult; double LayBiasOverBase, LayBiasOverBaseForMult; double LayGainOverBase, LayGainOverBaseForMult; // Variance of sums should grow with the number of weights double LayDesiredSqOverNumWeights; // Extinction per chain update is exp(-WeightRate*constant) // here are the constants (may depend on whether multiplicative // updates are done) [Note: eliminate "extinction_weight" from NeuralNet] double LayGainExtConst, LayBiasExtConst, LayPseudoExtConst; double LayGainExtConstForMult, LayBiasExtConstForMult, LayPseudoExtConstForMult; // Weight decay is faster as you get further from the // center of the window. Use GainExtinction^(LayWindowShape*distance) // to shape the decays. Note that this gives no decay at the center, // but the Gain decay (controlled by GainExtinction) affects all weights. double LayWindowShapeExp; // updates are limited by bounds on parameter being updated // and by maximum change double LayMaxWeight, LayMaxWeightChange; double LayMaxBias, LayMaxBiasChange; double LayMaxPseudo, LayMaxPseudoChange; double LayMaxGain, LayMaxGainChange; // For updates, we can change the relative importance of the // main (log_loss) objective for different parameter types, // possibly dependent on whether multiplicative updates are done. double LayMainObjWeight, LayMainObjWeightForMult; double LayMainObjBias, LayMainObjBiasForMult; double LayMainObjGain, LayMainObjGainForMult; double LayMainObjPseudo, LayMainObjPseudoForMult; // Learning rates are not allowed to grow too fast double LayMaxWeightRateFactor, LayMaxBiasRateFactor, LayMaxPseudoRateFactor, LayMaxGainRateFactor; // Learning rates slow down on each epoch, using these multipliers. // The rate may depend on whether the network got better or worse // on the previous epoch. double LayWeightRateDecay, LayPseudoRateDecay, LayBiasRateDecayForLowerCost, LayBiasRateDecayForHigherCost, LayGainRateDecayForLowerCost, LayGainRateDecayForHigherCost; // If the weights have to be set randomly, what range is used? // set range= LayRangeMult(LayRangeDenom+distance_from_center) // for additive update, use (-range,+range) // for multiplicative update, use (LayRangeMinForMult*range, range) double LayRangeMult, LayRangeDenom, LayRangeMinForMult; // If PseudoCounts have to be set, what is their initial sum? double LayTotalInitialPseudo; public: LearningParam(void); // Necessary NamedClass functions inline static IdObject* classID(void) {return &ID;} virtual IdObject* type(void) const {return &ID;} }; // class for keywords that can be used in LearningParam input class LPInputCommand: public NamedObject { typedef int (*fcn)(istream &in, LearningParam *change, LPInputCommand* self); fcn CommandFunction; // function to execute when keyword found. // Reading from "in" into "change". // Pass this down to function as 3rd arg, // so it can report error using self->name(). // Return 1 if input should continue, 0 if error or end of input. public: LPInputCommand(const char *nm, fcn c=0, const char *use="") : NamedObject(nm,use) { CommandFunction=c; } inline int execute(istream &in, LearningParam *change) { return (*CommandFunction)(in, change, this); } }; #endif // CHANGE LOG: // 30 April 1998 Kevin Karplus // Added NetResetAfter as parameter (was constant before) // Added NetActChangeCorrectWeight // Moved ActWrongWeight to NetActWrongWeight // 3 May 1998 Kevin Karplus // Added input for LearningParam // 28 July 1998 Kevin Karplus // Added NetFractionWrongExponent // 12 Dec 2001 Kevin Karplus // Added NetTemperature // 24 May 2004 Kevin Karplus // inlined simple functions