// LearningParam.cc
// copyright 23 April 1998 Kevin Karplus

#include "LearningParam.h"
#include "Command/Command.h"

// information for the NamedClass LearningParam
static NamedClass *create_learning_param(void)
{   return new LearningParam;
}

IdObject LearningParam::ID("LearningParam", create_learning_param, 0,
"LearningParam is a collection of all the constants used in controlling\n\
the learning of NeuralNet.\n\
Parameters whose names start with\n\
\"Net\" are used in NeuralNet.cc\n\
\"NetAct\" are used in NetActivation.cc\n\
\"QR\" are used in QualityRecord.cc\n\
\"Act\" are used in ActivationRecord.cc\n\
\"Lay\" are used in NeuralLayer.cc\n\
");

NameToPtr *LearningParam::CommandTable = 0;

// create LearningParam record with all default values
LearningParam::LearningParam(void)
{
    NetInitCenterWeight = 1.0e-3;       // keep sums centered on 0
    NetInitRangeWeight = 1.0e-5;        // scale weights for desired variance on sums
    NetCenterDecay = 0.95;
    NetRangeDecay = 0.99;
    NetTemperature = 1.e10;             // accept all changes (old behavior)
    NetResetAfter = 30;
    NetFractionWrongExponent = 0;

    QRBestCostMult = 1.0;
    QRBestQMult = 1.4;
    QRBestSOVMult = 0.7;

    NetActWrongWeight = 1.2;            // extra weight for correcting wrong answers
    NetActChangeCorrectWeight = 1.20;   // extra weight for transitions
                                        // where the correct answer changes

    ActPartialKluge = 0.25;

    LayBaseTimesSeq = 0.05;
    LayMaxBaseRate = 0.01;
    LayWeightOverBase = 1.00;
    LayWeightOverBaseForMult = 1.5;
    LayPseudoOverBase = 0.1;
    LayPseudoOverBaseForMult = 0.1;
    LayBiasOverBase = 0.1;
    LayBiasOverBaseForMult = 0.5;
    LayGainOverBase = 0.01;
    LayGainOverBaseForMult = 0.01;

    LayDesiredSqOverNumWeights = 0.06;

    LayGainExtConst = 2.0e-4;
    LayBiasExtConst = 4.0e-5;
    LayPseudoExtConst = 8.0e-4;
    LayGainExtConstForMult = 2.0e-5;
    LayBiasExtConstForMult = 4.0e-5;
    LayPseudoExtConstForMult = 4.0e-5;

    LayWindowShapeExp = 0.3;

    LayMaxWeight = 5.00;
    LayMaxWeightChange = 0.01;
    LayMaxPseudo = 10.0;
    LayMaxPseudoChange = 2.0;
    LayMaxBias = 10.0;          // old code limited bias to window size,
                                // which may be better
    LayMaxBiasChange = 0.1;
    LayMaxGain = 2.0;           // keep gain in [1/MaxGain, MaxGain]
    LayMaxGainChange = 1.5;     // scale by [1/GainChange, GainChange]

    LayMainObjWeight = LayMainObjWeightForMult = 1.0;
    LayMainObjPseudo = LayMainObjPseudoForMult = 1.0;
    LayMainObjBias = 0.01;
    LayMainObjBiasForMult = 0.05;
    LayMainObjGain = LayMainObjGainForMult = 1.0;

    LayMaxWeightRateFactor = 2.0;
    LayMaxPseudoRateFactor = 2.0;
    LayMaxBiasRateFactor = 1.1;
    LayMaxGainRateFactor = 1.1;

    LayWeightRateDecay = 0.992;
    LayPseudoRateDecay = 0.99;
    LayBiasRateDecayForLowerCost = 1.2;
    LayBiasRateDecayForHigherCost = 0.88;
    LayGainRateDecayForLowerCost = 1.1;
    LayGainRateDecayForHigherCost = 0.7;

    LayRangeMult = 0.6;
    LayRangeDenom = 0.4;
    LayRangeMinForMult = 0.1;

    LayTotalInitialPseudo = 1.0;
}

static int ReadComment(istream &in, LearningParam *change, LPInputCommand *self)
{
    SkipSeparators(in, 1, '\n');
    return 1;
}

static int VerifyClassName(istream &in, LearningParam *change, LPInputCommand *self)
{
    char word[100];
    get_word(in, word);
    const IdObject *end_id = IdObject::id(word);
    if (end_id != change->type())
    {   cerr << "Warning: " << self->name() << " = " << word
             << " doesn't match " << change->type()->name()
             << "\n" << flush;
    }
    // continue if "ClassName", stop if "EndClassName"
    return EqualStrings(self->name(), "ClassName", 1);
}

static int ReadIntParam(istream &in, int &param, LPInputCommand *self)
{   in >> param;
    return 1;
}

static int ReadDoubleParam(istream &in, double &param, LPInputCommand *self)
{   in >> param;
    return 1;
}

// Now for the LearningParam-specific commands
static int ReadNetInitCenterWeight(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->NetInitCenterWeight, self);
}

static int ReadNetInitRangeWeight(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->NetInitRangeWeight, self);
}

static int ReadNetCenterDecay(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->NetCenterDecay, self);
}

static int ReadNetRangeDecay(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->NetRangeDecay, self);
}

static int ReadNetFractionWrongExponent(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->NetFractionWrongExponent, self);
}

static int ReadNetActWrongWeight(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->NetActWrongWeight, self);
}

static int ReadNetActChangeCorrectWeight(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->NetActChangeCorrectWeight, self);
}

static int ReadQRBestCostMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->QRBestCostMult, self);
}

static int ReadQRBestQMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->QRBestQMult, self);
}

static int ReadQRBestSOVMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->QRBestSOVMult, self);
}

static int ReadActPartialKluge(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->ActPartialKluge, self);
}

static int ReadLayBaseTimesSeq(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayBaseTimesSeq, self);
}

static int ReadLayMaxBaseRate(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxBaseRate, self);
}

static int ReadLayWeightOverBase(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayWeightOverBase, self);
}

static int ReadLayWeightOverBaseForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayWeightOverBaseForMult, self);
}

static int ReadLayPseudoOverBase(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayPseudoOverBase, self);
}

static int ReadLayPseudoOverBaseForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayPseudoOverBaseForMult, self);
}

static int ReadLayBiasOverBase(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayBiasOverBase, self);
}

static int ReadLayBiasOverBaseForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayBiasOverBaseForMult, self);
}

static int ReadLayGainOverBase(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayGainOverBase, self);
}

static int ReadLayGainOverBaseForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayGainOverBaseForMult, self);
}

static int ReadLayDesiredSqOverNumWeights(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayDesiredSqOverNumWeights, self);
}
static int ReadLayGainExtConst(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayGainExtConst, self);
}

static int ReadLayBiasExtConst(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayBiasExtConst, self);
}

static int ReadLayPseudoExtConst(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayPseudoExtConst, self);
}

static int ReadLayGainExtConstForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayGainExtConstForMult, self);
}

static int ReadLayBiasExtConstForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayBiasExtConstForMult, self);
}

static int ReadLayPseudoExtConstForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayPseudoExtConstForMult, self);
}

static int ReadLayWindowShapeExp(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayWindowShapeExp, self);
}

static int ReadLayMaxWeight(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxWeight, self);
}

static int ReadLayMaxWeightChange(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxWeightChange, self);
}

static int ReadLayMaxBias(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxBias, self);
}

static int ReadLayMaxBiasChange(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxBiasChange, self);
}

static int ReadLayMaxPseudo(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxPseudo, self);
}

static int ReadLayMaxPseudoChange(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxPseudoChange, self);
}

static int ReadLayMaxGain(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxGain, self);
}

static int ReadLayMaxGainChange(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxGainChange, self);
}

static int ReadLayMainObjWeight(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMainObjWeight, self);
}

static int ReadLayMainObjWeightForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMainObjWeightForMult, self);
}

static int ReadLayMainObjBias(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMainObjBias, self);
}

static int ReadLayMainObjBiasForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMainObjBiasForMult, self);
}

static int ReadLayMainObjGain(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMainObjGain, self);
}

static int ReadLayMainObjGainForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMainObjGainForMult, self);
}

static int ReadLayMainObjPseudo(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMainObjPseudo, self);
}

static int ReadLayMainObjPseudoForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMainObjPseudoForMult, self);
}
static int ReadLayMaxWeightRateFactor(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxWeightRateFactor, self);
}

static int ReadLayMaxPseudoRateFactor(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxPseudoRateFactor, self);
}

static int ReadLayMaxBiasRateFactor(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxBiasRateFactor, self);
}

static int ReadLayMaxGainRateFactor(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayMaxGainRateFactor, self);
}

static int ReadLayWeightRateDecay(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayWeightRateDecay, self);
}

static int ReadLayPseudoRateDecay(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayPseudoRateDecay, self);
}

static int ReadLayBiasRateDecayForLowerCost(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayBiasRateDecayForLowerCost, self);
}

static int ReadLayBiasRateDecayForHigherCost(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayBiasRateDecayForHigherCost, self);
}

static int ReadLayGainRateDecayForLowerCost(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayGainRateDecayForLowerCost, self);
}

static int ReadLayGainRateDecayForHigherCost(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayGainRateDecayForHigherCost, self);
}

static int ReadLayRangeMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayRangeMult, self);
}

static int ReadLayRangeDenom(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayRangeDenom, self);
}

static int ReadLayRangeMinForMult(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayRangeMinForMult, self);
}

static int ReadLayTotalInitialPseudo(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->LayTotalInitialPseudo, self);
}

static int ReadNetResetAfter(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadIntParam(in, change->NetResetAfter, self);
}

static int ReadNetTemperature(istream &in, LearningParam *change, LPInputCommand *self)
{   return ReadDoubleParam(in, change->NetTemperature, self);
}

void LearningParam::init_command_table(void)
{
    if (CommandTable) return;
    CommandTable = new NameToPtr(57);
    CommandTable->ignore_case();

    CommandTable->AddName(new LPInputCommand("NetInitCenterWeight",
        ReadNetInitCenterWeight,
        "importance of keeping sums centered near 0"));
    CommandTable->AddName(new LPInputCommand("NetInitRangeWeight",
        ReadNetInitRangeWeight,
        "importance of controlling variance of sums"));
    CommandTable->AddName(new LPInputCommand("NetCenterDecay",
        ReadNetCenterDecay,
        "multiplier per epoch for decaying importance of centering"));
    CommandTable->AddName(new LPInputCommand("NetRangeDecay",
        ReadNetRangeDecay,
        "multiplier per epoch for decaying importance of variance"));
    CommandTable->AddName(new LPInputCommand("NetFractionWrongExponent",
        ReadNetFractionWrongExponent,
        "back-propagate with weight (1-Q(chain))^NetFractionWrongExponent"));
    CommandTable->AddName(new LPInputCommand("NetResetAfter",
        ReadNetResetAfter,
        "reset network to best if no improvement in this many epochs"));
    CommandTable->AddName(new LPInputCommand("NetTemperature",
        ReadNetTemperature,
        "accept worse network with probability exp(change/temperature).\n\
Default is very large value to accept all changes."));
    CommandTable->AddName(new LPInputCommand("NetActWrongWeight",
        ReadNetActWrongWeight,
        "multiplier for extra weight when output is wrong"));
    CommandTable->AddName(new LPInputCommand("NetActChangeCorrectWeight",
        ReadNetActChangeCorrectWeight,
        "multiplier for extra weight when correct answer is not the same as\n\
neighboring positions"));
    CommandTable->AddName(new LPInputCommand("QRBestCostMult",
        ReadQRBestCostMult,
        "multiplier for bits saved in objective function"));
    CommandTable->AddName(new LPInputCommand("QRBestQMult",
        ReadQRBestQMult,
        "multiplier for fraction correct in objective function"));
    CommandTable->AddName(new LPInputCommand("QRBestSOVMult",
        ReadQRBestSOVMult,
        "multiplier for SOV in objective function"));
    CommandTable->AddName(new LPInputCommand("ActPartialKluge",
        ReadActPartialKluge,
        "power for phat(x) when back-propagating into x (0 is normal gradient descent)"));
    CommandTable->AddName(new LPInputCommand("LayBaseTimesSeq",
        ReadLayBaseTimesSeq,
        "Set base learning rate to this/number of sequences"));
    CommandTable->AddName(new LPInputCommand("LayMaxBaseRate",
        ReadLayMaxBaseRate,
        "maximum allowed for BaseRate"));
    CommandTable->AddName(new LPInputCommand("LayWeightOverBase",
        ReadLayWeightOverBase,
        "multiplier for BaseRate to get WeightRate"));
    CommandTable->AddName(new LPInputCommand("LayWeightOverBaseForMult",
        ReadLayWeightOverBaseForMult,
        "multiplier for BaseRate to get WeightRate for multiplicative updates"));
    CommandTable->AddName(new LPInputCommand("LayPseudoOverBase",
        ReadLayPseudoOverBase, ""));
    CommandTable->AddName(new LPInputCommand("LayPseudoOverBaseForMult",
        ReadLayPseudoOverBaseForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayBiasOverBase",
        ReadLayBiasOverBase, ""));
    CommandTable->AddName(new LPInputCommand("LayBiasOverBaseForMult",
        ReadLayBiasOverBaseForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayGainOverBase",
        ReadLayGainOverBase, ""));
    CommandTable->AddName(new LPInputCommand("LayGainOverBaseForMult",
        ReadLayGainOverBaseForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayDesiredSqOverNumWeights",
        ReadLayDesiredSqOverNumWeights, ""));
    CommandTable->AddName(new LPInputCommand("LayGainExtConst",
        ReadLayGainExtConst, ""));
    CommandTable->AddName(new LPInputCommand("LayBiasExtConst",
        ReadLayBiasExtConst, ""));
    CommandTable->AddName(new LPInputCommand("LayPseudoExtConst",
        ReadLayPseudoExtConst, ""));
    CommandTable->AddName(new LPInputCommand("LayGainExtConstForMult",
        ReadLayGainExtConstForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayBiasExtConstForMult",
        ReadLayBiasExtConstForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayPseudoExtConstForMult",
        ReadLayPseudoExtConstForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayWindowShapeExp",
        ReadLayWindowShapeExp, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxWeight",
        ReadLayMaxWeight, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxWeightChange",
        ReadLayMaxWeightChange, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxBias",
        ReadLayMaxBias, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxBiasChange",
        ReadLayMaxBiasChange, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxPseudo",
        ReadLayMaxPseudo, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxPseudoChange",
        ReadLayMaxPseudoChange, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxGain",
        ReadLayMaxGain, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxGainChange",
        ReadLayMaxGainChange, ""));
    CommandTable->AddName(new LPInputCommand("LayMainObjWeight",
        ReadLayMainObjWeight, ""));
    CommandTable->AddName(new LPInputCommand("LayMainObjWeightForMult",
        ReadLayMainObjWeightForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayMainObjBias",
        ReadLayMainObjBias, ""));
    CommandTable->AddName(new LPInputCommand("LayMainObjBiasForMult",
        ReadLayMainObjBiasForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayMainObjGain",
        ReadLayMainObjGain, ""));
    CommandTable->AddName(new LPInputCommand("LayMainObjGainForMult",
        ReadLayMainObjGainForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayMainObjPseudo",
        ReadLayMainObjPseudo, ""));
    CommandTable->AddName(new LPInputCommand("LayMainObjPseudoForMult",
        ReadLayMainObjPseudoForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxWeightRateFactor",
        ReadLayMaxWeightRateFactor,
        "maximum amount to multiply WeightRate by on each epoch"));
    CommandTable->AddName(new LPInputCommand("LayMaxPseudoRateFactor",
        ReadLayMaxPseudoRateFactor,
        "maximum amount to multiply PseudoRate by on each epoch"));
    CommandTable->AddName(new LPInputCommand("LayMaxBiasRateFactor",
        ReadLayMaxBiasRateFactor, ""));
    CommandTable->AddName(new LPInputCommand("LayMaxGainRateFactor",
        ReadLayMaxGainRateFactor,
        "maximum amount to multiply GainRate by on each epoch"));
    CommandTable->AddName(new LPInputCommand("LayWeightRateDecay",
        ReadLayWeightRateDecay, ""));
    CommandTable->AddName(new LPInputCommand("LayPseudoRateDecay",
        ReadLayPseudoRateDecay, ""));
    CommandTable->AddName(new LPInputCommand("LayBiasRateDecayForLowerCost",
        ReadLayBiasRateDecayForLowerCost,
        "multiplier each epoch for BiasRate, given that cost on training set has decreased."));
    CommandTable->AddName(new LPInputCommand("LayBiasRateDecayForHigherCost",
        ReadLayBiasRateDecayForHigherCost,
        "multiplier each epoch for BiasRate, given that cost on training set has increased."));
    CommandTable->AddName(new LPInputCommand("LayGainRateDecayForLowerCost",
        ReadLayGainRateDecayForLowerCost,
        "multiplier each epoch for GainRate, given that cost on training set has decreased."));
    CommandTable->AddName(new LPInputCommand("LayGainRateDecayForHigherCost",
        ReadLayGainRateDecayForHigherCost,
        "multiplier each epoch for GainRate, given that cost on training set has increased."));
    CommandTable->AddName(new LPInputCommand("LayRangeMult",
        ReadLayRangeMult, ""));
    CommandTable->AddName(new LPInputCommand("LayRangeDenom",
        ReadLayRangeDenom, ""));
    CommandTable->AddName(new LPInputCommand("LayRangeMinForMult",
        ReadLayRangeMinForMult, ""));
    CommandTable->AddName(new LPInputCommand("LayTotalInitialPseudo",
        ReadLayTotalInitialPseudo, ""));

    CommandTable->AddName(new LPInputCommand("Comment", ReadComment));
    CommandTable->AddName(new LPInputCommand("ClassName", VerifyClassName));
    CommandTable->AddName(new LPInputCommand("EndClassName", VerifyClassName));
}

LearningParam *LearningParam::read_new(istream &in)
{
    NamedClass *p = NamedClass::read_new(in);
    if (!p) return 0;
    if (!p->is_a(LearningParam::classID()))
    {   cerr << "Error while attempting to read learning parameters.\n"
             << "Found " << p->type()->name() << " instead.\n";
        return 0;
    }
    return dynamic_cast<LearningParam *>(p);
}
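// The input accepted by read_knowing_type below is a series of
// "Keyword = value" settings (keywords matched case-insensitively),
// terminated by an EndClassName line.  A plausible fragment, sketched
// from the command table above (the particular settings are hypothetical):
//
//      ClassName = LearningParam
//      NetTemperature = 1.0e10
//      NetResetAfter = 30
//      LayMaxWeight = 5.0
//      Comment = the rest of a Comment line is skipped
//      EndClassName = LearningParam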
int LearningParam::read_knowing_type(istream &in)
{
    if (!CommandTable) init_command_table();

    char word[300];
    while (in.good())
    {
        get_word(in, word, '=');
        LPInputCommand *comm = dynamic_cast<LPInputCommand *>
                (CommandTable->FindOldName(word, ZeroIfNew));
        if (comm)
        {   // a command returning 0 (EndClassName) terminates the read
            if (!comm->execute(in, this))
                return 1;
        }
        else
        {   cerr << "Unrecognized keyword: " << word
                 << " for type " << type()->name()
                 << "\n" << flush;
        }
    }
    return 0;
}

void LearningParam::write_knowing_type(ostream &out) const
{
    cerr << "Error: Output for LearningParam not implemented yet.\n" << flush;
    out << flush;
}

// CHANGE LOG:
// 3 May 1998 Kevin Karplus
//      Added input for LearningParam
// 12 Dec 2001 Kevin Karplus
//      Added NetTemperature
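// A minimal usage sketch (not part of the original file): reading the
// parameters from a file.  Assumes the stream begins with a header that
// NamedClass::read_new resolves to LearningParam; the file name
// "learn.params" is hypothetical.
//
//      #include <fstream>
//      #include "LearningParam.h"
//
//      ifstream param_in("learn.params");
//      LearningParam *lp = LearningParam::read_new(param_in);
//      if (lp)     // read_new returns 0 on failure (diagnostics on cerr)
//          cout << "NetTemperature = " << lp->NetTemperature << "\n";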