// QualityRecord.h
// copyright 4 August 1997 Kevin Karplus

#ifndef QualityRecord_H
#define QualityRecord_H

#include <iostream>
using std::ostream;
#include "NeuralNet.h"
#include "NeuralLayer.h"
#include "LearningParam.h"
#include "Globals.h"
#include <math.h>	// for sqrt() and M_LOG2E used in inline members
#include <assert.h>

// forward declarations
class NetActivation;
class OneChain;

// SOV-related things
// DOCUMENT MEANINGS---THESE ARE INCOMPREHENSIBLE.
typedef struct
{
    enum {SEG1ONCE, SEG1REPEAT} sov_method;
    float sov_delta;
    float sov_delta_s;
    int sov_out;	// if set to 2, does some debugging output
    const char *fname;
} SOVparameters;

// A QualityRecord is a collection of measures of how good a particular
// layer of a neural network is.

class QualityRecord
{
  private:
    static double PseudoCountWeight;
	// weight of pseudocounts for bucketed Q computation

    const NeuralLayer* Layer;
    const NeuralLayer* NextTrainableLayer;

    // The first set of measures are only defined for a NeuralLayer
    // that has a defined correct output.

    double TrialsWeight;	// total weight of all outputs (independent of prediction)
				// ==PredictCount if all outputs have total weight=1

    // cost measures for predictive accuracy
    double EncodingCost;	// cost (in nats) of true outputs given predictions
    double NullCost;		// cost (in nats) of true outputs given background frequencies
				// BackgroundProbs are obtained from Globals
    double IdentityCost;	// cost (in nats) of true outputs given inputs to first layer
				// of net; defined only if no. of outputs of this layer =
				// no. of inputs of first layer

    double WeightOfMax;		// total weight of outputs with max predicted probability
				// (Note: PhD's Q3 measure is WeightOfMax/TrialsWeight
				// when correct output is unweighted single-correct EHL
				// alphabet)

    // The next set of values depends on the inputs to the Layer,
    // but does not measure how well prediction is done.
    // These measures are generic measures for whether hidden layers
    // carry information, not what information they carry.
    int PredictCount;		// how many predictions made?
    double SumSum2;		// the sum of the squares of the "sums()"
				// of the layer.  Keeping this small is the
				// goal of training the Bias values of the layer.
    double SumSumBias2;		// The sum of squares of (sums()-bias)
				// The goal of the gain optimization is to make
				// this close to DesiredSq (*PredictCount)
    double SumSum2MinusDes2;	// The sum of ((sums()-bias)^2-DesiredSq)^2
				// Keeping this small is the goal of gain setting.
				// Note that DesiredSq is an adjustable parameter.

    double *SOVsums;	// sums of SOV measures for particular outputs
    int NumSummed;	// How many chains were used to generate these sums.

    double *SumPhati;	// sum of predicted probability for particular output
    double *SumPj;	// sum of real probability for particular output
			// of next trainable layer
    double **SumPhatiPhatj;	// sum of cross products of predictions
    double **SumPhatiPj;	// cross products of predictions with
				// real outputs of next trainable layer

    // Bucket the Q3 computation according to the Phat of the predicted
    // output.
    // The cxx compiler does not allow constants to be declared this
    // way, so we have to use the uglier #define method.
    //	const double BucketSize= 0.01;
    //	const int NumBuckets= ceil(1.0000001/BucketSize);
    // cxx also doesn't realize that "ceil" can be part of a constant
    // expression!
#define QualityRecordBucketSize	(0.01)
#define QualityRecordNumBuckets \
	(static_cast<int>(-(-1.0000001/QualityRecordBucketSize)))

    double BucketedWeightOfMax[QualityRecordNumBuckets];
	// sum of weight of predicted
    double BucketedTrialsWeight[QualityRecordNumBuckets];
	// sum of weight of all outputs

    inline int bucket_index(double Phat) const
    {
	int index= static_cast<int>(Phat*QualityRecordNumBuckets);
	return index==QualityRecordNumBuckets? index-1: index;
    }
    inline double bucket_low(int index) const
    {	return index*QualityRecordBucketSize;
    }
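    // Example of the bucketing arithmetic (illustrative numbers,
    // not from the original code):  with QualityRecordBucketSize=0.01
    // there are 100 buckets; a prediction with Phat=0.637 lands in
    // bucket_index 63, whose bucket_low is 0.63.  The clamp in
    // bucket_index handles Phat==1.0, which would otherwise index
    // one past the last bucket (100 rather than 99).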
  public:
    QualityRecord(const NeuralLayer *lay);
    QualityRecord(const QualityRecord *old);	// copy constructor
    ~QualityRecord();

    void clear(void);

    // update quality measure based on ActivationRecord
    // (rec should already have had its cost computed).
    void add(const ActivationRecord *rec);

    void compile_unit_usage_data(const NetActivation *MyNetAct,
		const OneChain *chain);
    void print_unit_usage(ostream &to) const;

    inline double phati(int i) const
    {	return SumPhati[i]/PredictCount;
    }
    inline double phati_given_j(int i, int j) const
    {	return SumPhatiPj[i][j]/SumPj[j];
    }

    // encoding_cost is the cost in nats/character of encoding
    // the desired outputs
    inline double encoding_cost(void) const
    {	return TrialsWeight<=0? 0: (EncodingCost/TrialsWeight);
    }

    // null_cost is the cost in nats/character of encoding
    // the desired outputs given just their background frequencies
    inline double null_cost(void) const
    {	return TrialsWeight<=0? 0: (NullCost/TrialsWeight);
    }

    // identity_cost is the cost in nats/character of encoding
    // the true outputs, given the inputs to the first layer of
    // the net; defined only if no. of inputs to first layer =
    // no. of outputs of last layer
    inline double identity_cost(void) const
    {	return TrialsWeight<=0? 0: (IdentityCost/TrialsWeight);
    }

    // q is the fraction of the time that the highest probability
    // output was the correct one.
    inline double q(void) const
    {	return TrialsWeight<=0? 0: (WeightOfMax/TrialsWeight);
    }

    // bucketed Q computation uses pseudocounts
    // to reduce wild fluctuation from low counts
    // (prior assumption is that Q=bucket_low).
    inline double q(int index) const
    {	return (BucketedWeightOfMax[index]
			+ bucket_low(index)*PseudoCountWeight)
		/ (BucketedTrialsWeight[index] + PseudoCountWeight);
    }
    void print_Q_vs_Phat(ostream &out) const;
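    // Worked example of the pseudocount smoothing (the numbers here
    // are made up; PseudoCountWeight is set elsewhere):  suppose
    // bucket 90 (bucket_low 0.90) has seen only BucketedTrialsWeight=2,
    // both of it correct, so the raw estimate would be a wild 1.0.
    // With PseudoCountWeight=10,
    //	q(90) = (2 + 0.90*10) / (2 + 10) = 11/12 = 0.9167,
    // pulled back toward the prior assumption Q=bucket_low.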
    inline double rms_sum(void) const
    {	return sqrt(SumSum2/(PredictCount*Layer->num_out()));
    }
    inline double rms_sum_bias(void) const
    {	return sqrt(SumSumBias2/(PredictCount*Layer->num_out()));
    }
    inline double rms_sum2_minus_des(void) const
    {	return sqrt(SumSum2MinusDes2/(PredictCount*Layer->num_out()));
    }

    double avg_gain(void) const;
    double avg_pseudo(void) const;

    double rms_weight(void) const;
	// root mean square of weights in layer of network.
	// Keeping this small is a traditional way to suppress
	// unused "noisy" weights.
    double rms_bias(void) const;	// root mean square of bias

    friend ostream& operator << (ostream& s, const QualityRecord& qr);
    void print_header(ostream &s);

    static SOVparameters DefaultParameters;

    float sov(int n_aa, int n_indices,
		const short int *sss1, const short int *sss2,
		int sov_what,
		const SOVparameters &pdata=DefaultParameters);

    // adds to SumSOVall, SumSOVe, SumSOVh, SumSOVl
    void addSOV(int n_aa, const short int *osec, const short int *psec,
		const SOVparameters &pdata=DefaultParameters);

    // i==0 corresponds to avg SOV over all indices
    inline double SOV(int i=0) const
    {	assert(i>=0);
	assert(i <= Layer->num_out());
	return NumSummed==0? 0.0: SOVsums[i]/NumSummed;
    }

    inline double objective(void) const
    {	const LearningParam *lp = Layer->owner()->learning_params();
	return (null_cost()-encoding_cost())*M_LOG2E*lp->QRBestCostMult
		+ q()*lp->QRBestQMult
		+ SOV()*lp->QRBestSOVMult;
    }
};

//CHANGE LOG:
// 14 April 1998 Kevin Karplus
//	Added copy constructor
// 30 April 1998 Christian Barrett
//	Added SOVsums field and destructor
// 1 May 1998 Kevin Karplus
//	Added objective function (and default param for SOV)
// 25 July 1998 Kevin Karplus
//	Added Bucketed... members.  Added comments for members added
//	earlier by Christian.  Moved constructor/destructor to .cc file.
//	Improved slightly the parameterization of SOV method.
//	Added print_Q_vs_Phat
// 15 September 1999 Sugato Basu
//	Added IdentityCost measure
// 6 Nov 1999 Kevin Karplus
//	Changed objective function slightly, to use encoding_cost in bits
//	rather than nats.
// 8 Jan 2001 Kevin Karplus
//	Added null_cost and negated objective function.
// 24 May 2004 Kevin Karplus
//	inlined simple functions
// Fri Jun 17 17:53:24 PDT 2005 Kevin Karplus
//	Made operator << use const QualityRecord
// Sat Jun 18 21:29:40 PDT 2005 Kevin Karplus
//	Eliminated AvgNullCost and put in NullCost for correct
//	computation of info gain.

#endif
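// Example of the intended call pattern (a hedged sketch, not from the
// original source; the NeuralNet::layer() accessor and the setup of the
// ActivationRecord are assumptions for illustration only):
//	QualityRecord qr(net->layer(i));	// hypothetical accessor
//	qr.clear();
//	// ... for each prediction, after its cost has been computed:
//	qr.add(rec);		// rec is a const ActivationRecord*
//	// ... then read off the summary measures:
//	cout << "Q= " << qr.q()
//		<< " encoding cost (nats)= " << qr.encoding_cost()
//		<< " info gain (bits)= "
//		<< (qr.null_cost()-qr.encoding_cost())*M_LOG2E
//		<< " objective= " << qr.objective() << endl;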