// QualityRecord.cc // copyright 4 August 1997 Kevin Karplus #include #include #include // for M_LOG2E #include #include "Utilities/IOSmacros.h" #include "QualityRecord.h" #include "ActivationRecord.h" #include "NeuralLayer.h" #include "InterfaceDescription.h" #include "AlphabetTuple/AlphabetTuple.h" #include "OneChain.h" #include "NetActivation.h" double QualityRecord::PseudoCountWeight = 15.0; SOVparameters QualityRecord::DefaultParameters ={SOVparameters::SEG1REPEAT, 1.0, 0.5, 0, "default SOV params"}; // WARNING: using SEG1ONCE can result in SOV scores larger than 1, for // predictions that are clearly bad. QualityRecord::QualityRecord(const NeuralLayer*lay) { assert(lay != 0); Layer=lay; NextTrainableLayer=Layer->owner()->next_TrainTo_layer(lay->layer_number()); assert(NextTrainableLayer != 0); int num_out=Layer->num_out(); int next_num_out=NextTrainableLayer->num_out(); SOVsums= new double[num_out+1]; SumPhati = new double[num_out]; SumPhatiPhatj = new double*[num_out]; for (int d=0; dLayer; NextTrainableLayer = old->NextTrainableLayer; TrialsWeight= old->TrialsWeight; EncodingCost= old->EncodingCost; NullCost= old->NullCost; IdentityCost= old->IdentityCost; WeightOfMax = old->WeightOfMax; PredictCount= old->PredictCount; SumSum2 = old->SumSum2; SumSumBias2 = old->SumSumBias2; SumSum2MinusDes2= old->SumSum2MinusDes2; int num_out = Layer->num_out(); int next_num_out = NextTrainableLayer->num_out(); if (old->SOVsums) { SOVsums = new double[num_out+1]; for (int j=Layer->num_out(); j>=0; j--) SOVsums[j] = old->SOVsums[j]; } NumSummed = old->NumSummed; // Copy the Sum arrays if (old->SumPhati && old->SumPhatiPhatj && old->SumPj && old->SumPhatiPj) { SumPhati = new double[num_out]; SumPhatiPhatj = new double*[num_out]; for (int i=0; iSumPhati[i]; SumPhatiPhatj[i] = new double[num_out]; for (int j=0; jSumPhatiPhatj[i][j]; } } SumPj = new double[next_num_out]; SumPhatiPj = new double*[num_out]; for (int f=0; fSumPj[f]; } for (int e=0; eSumPhatiPj[e][f]; } } } for (int b=0; bBucketedWeightOfMax[b]; BucketedTrialsWeight[b] = old->BucketedTrialsWeight[b]; } } QualityRecord::~QualityRecord() { delete [] SOVsums; int num_out=Layer->num_out(); // free the allocated arrays delete [] SumPhati; delete [] SumPj; for (int d=0; dnum_out(); int next_num_out = NextTrainableLayer->num_out(); if (SOVsums) { for (int j=num_out; j>=0; j--) SOVsums[j]=0.0; } NumSummed = 0; // Clear the Sum arrays for (int i=0; inum_out(); int index = bucket_index(rec->phat_most_probable()); EncodingCost += rec->cost(); NullCost += rec->null_cost(); //cerr << "Encoding Cost:" << EncodingCost << endl << flush; if((Layer->owner()->layer(0)->num_in() == Layer->num_out()) && !(Layer->output_interface()->is_hidden())) { const OneChain* chain=Layer->owner()->net_activation()->chain(); const float * InputProbs = chain->probs_for_layer(0,rec->position()); //float * InputCounts = chain->counts_for_layer(0,rec->position()); chain->counts_for_layer(0,rec->position()); if(Layer->output_interface()->train_to_unique()) { int correct_out = chain->correct_value(Layer->layer_number()+1,rec->position()); IdentityCost += rec->identity_cost(InputProbs,correct_out); } else if(Layer->output_interface()->is_TrainTo()) { const float * out_weights = chain->probs_for_layer(Layer->layer_number()+1,rec->position()); IdentityCost += rec->identity_cost(InputProbs,out_weights, Layer->num_out()); //cerr << "Input Counts::" << endl << flush; //for(int o=0; onum_out(); o++) // { //cerr << o << ": " << InputCounts[o] << endl << flush; //} //cerr << "Input Probs::" << endl << flush; //for(int o=0; onum_out(); o++) //{ // cerr << o << ": " << InputProbs[o] << endl << flush; //} //cerr << "Output::" << endl << flush; //for(int o=0; onum_out(); o++) //{ //cerr << o << ": " << out_weights[o] << endl << flush; //} } //cerr << "Trials weight is: " << TrialsWeight << endl << flush; //cerr << "Identity Cost is :"<< identity_cost()<< endl<< endl<< flush; } WeightOfMax += rec->most_out_weight(); BucketedWeightOfMax[index] += rec->most_out_weight(); TrialsWeight += rec->out_sum(); BucketedTrialsWeight[index] += rec->out_sum(); PredictCount ++; for(int o=num_out-1; o>=0; o--) { double sum=rec->sums()[o]; double sum_no_bias = sum-Layer->bias(o); double sum_no_bias2 = sum_no_bias*sum_no_bias; SumSum2 += sum*sum; SumSumBias2 += sum_no_bias2; double Sum2MinusDes=sum_no_bias2 - Layer->DesiredSq; SumSum2MinusDes2 += Sum2MinusDes * Sum2MinusDes; } } void QualityRecord::print_unit_usage(ostream &to) const { int num_out=Layer->num_out(); int next_num_out=NextTrainableLayer->num_out(); const InterfaceDescription *out_int=Layer->output_interface(); const InterfaceDescription *next_out_int=NextTrainableLayer->output_interface(); to << "Layer: " << Layer->layer_number() << " " << Layer->name() << endl; for (int i=0; iunit_name(i); } to << endl << "E[Phat(col)] = " << endl; to << IOSFIXED << std::setprecision(5); for (int i=0; i0) to << std::setw(8) << SumPhatiPj[i][j]/SumPj[j]; else to << " -----"; } to << endl; } to << endl << endl; to << "E[Phat(row)*Phat(col)] / E[Phat(row)]*E[Phat(col)] = " << endl; for (int i=0; inum_out(); int next_num_out = NextTrainableLayer->num_out(); int next_trainable_layer_number = NextTrainableLayer->layer_number(); NetActivation *NetAct=Layer->owner()->net_activation(); int length = chain->num_cols(); int this_lay_num = Layer->layer_number(); int next_trainable_lay_num = NextTrainableLayer->layer_number(); float * tmp_desired_probs = NULL; if(NextTrainableLayer->output_interface()->train_to_unique()) { tmp_desired_probs = new float [next_num_out]; } const float* desired_probs= tmp_desired_probs; // Begin collection of data here for (int pos=0; posrecord(this_lay_num,pos)->probs(); //const float *next_probs = NetAct->record(next_trainable_lay_num,pos)->probs(); NetAct->record(next_trainable_lay_num,pos)->probs(); if(!NextTrainableLayer->output_interface()->train_to_unique()) { desired_probs = chain->probs_for_layer(next_trainable_layer_number+1,pos); } else { int correct_out; correct_out = chain->correct_value(next_trainable_layer_number+1,pos); for (int nextj=0; nextjnum_out()-1; o>=0; o--) { double bias = Layer->bias(o); sum_bias2 += bias*bias; } return sqrt(sum_bias2 / Layer->num_out()); } double QualityRecord::avg_gain(void) const { double sum=0; for(int o=Layer->num_out()-1; o>=0; o--) { sum += Layer->gain(o); } return sum/ Layer->num_out(); } double QualityRecord::avg_pseudo(void) const { double sum=0; for(int o=Layer->num_out()-1; o>=0; o--) { sum += Layer->pseudo(o); } return sum/ Layer->num_out(); } double QualityRecord::rms_weight(void) const { double sum_weight2=0.0; for(int o=Layer->num_out()-1; o>=0; o--) { for(int w=Layer->num_wind()-1; w>=0; w--) { for(int i=Layer->num_in()-1; i>=0; i--) { double weight = Layer->weight(i,w,o); sum_weight2 += weight*weight; } } } return sqrt(sum_weight2 / (Layer->num_out()*Layer->num_wind()*Layer->num_in())); } void QualityRecord::addSOV(int n_aa, const short int *osec, const short int *psec, const SOVparameters &pdata) { int num_indices = Layer->num_out(); for (int j=0; j<=num_indices; j++) { SOVsums[j] += sov(n_aa,num_indices,osec,psec,j,pdata); } NumSummed++; } void QualityRecord::print_header(ostream &s) { s << " out_bits IDBits Q" << IOSLEFT << std::setw(2) << Layer->output_interface()->num_units() << IOSRIGHT << " SOV object count "; const InterfaceDescription* out_ifd = Layer->output_interface(); if(out_ifd->train_to_unique()) { for (int o=0; o< Layer->num_out(); o++) { s << "SOV_" << IOSLEFT << std::setw(4) << out_ifd->unit_name(o) << IOSRIGHT; } } // s << " rms_nobias" // << " avg_pseudo rms_bias rms_w avg_gain" // << " WghtRt PseudoRt BiasRt GainRt" s << endl; } ostream& operator << (ostream& s, const QualityRecord& qr) { if (! qr.Layer->output_interface()->is_hidden()) { double ec = qr.encoding_cost()*M_LOG2E; double nc = qr.null_cost()*M_LOG2E; double ic = qr.identity_cost()*M_LOG2E; double q3 = qr.q(); double obj = qr.objective(); //double weightrate = qr.Layer->weight_rate(); //double pseudorate = qr.Layer->pseudo_rate(); //double biasrate = qr.Layer->bias_rate(); //double gainrate = qr.Layer->gain_rate(); qr.Layer->weight_rate(); qr.Layer->pseudo_rate(); qr.Layer->bias_rate(); qr.Layer->gain_rate(); long oldprecision = s.precision(4); s << IOSFIXED; s << std::setw(7) << nc-ec; if (ic>0) s << std::setw(7) << nc-ic; else s << " -------"; s << ' ' << std::setw(7) << q3 << ' ' << std::setw(7) << qr.SOV() << ' ' << std::setw(7) << obj << ' ' << std::setprecision(0) << std::setw(7) << qr.TrialsWeight << std::setprecision(4); if(qr.Layer->output_interface()->train_to_unique()) { for (int o=1; o<= qr.Layer->num_out(); o++) s << ' ' << std::setw(7) << qr.SOV(o); } s << std::setprecision(oldprecision); } // s << form(" %7.4f", qr.rms_sum_bias()) // << form(" %7.4f %7.4f %7.4f %7.4g", // qr.avg_pseudo(), qr.rms_bias(), qr.rms_weight(), qr.avg_gain()) // << form(" %7.4g %7.4g %7.4g %7.4g", // weightrate, pseudorate, biasrate, gainrate) s << endl; return s; } // sov - evaluate SSp by the Segment OVerlap quantity (SOV) // Input: secondary structure segments float QualityRecord::sov(int n_aa, int n_indices, const short int *sss1, const short int *sss2, int sov_what, const SOVparameters &pdata) { // if sov_what==0, then count anything in [0..n_indices-1] // otherwise just count for sov_what-1 int low_end= sov_what==0? 0: sov_what-1; int hi_end= sov_what==0? n_indices-1: low_end; // "n" counts the number of sss1 elements that matter int n=0; double s=0.0; // accumulate sum here // index i goes through sss1 for (int i=0; ihi_end) { continue; } // segment in sss1 is [beg_s1..end_s1] and has length1 pieces int beg_s1=i; while(i=beg_s1 && beg_s2<=end_s1) { // segments overlap // Count all residues from segment1 repeatedly if sov_method==1 if (multiple>0 && pdata.sov_method==SOVparameters::SEG1REPEAT) { n+=length1; } multiple++; // j1 is the later beginning of a segment // j2 is the earlier begining of a segment int j1 = (beg_s1>beg_s2)? beg_s1: beg_s2; int j2 = (beg_s1>beg_s2)? beg_s2: beg_s1; // k1 is the earlier ending of a segment // k2 is the later ending of a segment int k1 = (end_s1>end_s2)? end_s2: end_s1; int k2 = (end_s1>end_s2)? end_s1: end_s2; // minov is the length of the region that both segments cover // maxov is the length of the region that either covers int minov=k1-j1+1; int maxov=k2-j2+1; // d1 and d2 are scaled versions of the segment lengths int d1= static_cast(floor(length1*pdata.sov_delta_s)); int d2= static_cast(floor(length2*pdata.sov_delta_s)); int d; // set d to the minimum of d1, d2, minov, maxov-minov // (unless sov_method==SEG1ONCE, in which case omit d2) if(d1>d2) d=d2; if(d1<=d2 || pdata.sov_method==SOVparameters::SEG1ONCE) d=d1; if(d>minov) {d=minov;} if(d>maxov-minov) {d=maxov-minov;} double x= (minov + pdata.sov_delta*d) *length1; if (maxov>0) {s += x/maxov;} else { cerr << "\n ERROR! minov = " << IOSLEFT << std::setw(4) << minov << " maxov = " << std::setw(4) << maxov << " length = " << std::setw(4) << length1 << " d = " << std::setw(4) << d << ' ' << IOSRIGHT << std::setw(4) << beg_s1+1 << ' ' << std::setw(4) << end_s1+1 << " " << std::setw(4) << beg_s2+1 << ' ' << std::setw(4) << end_s2+1; } if(pdata.sov_out==2) { cerr << "\n TEST: minov = " << IOSLEFT << std::setw(4) << minov << " maxov = " << std::setw(4) << maxov << " length = " << std::setw(4) << length1 << " d = " << std::setw(4) << d << ' ' << IOSRIGHT << std::setw(4) << beg_s1+1 << ' ' << std::setw(4) << end_s1+1 << " " << std::setw(4) << beg_s2+1 << ' ' << std::setw(4) << end_s2+1; } } } } if(pdata.sov_out==2) { cerr << "\n TEST: Number of considered residues = " << n; } return n>0? s/n: 1.0; } // print RDB file for Q as a function of Phat (from Bucketed...) void QualityRecord::print_Q_vs_Phat(ostream &out) const { out << "Layer\tPhat_low\tQ_no_pseudo\tQ_est\tcorrect\ttotal\n" << "5N\t10N\t10N\t10N\t10N\t10N\n"; int layer = Layer->layer_number(); for (int b=0; b< QualityRecordNumBuckets; b++) { if (BucketedTrialsWeight[b]>0) { out << layer << "\t" << bucket_low(b) << "\t" << BucketedWeightOfMax[b] /BucketedTrialsWeight[b] << "\t" << q(b) << "\t" << BucketedWeightOfMax[b] << "\t" << BucketedTrialsWeight[b] << "\n" ; } } } // CHANGE LOG: // 13 May 1998 Kevin Karplus // Added AlphabetTuple printing to SOV output. // 14 May 1998 Kevin Karplus // Cleaned up some very ugly and inefficient code in SOV computation // 29 May 1998 Kevin Karplus // added missing parentheses in computing // SumPhatiPhatj[k][l] = (SumPhatiPhatj[k][l]/PredictCount) / // (SumPhati[k]*SumPhati[l]); // 25 July 1998 Kevin Karplus // Added Bucketed... members. // Moved construct/destructor to .cc file // Improved slightly the parameterization of SOV method. // Added print_Q_vs_Phat // 15 Septemebr 1999 Sugato Basu // Added code to compute and report IdentityCost // 15 September 1999 Sugato Basu // Costs are now reported in BITS in the quality report, // though still internally represented in NATS // 6 Nov 1999 Kevin Karplus // Fixed error in printing objective function---it was previously // scaled by M_LOG2E, though it is not in units of bits or nats. // 8 Jan 2001 Kevin Karplus // Changed bits output to bits-saved (null cost minus encoding cost) // Fixed bug in computing SumPj. // 10 Oct 2001 Kevin Karplus // Fixed known bug in computing unit usage for prob vector output. // 18 August 2003 George Shackelford // Replaced deprecated form(...) function // Sat Jun 18 22:22:29 PDT 2005 Kevin Karplus // Added NullCost computation // Thu Jul 7 04:42:49 PDT 2005 Kevin Karplus // Added extra space in print_header() // Sat Jul 9 11:23:47 PDT 2005 Kevin Karplus // changed "Bits_saved" to "out_bits" in print_header(). // Fri Jul 22 14:39:12 PDT 2005 Kevin Karplus // Made several float* into const float* for change in probs_for_layer()