#include #include "DirichletReg.h" #include "OrderSort/OrderSort.h"// for comment on symbol frequency in printout #include "Input/Input.h" #include "LogGamma/LogGamma.h" #include "EqualStrings/EqualStrings.h" #include "Utilities/log2.h" #include "Utilities/minmax.h" #include "Utilities/abs.h" // for abs // To Do: // 2) Turn off extra caching, now that the LogGamma function is // working better? Maybe not--the extra caching only involves // log, not lgamma calls, and is pretty cheap. static NamedClass* create_DirichletReg(void) {return new DirichletReg;} IdObject DirichletReg::ID("DirichletReg", create_DirichletReg, Regularizer::init_is_a, "a regularizer based on a Dirichlet mixture. \nEach component of the mixture is a set of pseudocounts defining a \nDirichlet prior on the possible distributions of letters. \nThe mixture coefficients specify the probability associated with each \ndifferent set of psuedocounts. \nNote that a single-component Dirichlet mixture is equivalent to a \nsimple pseudocount regularizer." ); NameToPtr* DirichletReg::CommandTable=0; // Pre-compute gamma(z_c(i) + s) for s=0, 1, 2, ... , MaxPlus #ifdef DirichletReg_No_Extra_Caching const int DirichletReg::MaxPlus = 0; #else const int DirichletReg::MaxPlus = 30; #endif void DirichletReg::clear(void) { FreezeComp = 0; FreezeMix = 0; NumComp=AllocComp=0; MixtureCoeff=0; Components=0; } DirichletReg::DirichletReg(const MLPReg &dir) : Regularizer(dir.alphabet_tuple(),dir.name()) { clear(); alloc(1); AddComponent(1, dir.pseudocounts()); } DirichletReg::DirichletReg(const MLZReg &dir) : Regularizer(dir.alphabet_tuple(),dir.name()) { clear(); alloc(1); float *counts = new float[alphabet_size()]; for (int i=alphabet_size()-1; i>=0; i--) counts[i] = dir.zero_offset(); AddComponent(1, counts); delete [] counts; } // copy constructor DirichletReg::DirichletReg(const DirichletReg &dir) : Regularizer(dir.alphabet_tuple(),dir.name()) { FreezeComp = dir.FreezeComp; FreezeMix = dir.FreezeMix; NumComp=AllocComp=0; MixtureCoeff=0; Components=0; alloc(dir.num_components()); for (int c=0; cset_alphabet_tuple(new AlphabetTuple(* alphabet_tuple())); dir->set_name(name()); dir->clear(); dir->alloc(num_components()); component_probs(TrainCounts, SumCounts, CompProbs); float *new_comp = new float[alphabet_size()]; for (int c=0; c=0; j--) new_comp[j] = comp[j] + TrainCounts[j]; dir->AddComponent(CompProbs[c], new_comp); } delete [] new_comp; return dir; } // return 1 if x is a small integer, and set ix to that integer // (ix undefined if x not a small integer) inline int DirichletReg::is_small_int(float x, int &ix) { if (x==0) { ix=0; // short-cut for most common case return 1; } #ifndef DirichletReg_No_Extra_Caching const float MAX_ERROR=0.005; // allow for slight error float fl = floor(x); if (x-fl < MAX_ERROR) { ix = (int) fl; return (ix>= 0 && ix <=MaxPlus); } else if (x-fl > 1.-MAX_ERROR) { ix = (int) fl +1; return (ix>= 0 && ix <=MaxPlus); } #endif return 0; } // Note: if c==NumComp, then adding new component, not updating old one void DirichletReg::set_component(int c, int lett, float z) { assert(0<=lett && lett0) { LogBetaComp[c] += LogGamma(SumComponents[c]); } SumComponents[c] += diff; if (SumComponents[c] < 1e-4 || (-1e-5 *diff < z && z < 1e-5*diff)) { // possible accumulated round-off error, re-sum double sum=0., log_beta=0.; // cerr << "Setting component " << c << " letter " << lett // << " triggers re-computing sum\n" // << "Old SumComponents= " << SumComponents[c] // << " LogBetaComp= " << LogBetaComp[c] // << "\n" // << flush; for (int i=alphabet_size()-1; i>=0; i--) { double comp = component(c)[i]; sum += comp; // cerr << i << " sum= " << sum << "\n" << flush; // don't add in the uninitialized lgamma_components if (comp > 0) log_beta += lgamma_component(c,i); // cerr << i << " log_beta= " << log_beta << "\n" << flush; } // cerr << "New SumComponents is " << sum << "\n" << flush; SumComponents[c] = sum; LogBetaComp[c] = log_beta; } LogGamma_derivs(SumComponents[c], lgamma_tmp, Lgamma1Sum[c], Lgamma2Sum[c]); LogBetaComp[c] -= lgamma_tmp; } void DirichletReg::scale_component(int i, float multiplier) { for(int letter= alphabet_size()-1; letter>=0; letter--) set_component(i, letter, multiplier*component(i)[letter]); } void DirichletReg::delete_component(int i) { assert(0<=i && i < NumComp); // cerr << " Deleting component " << i << " from " << name() << " .\n"; if (i < NumComp) { const float *comp = component(NumComp -1); for (int letter= alphabet_size()-1; letter>=0; letter--) set_component(i, letter, comp[letter]); set_mixture(i, MixtureCoeff[NumComp-1]); } NumComp --; } int DirichletReg::num_parameters(void) const { return FreezeComp? NumComp: FreezeMix? (alphabet_size()*NumComp): (alphabet_size()+1)*NumComp; } float DirichletReg::min_parameter(int i) const { return 0; // let things disappear! } float DirichletReg::max_parameter(int i) const { if (FreezeComp || (!FreezeMix && i % (alphabet_size()+1) == alphabet_size())) return 1.e4; // max mixture coeff // allow some room--normalize() will rescale else return 1.e5; // max pseudocount } float DirichletReg::parameter(int i) const { if (FreezeComp) return MixtureCoeff[i]; int row_size = alphabet_size()+ (FreezeMix?0:1); int which_dist=i/row_size; int subscript=i-which_dist*row_size; return subscript==alphabet_size()? MixtureCoeff[which_dist]: component(which_dist)[subscript]; } void DirichletReg::set_parameter(int i, float p) { assert(p >= min_parameter(i)); assert(p <= max_parameter(i)); if (FreezeComp) { MixtureCoeff[i] = p; return; } int row_size = alphabet_size()+ (FreezeMix?0:1); int which_dist=i/row_size; int subscript=i-which_dist*row_size; if (subscript==alphabet_size()) MixtureCoeff[which_dist]= p; else set_component(which_dist,subscript,p); } // Make mixture coefficients sum to 1. // Make components have no element less than 1.e-6. // Recompute SumComponents and LogBeta to avoid roundoff errors. void DirichletReg::normalize(void) { double sum= 0.; int c; for (c=NumComp-1; c>=0; c--) sum += MixtureCoeff[c]; for (c=NumComp-1; c>=0; c--) { MixtureCoeff[c] /= sum; float min_of_component=1.e30; int i; for (i= alphabet_size()-1; i>=0; i--) { min_of_component= min(component(c)[i], min_of_component); } if (min_of_component < 1.e-6) { float offset = 1e-5 - min_of_component; for (i= alphabet_size()-1; i>=0; i--) set_component(c,i, component(c)[i]+offset); } double log_beta=0.; double sum_c=0; for (i=alphabet_size()-1; i>=0; i--) { sum_c += component(c)[i]; log_beta += lgamma_component(c,i); } SumComponents[c] = sum_c; double lgamma_tmp; LogGamma_derivs(sum_c, lgamma_tmp, Lgamma1Sum[c], Lgamma2Sum[c]); log_beta -= lgamma_tmp; LogBetaComp[c] = log_beta; } } static int ReadAlphaChar(istream &in, Regularizer *change, RegInputCommand* self) { int NumChar; in >> NumChar; if (NumChar != change->alphabet_size()) { cerr << "Number of characters " << NumChar << " from " << self->name() << " command, doesn't match " << change->alphabet_size() << " for " << *(change->alphabet_tuple()) << "\n" << flush; } return 1; } static int ReadNumDistr(istream &in, Regularizer *change, RegInputCommand* self) { int NumDistr; in >> NumDistr; dynamic_cast(change)->alloc(NumDistr+5); // leave some room to grow return 1; } static int ReadNumber(istream &in, Regularizer *change, RegInputCommand* self) { DirichletReg *g = dynamic_cast(change); int Number; in >> Number; if (Number != g->num_components()) { cerr << "Warning: reading " << change->type()->name() << " " << g->name() << " " << self->name() << " = " << Number << ", but " << g->num_components() << " already defined.\n" << flush; } return 1; } static float MixtureCoeffSaved= -1.0; // location for saving mixture coeff between Mixture and Alpha // commands // -1 is sentinel to indicate no mixture coeff read static int ReadMixture(istream &in, Regularizer *change, RegInputCommand* self) { in >> MixtureCoeffSaved; return 1; } static int ReadAlpha(istream &in, Regularizer *change, RegInputCommand* self) { DirichletReg *g = dynamic_cast(change); if (MixtureCoeffSaved <=0) { cerr << "Warning: reading " << change->type()->name() << " " << g->name() << " for component " << g->num_components() << " need Mixture command before " << self->name() << "\n"; cerr << "Assuming Mixture=1\n"; MixtureCoeffSaved = 1; } double sum_alpha; float *comp = new float[g->alphabet_size()]; const int* order=g->input_order(); in >> sum_alpha; for (int i=0; ialphabet_size(); i ++) { in >> comp[order? order[i]: i]; } g->AddComponent(MixtureCoeffSaved, comp); double sum_c = g->sum_component(g->num_components()-1); if (kk::abs(sum_c - sum_alpha) > 1.e-5* sum_c) { cerr << "Warning: reading " << change->type()->name() << " " << g->name() << " for component " << g->num_components()-1 << "\n" << " first argument of " << self->name() << " was " << sum_alpha << " instead of " << sum_c << "\n"; } MixtureCoeffSaved = -1; delete [] comp; return 1; } void DirichletReg::init_command_table() { assert(!CommandTable); CommandTable = new NameToPtr(13); CommandTable->ignore_case(); CommandTable->AddName(new RegInputCommand("AlphaChar", ReadAlphaChar)); CommandTable->AddName(new RegInputCommand("NumDistr", ReadNumDistr)); CommandTable->AddName(new RegInputCommand("Number", ReadNumber)); CommandTable->AddName(new RegInputCommand("Mixture", ReadMixture)); CommandTable->AddName(new RegInputCommand("Alpha", ReadAlpha)); CommandTable->AddName(new RegInputCommand("FullUpdate", ReadComment)); CommandTable->AddName(new RegInputCommand("QUpdate", ReadComment)); CommandTable->AddName(new RegInputCommand("StructID", ReadComment)); } void DirichletReg::AddComponent(float MixCoeff,const float*comp) { // cerr << "Adding component to " << name() // << " with MixCoeff= " << MixCoeff << "\n" << flush; if (NumComp>=AllocComp) { // make room for 10 more float* old_comp = Components; Components=0; float* old_mix = MixtureCoeff; MixtureCoeff=0; int old_num=NumComp; delete_all(); alloc(old_num+10); for (int i=0; i=0; lett--) { component(c)[lett] = 0.; } for (lett=alphabet_size()-1; lett>=0; lett--) set_component(c,lett,comp[lett]); NumComp++; // cerr << "Sum of component=" << SumComponents[c] << "\n" << flush; } void DirichletReg::print_ordered_component(ostream &out, int c) const { // Get estimated background probabilities for // adding comments float *probs = new float[alphabet_size()]; int letter; for (letter=0; letter< alphabet_size(); letter++) probs[letter] = NumComp==1? 1.: 0.; if (NumComp >1) { for (int d=0; dprint_unindex(out,low_cost_first[ord]); if (ord+1 < alphabet_size()) { // put out separator symbol(s) out << " "; float f = cost[low_cost_first[ord]]; float fnext = cost[low_cost_first[ord+1]]; int trunc_f = static_cast(f); int trunc_fnext = static_cast(fnext); for (int i=trunc_f ; i <"; trunc_fnext++; // zero crossing produces extra symbol } else if (i trunc_f) out << " "; } } delete [] low_cost_first; delete [] cost; delete [] probs; } void DirichletReg::write_knowing_type(ostream &out) const { Regularizer::write_knowing_type(out); out << "AlphaChar= " << alphabet_size() << "\n" << "NumDistr= " << NumComp <<"\n"; for (int c=0; c= 0; letter--) ex_prob[letter] = share ; } if (ex_prob2) { double share2= share*share; for (int letter = alphabet_size()-1; letter >= 0; letter--) ex_prob2[letter] = share2 ; } return; } component_probs(TrainCounts, SumCounts, CompProbs); int letter; for (letter = alphabet_size()-1; letter >= 0; letter--) X[letter] = 0.; if (ex_prob2) { for (letter = alphabet_size()-1; letter >= 0; letter--) ex_prob2[letter] = 0.; } for (int c=NumComp-1; c>=0; c--) { float *comp=component(c); double weight= CompProbs[c] / (SumCounts+SumComponents[c]); for (letter = alphabet_size()-1; letter >= 0; letter--) X[letter] += weight * (TrainCounts[letter]+comp[letter]); if (ex_prob2) { weight /= (SumCounts+SumComponents[c]+1); for (letter = alphabet_size()-1; letter >= 0; letter--) { double num = (TrainCounts[letter]+comp[letter]); ex_prob2[letter] += weight * num * (num+1); } } } if (ex_prob) { for (letter = alphabet_size()-1; letter >= 0; letter--) ex_prob[letter] = X[letter]; } } void DirichletReg::get_modified_counts( const float* TrainCounts, float* probs) { if (NumComp==0) { double share = 1./alphabet_size(); for (int letter = alphabet_size()-1; letter >= 0; letter--) probs[letter] = X[letter] = share; return; } component_probs(TrainCounts, SumCounts, CompProbs); int alph_size_minus_1=alphabet_size()-1; int letter; for (letter = alphabet_size()-1; letter >= 0; letter--) X[letter] = 0.; for (int c=NumComp-1; c>=0; c--) { register double weight= CompProbs[c] / (SumCounts+SumComponents[c]); register const float*comp = component(c); for (letter = alph_size_minus_1; letter >= 0; letter--) { X[letter] += weight*(TrainCounts[letter] + comp[letter]); } } for (letter = alphabet_size()-1; letter >= 0; letter--) probs[letter] = X[letter]; } // Fill in Lgamma1_Counts so that // lgamma1_counts(c,k)=lgamma1(Counts[k] + component(c)[k]) // and Lgamma1_SumCounts[c] = lgamma1(SumCounts + SumComponents[c]) void DirichletReg::compute_lgamma1(const float *Counts) { for (int c=num_components()-1; c>=0; c--) { Lgamma1_SumCounts[c] = LogGamma_1(SumCounts + SumComponents[c]); for (int k=alphabet_size()-1; k>=0; k--) { if (Counts[k]>0) lgamma1_counts(c,k) = LogGamma_1(Counts[k] +component(c)[k]); else lgamma1_counts(c,k) = lgamma1_comp(c)[k]; } } Lgamma1CountsInvalid=0; } // Fill in Lgamma1_Counts so that // lgamma1_counts(c,k)=lgamma1(Counts[k] + component(c)[k]) // and Lgamma1_SumCounts[c] = lgamma1(SumCounts + SumComponents[c]) // also // lgamma2_counts(c,k)=lgamma2(Counts[k] + component(c)[k]) // and Lgamma2_SumCounts[c] = lgamma2(SumCounts + SumComponents[c]) void DirichletReg::compute_lgamma2(const float *Counts) { for (int c=num_components()-1; c>=0; c--) { double lg; LogGamma_derivs(SumCounts + SumComponents[c], lg, Lgamma1_SumCounts[c], Lgamma2_SumCounts[c]); for (int k=alphabet_size()-1; k>=0; k--) { if (Counts[k]>0) LogGamma_derivs(Counts[k] +component(c)[k], lg, lgamma1_counts(c,k), lgamma2_counts(c,k)); else { lgamma1_counts(c,k) = lgamma1_comp(c)[k]; lgamma2_counts(c,k) = lgamma2_comp(c)[k]; } } } Lgamma1CountsInvalid=0; Lgamma2CountsInvalid=0; } void DirichletReg::partials1(float *part1, int i, const float* Counts) { // X_i = sum_d Prob(d) (Counts[i] + component(d)[i])/ // (SumCounts+SumComponents[d])) // // X'_i = sum_d Prob(d) (component'(d)[i] / (SumCounts+SumComponents[d]) // - SumComponents'[d](Counts[i] + component(d)[i])/ // (SumCounts+SumComponents[d])^2) // + Prob'(d) (Counts[i] + component(d)[i])/ // (SumCounts+SumComponents[d])) // // For derivative with respect to MixtureCoeff[c], // X'_i = sum_d Prob'(d) (Counts[i] + component(d)[i]) // / (SumCounts+SumComponents[d]) // // For derivative w.r.t component(c)[j], // X'_i = Prob(c) (delta(i==j) / (SumCounts+SumComponents[d]) // - (Counts[i] + component(c)[i])/ // (SumCounts+SumComponents[c])^2) // + sum_d Prob'(d) (Counts[i] + component(d)[i])/ // (SumCounts+SumComponents[d])) // Note: derivative of Prob(d) w.r.t. component(c,k) or MixtureCoeff[c] // can be derived as follows: // (letting q_d = MixtureCoeff[d] and alpha_d=component(d)) // Prob(d) = q_d B(alpha_d+s)/B(alpha_d) / // sum_e q_e (B(alpha_e+s)/B(alpha_e)) // = exp(ln q_d + log_beta_over_beta(d)) / // sum_e exp(ln q_e + log_beta_over_beta(e)) // Letting u_e = ln q_e + log_beta_over_beta(e), // we have Prob(d)=exp(u_d)/ sum_e exp(u_e), then // Prob'(d) = exp(u_d) u'_d / sum_e exp(u_e) // - exp(u_d) (sum_e exp(u_e) u'_e) / (sum_e exp(u_e))^2 // = Prob(d) (u'_d - (sum_e exp(u_e) u'_e) / sum_e exp(u_e) ) // = Prob(d) (u'_d - sum_e Prob(e) u'_e) // // Notice that u'_e = 0 for e!= c (where the derivative is with respect // to Mixture coefficient q_c or any part of component c), // so Prob'(d) = Prob(d) (delta(d==c) -Prob(c)) u'_c // sum_d Prob'(d) (Counts[i] + component(d)[i]) // / (SumCounts+SumComponents[d]) // = u'(c) Prob(c) (Counts[i] + component(c)[i]) // / (SumCounts+SumComponents[c]) // - u'(c) Prob(c) sum_d Prob(d) (Counts[i] + component(d)[i]) // / (SumCounts+SumComponents[d]) // = u'(c) Prob(c) [(Counts[i] + component(c)[i]) // / (SumCounts+SumComponents[c]) // - X_i] // u'_c = 1/q_c when taking partial with respect to mixture coeff q_c // u'_c = lgamma1(Counts[j] + component(c)[j]) - lgamma1_comp(c)[j] // -lgamma1(SumCounts+SumComponents[c]) + Lgamma1Sum // for partial with respect to component(c)[j] // So we can simplify the derivatives of X'_i: // // For derivative with respect to MixtureCoeff[c], // X'_i = Prob(c) /MixtureCoeff[c] // [(Counts[i] + component(c)[i]) / (SumCounts+SumComponents[c]) // - X_i] // // For derivative w.r.t component(c)[j], // X'_i = Prob(c) (delta(i==j) / (SumCounts+SumComponents[c]) // - (Counts[i] + component(c)[i])/ // (SumCounts+SumComponents[c])^2) // +Prob(c) // * (lgamma1(Counts[j] + component(c)[j]) - lgamma1_comp(c)[j] // -lgamma1(SumCounts+SumComponents[c]) + Lgamma1Sum) // * [(Counts[i] + component(c)[i])/ (SumCounts+SumComponents[c]) // -X_i] // if (Lgamma1CountsInvalid && num_components()>1) if (Lgamma1CountsInvalid) compute_lgamma1(Counts); int as1=alphabet_size() + (FreezeMix? 0:1); for (int c=NumComp-1; c>=0; c--) { double counts_plus=(Counts[i] + component(c)[i]); double sum_counts_plus = (SumCounts+SumComponents[c]); double normalized_post = counts_plus / sum_counts_plus; double this_minus_x = normalized_post - X[i]; // force this_minus_x to be zero if it is not just because of // bad round-off error if (kk::abs(this_minus_x) < 1.0e-8* normalized_post) this_minus_x=0; double prob_c_this_minus_x = CompProbs[c]*this_minus_x; // first handle the derivatives w.r.t. the mixture coefficients. if (FreezeComp) { part1[c] = prob_c_this_minus_x / MixtureCoeff[c]; continue; } if (!FreezeMix) part1[c*as1+alphabet_size()] =prob_c_this_minus_x/MixtureCoeff[c]; double prob_norm_over_sum = CompProbs[c]*normalized_post/sum_counts_plus; for (int k=alphabet_size()-1; k>=0; k--) { double u_prime_c= lgamma1_counts(c,k) - lgamma1_comp(c)[k] -Lgamma1_SumCounts[c] + Lgamma1Sum[c]; part1[c*as1+k] = prob_c_this_minus_x *u_prime_c - prob_norm_over_sum; } part1[c*as1+i] += CompProbs[c]/sum_counts_plus; } } void DirichletReg::partials2(float *part1, float*part2, int i, const float* Counts) { // Letting u_c = ln q_c + log_beta_over_beta(c), // counts_plus= (Counts[i] + component(c)[i]) // sum_counts_plus = (SumCounts+SumComponents[c]) // normalized_post = counts_plus/sum_counts_plus // For derivative w.r.t. MixtureCoeff[c] or component(c)[j] // Prob'(d) = Prob(d) u'(c) (delta(d==c) - Prob(c)) // In particular, Prob'(c)=Prob(c)(1-Prob(c))u'(c) // for derivative w.r.t. MixtureCoeff[c] // u'(c) = 1/MixtureCoeff[c], and u''(c) = -1/MixtureCoeff[c]^2 // normalized_post' = 0 // // X'_i = Prob(c) u'(c) (normalized_post-X_i) // X''_i = (1-2 Prob(c)) u'(c) X'_i + Prob(c) u''(c) (normalized_post-X_i) // = -2 Prob(c) X'_i / MixtureCoeff[c] // for derivative w.r.t. component[c](j) // normalized_post' = (delta(i==j) - normalized_post)/sum_counts_plus // normalized_post'' = -2 (delta(i==j) - normalized_post)/sum_counts_plus^2 // // X'_i = Prob(c) // [ u'(c) (normalized_post-X_i) + // (delta(i==j) - normalized_post)/sum_counts_plus] // X''_i = Prob'(c)/Prob(c) X'_i // + Prob(c) u''(c) (normalized_post-X_i) // + Prob(c) u'(c)(delta(i==j) - normalized_post)/sum_counts_plus) // - Prob(c) u'(c) X'_i // -2 Prob(c) (delta(i==j)-normalized_post)/ sum_counts_plus ^2 // // X''_i = (1-2 Prob(c))u'(c) X'_i // + Prob(c) u''(c) (normalized_post-X_i) // + Prob(c) (u'(c)-2/sum_counts_plus) // (delta(i==j) - normalized_post)/sum_counts_plus) // // u'(c) = lgamma1(counts_plus) - lgamma1_comp(c)[j] // -lgamma1(sum_counts_plus) + Lgamma1Sum // u''(c) = lgamma2(counts_plus) - lgamma2_comp(c)[j] // -lgamma2(sum_counts_plus) + Lgamma2Sum // if (Lgamma2CountsInvalid && num_components()>1) if (Lgamma2CountsInvalid) compute_lgamma2(Counts); int as1=alphabet_size() + (FreezeMix? 0:1); double *u_prime_c = new double[alphabet_size()]; for (int c=NumComp-1; c>=0; c--) { double counts_plus=(Counts[i] + component(c)[i]); double sum_counts_plus = (SumCounts+SumComponents[c]); double normalized_post = counts_plus / sum_counts_plus; double this_minus_x = normalized_post - X[i]; // force this_minus_x to be zero if it is not just because of // bad round-off error if (kk::abs(this_minus_x) < 1.0e-8* normalized_post) this_minus_x=0; double prob_c_this_minus_x = CompProbs[c]*this_minus_x; // first handle the derivatives w.r.t. the mixture coefficients. if (FreezeComp) { part1[c] = prob_c_this_minus_x / MixtureCoeff[c]; part2[c]= -2 *part1[c] * ( CompProbs[c]/MixtureCoeff[c]); continue; } if (!FreezeMix) { part1[c*as1+alphabet_size()] =prob_c_this_minus_x/MixtureCoeff[c]; part2[c*as1+alphabet_size()] = part1[c*as1+alphabet_size()] * -2 * ( CompProbs[c]/MixtureCoeff[c]); } double prob_norm_over_sum = CompProbs[c]*normalized_post/sum_counts_plus; double Lgamma1Sums = Lgamma1Sum[c]-Lgamma1_SumCounts[c] ; int k; for (k=alphabet_size()-1; k>=0; k--) { u_prime_c[k] = lgamma1_counts(c,k)-lgamma1_comp(c)[k] +Lgamma1Sums; part1[c*as1+k] = prob_c_this_minus_x *u_prime_c[k] - prob_norm_over_sum; } part1[c*as1+i] += CompProbs[c]/sum_counts_plus; double Lgamma2Sums = Lgamma2Sum[c]-Lgamma2_SumCounts[c] ; for (k=alphabet_size()-1; k>=0; k--) { double u_2_c = lgamma2_counts(c,k)-lgamma2_comp(c)[k] +Lgamma2Sums; part2[c*as1+k] = (1-2*CompProbs[c])*part1[c*as1+k] *u_prime_c[k] + u_2_c*prob_c_this_minus_x - prob_norm_over_sum*(u_prime_c[k] - 2/sum_counts_plus); } part2[c*as1+i] += CompProbs[c]*(u_prime_c[i] - 2/sum_counts_plus) /sum_counts_plus; } delete [] u_prime_c; } // compute Beta(counts+z)/Beta(z) for each component // and return the max value for normalizing // double DirichletReg::log_beta_over_beta(const float* TrainCounts, double &SumTrainCounts, double* log_Beta_Beta) const { double max_log_Beta_Beta= -1.e99; int alph_size = alphabet_size(); int gamma_component_row_size = alph_size*(MaxPlus+1); double *lgamma_component_base = LogGammaComp + NumComp*gamma_component_row_size; for (short int c=NumComp-1; c>=0; c--) { lgamma_component_base -= gamma_component_row_size; register double log_beta_beta = -LogGamma(SumTrainCounts+SumComponents[c]); register const float * comp = component(c); for(int letter=alph_size-1; letter>=0; letter--) { int icount; double tc = TrainCounts[letter]; if (is_small_int(tc, icount)) { log_beta_beta += lgamma_component_base[(MaxPlus+1)*letter+icount]; } else { log_beta_beta += LogGamma(tc + comp[letter]); } } log_beta_beta -= LogBetaComp[c]; if (log_beta_beta > max_log_Beta_Beta) max_log_Beta_Beta = log_beta_beta; log_Beta_Beta[c] = log_beta_beta; } return max_log_Beta_Beta; } // Return the probability for each component given the sample. // comp_probs should be pre-allocated with num_components() floats. // // probability of component c computed by normalizing // mixture_coeff(c) * Prob(TrainCounts | component(c)) // (TrainCounts treated as the summary of a list of characters---no // correction for permutations) // // optionally (in *log_sum) // return ln sum_c mixture_coeff(c) * Prob(TrainCounts | component(c)) // void DirichletReg::component_probs(const float* TrainCounts, double& SumTrainCounts, double* probs, double* log_sum) { Lgamma1CountsInvalid=1; // the derivatives are not computed yet Lgamma2CountsInvalid=1; SumTrainCounts=0.; for (int letter=alphabet_size()-1; letter>=0; letter--) SumTrainCounts += TrainCounts[letter]; if (NumComp==1 && !log_sum) { probs[0] =1; // speed up for single component return ; } double *sw = new double[NumComp]; // fill sw[c] with log Beta(comp+counts)/Beta(comp) double max_sw = log_beta_over_beta(TrainCounts, SumTrainCounts, sw); // fill comp_prob[c] // with a scaled version of Beta(comp+counts)/Beta(comp) // multiplied by the existing mixture coefficient double sum=0; int c; for (c=NumComp-1; c>=0; c--) { probs[c] = exp(sw[c]-max_sw) * MixtureCoeff[c]; sum += probs[c]; } // normalize to sum to one double scale = 1./sum; for (c=NumComp-1; c>=0; c--) probs[c] *= scale; if (log_sum) (*log_sum) = max_sw + log(sum); delete [] sw; } // return the natural log of the probability of the count vector // along with the partial derivatives of the natural log (if arrays provided) // Note: second derivative only taken with respect to same parameter twice, // not off-diagonal. // double DirichletReg::log_probability(const float* TrainCounts, float *deriv1, float*deriv2) { double log_prob; component_probs(TrainCounts,SumCounts, CompProbs, &log_prob); if (SumCounts<=0) { if (deriv1) { for (int i=num_parameters()-1; i>=0; i--) deriv1[i] = 0; } if (deriv2) { for (int i=num_parameters()-2; i>=0; i--) deriv2[i] = 0; } return 0; // Probability of the 0 vector is always 1 } int c; double sum_mix = 0.; // sum of mixture coefficients for (c=num_components()-1; c>=0; c--) sum_mix += MixtureCoeff[c]; log_prob -= log(sum_mix); if (!deriv1 && !deriv2) return log_prob; if (deriv2) compute_lgamma2(TrainCounts); else if (deriv1) compute_lgamma1(TrainCounts); int as1=alphabet_size() + (FreezeMix? 0:1); for (c=num_components()-1; c>=0; c--) { // which parameter is the mixture coefficient int mix_subscript = FreezeComp? c : (as1*c + alphabet_size()); if (!FreezeMix) { double mult = CompProbs[c]/MixtureCoeff[c]; if (deriv1) deriv1[mix_subscript] = mult - 1/sum_mix; if (deriv2) deriv2[mix_subscript] = 1/(sum_mix*sum_mix) -mult*mult; } if (FreezeComp) continue; float *deriv1c = deriv1+c*as1; // beginning of row of param float *deriv2c = deriv2+c*as1; double Lgamma1Sums = Lgamma1Sum[c]-Lgamma1_SumCounts[c] ; double Lgamma2Sums = deriv2? (Lgamma2Sum[c]-Lgamma2_SumCounts[c]) : 0; for (int j=alphabet_size()-1; j>=0; j--) { double lgm1 = lgamma1_counts(c,j)-lgamma1_comp(c)[j] +Lgamma1Sums; if (deriv1) deriv1c[j] = CompProbs[c] * lgm1; if (deriv2) { double lgm2 = lgamma2_counts(c,j)-lgamma2_comp(c)[j] +Lgamma2Sums; deriv2c[j] = CompProbs[c] * (lgm1*lgm1*(1-CompProbs[c])+lgm2); } } } return log_prob; } int DirichletReg::verify_log_prob_partials(ostream &logfile, const float*TrainCounts, float tolerance) { int ret=1; // return value, set to 0 if bad partial found float *part1 = new float[num_parameters()]; float *part2 = new float[num_parameters()]; double log_prob = log_probability(TrainCounts, part1, part2); for (int p=num_parameters()-1; p>=0; p--) { float save=parameter(p); double delta = save< 0.1? kk::abs(save*0.1) : kk::abs(save*0.01); set_parameter(p, save+delta); double log_prob_plus = log_probability(TrainCounts); set_parameter(p, save-delta); double log_prob_minus = log_probability(TrainCounts); set_parameter(p, save); double diff= 0.5*(log_prob_plus - log_prob_minus); double loosened_tolerance = max(static_cast(tolerance), 1.e-6 *kk::abs(log_prob / diff)); double deriv = diff/delta; double abserr = kk::abs(deriv - part1[p]); if (abserr > loosened_tolerance && kk::abs(abserr/ deriv) > loosened_tolerance) { logfile << "First log_prob partial outside tolerance " << loosened_tolerance << "\n for " << type()->name() << " " << name() << "\n parameter " << p << " = " << save << "\n"; logfile << "counts="; for (int j=0; j(tolerance), 1.e-6*kk::abs(log_prob/diff2)); double deriv2 = diff2/(delta*delta); double abserr2 = kk::abs(deriv2 - part2[p]); if (abserr2 > loosened_tolerance && kk::abs(abserr2/ deriv2) > loosened_tolerance) { logfile << "Second log_prob partial outside tolerance " << loosened_tolerance << "\n for " << type()->name() << " " << name() << "\n parameter " << p << " = " << save << "\n"; logfile << "counts="; for (int j=0; j=0. // 9 April 2004 Kevin Karplus // Changed include because minmax.h and abs.h moved to Utilities/. // Thu Jan 26 15:26:07 PST 2006 Kevin Karplus // Switch abs back to kk::abs, since g++ 3.4.4 gets confused by abs.