// PredictMix.cc
// copyright 29 July 1997 Kevin Karplus

#include "PredictMix.h"
#include <math.h>		// for log() and M_LOG2E

const float REALBIGCOST = 1.0e33;

// This file is for plug-in functions to add to a NeuralNet
// to make it predict mixture coefficients for a Dirichlet mixture
// in each position.
// The objective function is to minimize the encoding cost of the
// observed counts (or weighted counts) given the inputs to the network
// (usually single template residues, but possibly an alignment of
// a subfamily).

// Replace the mixture coefficients of dir with mix (which should have
// dir->num_components() entries),
// then compute the encoding cost of out_probs using
// the mean posterior estimate of the modified mixture given in_counts.
// Returns the difference between the encoding cost and the optimal
// cost (the entropy of out_probs), or REALBIGCOST if out_probs is invalid.
double cost_mix_output(const float *mix, const DirichletReg *dir,
	const float *in_counts, const float *out_probs)
{
    // copy dir and reset the mixture coefficients
    DirichletReg d(*dir);
    for (int c = d.num_components()-1; c >= 0; c--)
	d.set_mixture(c, mix[c]);

    float *Probs = new float[d.alphabet_size()];
    float cost = d.encodingCostForColumnCounts(out_probs, in_counts, Probs);

    // Subtract the optimal cost, so that the return value is the
    // excess encoding cost over the entropy of out_probs.
    for (int i = d.alphabet_size()-1; i >= 0; i--)
    {
	if (out_probs[i] == 0.)
	    continue;
	if (out_probs[i] < 0.)
	{
	    delete [] Probs;	// don't leak Probs on the error return
	    return REALBIGCOST;	// negative probabilities are illegal
	}
	cost += out_probs[i] * log(out_probs[i]) * M_LOG2E;
    }
    delete [] Probs;
    return cost;
}

// Compute the partials of cost_mix_output with respect to mix.
// The output array partials should have dir->num_components() elements.
// (The entropy term subtracted in cost_mix_output does not depend on
// mix, so it contributes nothing to these partials.)
void partials_of_cost_wrt_mix(double *partials, const float *mix,
	const DirichletReg *dir,
	const float *in_counts, const float *out_counts)
{
    // copy dir and reset the mixture coefficients
    DirichletReg d(*dir);
    int c;	// counter for components
    int i;	// counter for letters of output alphabet
    int alph_size = d.alphabet_size();
    for (c = d.num_components()-1; c >= 0; c--)
	d.set_mixture(c, mix[c]);

    float *Probs = new float[alph_size];
    d.get_probs(in_counts, Probs);
    const double *ComponentProbs = d.component_probs();

    double SumInCounts = 0;
    for (i = alph_size-1; i >= 0; i--)
	SumInCounts += in_counts[i];

    // Now the partial of Probs[i] w.r.t. mix[c] is
    //	ComponentProbs[c] / mix[c]
    //	* ( (in_counts[i]+d.component(c,i)) / (SumInCounts+d.sum_component(c))
    //	    - Probs[i] )
    // so the partial of the encoding cost w.r.t. mix[c] is
    //	- sum_i out_counts[i]
    //	  * ComponentProbs[c] / mix[c]
    //	  * ( (in_counts[i]+d.component(c,i))
    //	      / ((SumInCounts+d.sum_component(c)) * Probs[i])
    //	      - 1 )
    // Note that there is no M_LOG2E factor here, so if the encoding cost
    // is measured in bits these partials are scaled by a constant ln 2
    // (harmless when only the gradient direction matters).
    for (c = d.num_components()-1; c >= 0; c--)
    {
	double scale = ComponentProbs[c] / mix[c];	// assumes mix[c] > 0
	partials[c] = 0.;
	double SumInPlus = SumInCounts + d.sum_component(c);
	for (i = alph_size-1; i >= 0; i--)
	{
	    if (out_counts[i] == 0.)
		continue;
	    partials[c] -= out_counts[i] * scale
		* ((in_counts[i]+d.component(c,i)) / (SumInPlus*Probs[i])
		    - 1.0);
	}
    }
    delete [] Probs;
}

// CHANGE LOG:
// 15 September 1999 Sugato Basu
//	Cost changed to return the difference of encoding cost over the
//	optimal cost
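
#ifdef PREDICTMIX_SELFTEST
// ----------------------------------------------------------------------
// Illustrative finite-difference check (a sketch, not part of the
// original file).  It compares the analytic partials above against
// central differences of cost_mix_output, under the assumption that
// set_mixture() stores each coefficient exactly as given (no
// renormalization), so both functions see the same perturbed mixture.
// The name max_abs_partial_error and the PREDICTMIX_SELFTEST guard are
// hypothetical, not an existing API.  If the analytic and numeric values
// agree only up to a constant factor of ln 2, the discrepancy is the
// M_LOG2E factor applied in cost_mix_output but not in
// partials_of_cost_wrt_mix.
// Typical debugging use:
//	assert(max_abs_partial_error(mix, dir, in_counts, out_probs) < tol);
static double max_abs_partial_error(const float *mix,
	const DirichletReg *dir,
	const float *in_counts, const float *out_probs,
	float eps = 1.0e-3f)	// assumes mix[c] > eps for all c
{
    int num_comp = dir->num_components();
    double *analytic = new double[num_comp];
    partials_of_cost_wrt_mix(analytic, mix, dir, in_counts, out_probs);

    // mutable copy of the mixture coefficients for perturbation
    float *perturbed = new float[num_comp];
    for (int c = num_comp-1; c >= 0; c--)
	perturbed[c] = mix[c];

    double worst = 0.;
    for (int c = num_comp-1; c >= 0; c--)
    {
	perturbed[c] = mix[c] + eps;
	double up = cost_mix_output(perturbed, dir, in_counts, out_probs);
	perturbed[c] = mix[c] - eps;
	double down = cost_mix_output(perturbed, dir, in_counts, out_probs);
	perturbed[c] = mix[c];	// restore before the next component

	double err = fabs((up - down) / (2.*eps) - analytic[c]);
	if (err > worst)
	    worst = err;
    }
    delete [] perturbed;
    delete [] analytic;
    return worst;
}
#endif // PREDICTMIX_SELFTEST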