// ActivationRecord.h
// copyright 28 July 1997 Kevin Karplus

#ifndef ActivationRecord_H
#define ActivationRecord_H

#include "NeuralLayer.h"

class ActivationRecord
{    
    // In a NetActivation, there is one ActivationRecord for each
    // column of each layer. The number of columns of a layer equals
    // the overhangs for the layer plus the number of columns of 
    // the OneChain being activated. This is called the layer length.
    // 
    // Each activation record has the vector of output probabilities
    // for the given layer and column. When it computes these probabilities
    // it uses a window of inputs, which is a (small) subset of the
    // layer length of the previous layer.
    //
    // For a more complete description of the architecture of a layer,
    // including the calculation of the output vector, see NeuralLayer.h
    //
    // Ownership: Probs is owned by this record; In is only borrowed.
    // Because the record holds owned storage and has a destructor,
    // copying is disabled (see the private declarations below).

    const float **In;	// what are the inputs (not owned by this)
    float *Probs;	// normalized output probabilities
			// owned by this
    
    const NeuralLayer * Layer;	// activation record for which layer?
    int MyPosition;     // position in the range [0, layer length)

    double *Sums;	// sum of gain*weight*in + bias for each output
    double *ExpSums;	// exp(Sums)+Pseudo
    double InvSumExpSums; // 1.0 / sum of ExpSums

    // The following parameters are set when the cost of desired outputs
    // is computed.
    double NullCost;	// encoding cost in nats of outputs,
			// given BackgroundProbs: sum out_weights[o] * log(BackgroundProbs[o])

    double Cost;	// encoding cost in nats of outputs,
			// given Probs: sum out_weights[o] * log(Probs[o])
    double OutSum;	// sum of weights of all outputs
    
    int MostProbable;	// which output has highest predicted probability
    double PhatMostProbable;	// predicted probability of MostProbable
    double MostOutWeight; // weight in output vector for most probable 
			// single character
    
    // The following are used for backpropagation.
    double *Partials;	// partials of error function with respect to Probs[o]
    double *Share;	// partials of error function with respect to Pseudo[o]
    
    void clear(void);	// clear previous uses, but leave allocated

    // Copying is intentionally disabled: these two members are declared
    // private and deliberately left undefined.  The record owns Probs
    // (and presumably the other cached arrays -- confirm against the
    // destructor), so a compiler-generated memberwise copy would leave
    // two records pointing at, and eventually releasing, the same storage.
    ActivationRecord(const ActivationRecord &);
    ActivationRecord & operator=(const ActivationRecord &);
	
    public:

	// Build an activation record for layer lay.
	ActivationRecord(const NeuralLayer *lay);
	~ActivationRecord(void);

	// Record / report which column of the layer this record is for.
	inline void tell_position(int pos) {MyPosition = pos;}
	inline int position(void) const	{return MyPosition;}

	// The layer this record belongs to.
	inline const NeuralLayer * layer(void) const {return Layer;}

	// Access to the (borrowed) inputs: the whole set of windows,
	// one window, or a single input value within a window.
	inline const float **in(void)                const {return In;}
	inline const float * in(int wind)            const {return In[wind];}
	inline float         in(int wind, int input) const {return In[wind][input];}
	
	// apply the layer to a set of inputs
	// caching probs, sums, and exp(sums)+pseudo for later use.
	void compute_probs(const float** inputs);
	
	// what would layer return if all inputs were 0?
	void compute_dummy_probs(void);

	int highest_prob_output(void) const;
	
	// return cached arrays.
	inline const float* probs(void) const {return Probs;}
	inline const double* sums(void) const {return Sums;}
	inline const double* exp_sums(void) const {return ExpSums;}
	
	// partials is the partial derivatives of the error function w.r.t.
	// Probs[o]---it needs to be set by set_share_from_entropy
	// or set_partials or add_partials_from_next
	inline const double* partials(void) const {return Partials;}
	
	// share is the partial derivatives of the error function with respect
	// to Pseudo[o]
	inline const double* share(void) const {return Share;} 
	
	// exp_share(o) is the partial derivative of the error function 
	// with respect to Sums[o] (as long as Pseudo hasn't been updated yet)
	// ExpSums[o] is exp(Sums[o])+Pseudo, so subtracting the layer's
	// pseudo(o) leaves the exp(Sums[o]) term that Share[o] is scaled by.
	inline double exp_share(int o) const
	{    return Share[o] * (ExpSums[o] - Layer->pseudo(o));
	}
	
	// Reset every statistic that the cost functions set.
	inline void clear_cost(void)
	{   Cost=0;
	    NullCost=0;
	    OutSum=0;
	    MostOutWeight=0;
	    MostProbable=0;
	    PhatMostProbable = 0;
	}
	
	// Zero Partials and Share for all Layer->num_out() outputs.
	inline void clear_partials(void)
	{    for (int o=Layer->num_out()-1; o>=0; o--)
		Share[o] = Partials[o] = 0;
	}
	
	// Copy Layer->num_out() values from parts into Partials.
	// warning: need to set_share_from_partials after this
	inline void set_partials(const double *parts)
	{    for (int o=Layer->num_out()-1; o>=0; o--)
		Partials[o] = parts[o];
	}
	
	// The encoding cost in nats of the desired output
	//	(set by either cost function that has an argument)
	inline double cost(void) const {	return Cost;	}
	inline double null_cost(void) const {	return NullCost;	}
	inline double out_sum(void) const {	return OutSum;	}
	inline double most_out_weight(void) const  { return MostOutWeight;}
	inline int most_probable(void) const {return MostProbable;}
	inline double phat_most_probable(void) const {return PhatMostProbable;}
	
	// set Cost to the encoding cost of the counts in out_weights 
	// (also set OutSum and MostOutWeight and NullCost)
	double cost(const float *out_weights);
	
	// compute the cost of encoding the counts in out_weights
	// with respect to the probs, and return the value
	double identity_cost(const float * probs,
		const float * out_weights, int num_out) const;
	
	// set Cost to the encoding cost of the single count at correct_out
	// (also set OutSum and MostOutWeight and NullCost)
	double cost(int correct_out);

	// compute the cost of encoding the single count at correct_out
	// with respect to the probs, and return the value
	double identity_cost(const float * probs, int correct_out) const;

	// set Cost to precomputed cost c
	inline void set_cost(double c){Cost = c;}
	// set OutSum to precomputed value sum
	inline void set_outsum(double sum){OutSum = sum;}

	// set  Partials and Share
	// to the partial derivatives of Cost (times the multiplier)
	// with respect to outputs and Pseudo[i]
	void set_share_from_entropy(const float *out_weights, 
		double multiplier=1.0);
	void set_share_from_entropy(int correct_out, double multiplier=1.0);
	
	// For non-output layers:
	// Given next (one of the fanouts of this) that has already had its
	// Partials and Share computed, add its contribution to the partial
	// derivatives of the error function with respect to the outputs
	// of this record.
	//
	// w tells which of the input sets of next this connects to.
	//
	// Note: do clear_partials before accumulating from fanouts.
	// When all contributions from the fanouts have been gathered, 
	//	do set_share_from_partials.
	void add_partials_from_next(const ActivationRecord *next, const int w);
	
	// Set Share to the partial derivatives of the error
	//  function with respect to Pseudo[o]
	//  from already set partial derivatives of the error function 
	//  with respect to the outputs.
	void set_share_from_partials(void);
	
	// Convenience form: load Partials from parts, then derive Share.
	inline void set_share_from_partials(const double *parts)
	{   set_partials(parts);
	    set_share_from_partials();
	}

};


#endif

// CHANGE LOG:
// 25 July 1998 Kevin Karplus
//	added MostProbable and PhatMostProbable
// 20 Apr 2004 Sol Katzman
//      Make ActivationRecord data member Layer private, uppercase; add public layer() function.
//      inline a few existing functions without functional change.
// 24 May 2004 Sol Katzman
//      inline numerous simple functions
// 25 May 2004 Kevin Karplus
//	inlined remaining simple functions