// NeuralLayer.h
// 23 July 1997 Kevin Karplus

#ifndef NeuralLayer_H
#define NeuralLayer_H

#include <iostream.h>
#include <assert.h>
#include "NamedClass/NamedObject.h"
#include "NamedClass/NamedClass.h"

// forward declarations
class NeuralLayer;
class TrainSet;
class InterfaceDescription;

// class for keywords that can be used in NeuralLayer input
class NLInputCommand: public NamedObject
{
    typedef int (*fcn)(istream &in, NeuralLayer *change, NLInputCommand *self);
    fcn CommandFunction;
        // function to execute when keyword found.
        // Reading from "in" into "change".
        // Pass this down to function as 3rd arg,
        // so it can report error using self->name().
        // Return 1 if input should continue, 0 if error or end of input.
  public:
    NLInputCommand(const char *nm, fcn c=0)
    {
        set_name(nm);
        CommandFunction = c;
    }
    inline int execute(istream &in, NeuralLayer *change)
    {
        return (*CommandFunction)(in, change, this);
    }
};

// NeuralLayer represents one layer of a neural network.
// Only the permanent structure is recorded here, not the activation
// for particular inputs.
//
// Each layer has a "2-dimensional" input---a fixed-size window
// of fixed-length input vectors (the window is a subset of the full
// sequence of input vectors that is derived from a training or
// prediction chain).
// A typical input vector is a probability distribution over an alphabet,
// but may include other unrelated elements.
// All input values are expected to be scaled to the range [0,1].
// The window size and the length of the input vectors may be
// different for each layer.
//
// The outputs are treated as a probability vector---that is, they
// will be scaled to sum to one (using a new variant of soft-max,
// developed by Kevin Karplus).
// All inputs from the window are connected to all output units.
// Each output unit also has a bias input, which is used just to
// keep the unit in the high-gain region.
// There is an additional pseudocount for each unit (used just before
// the normalization to sum to one) that allows the network to get
// background probabilities correct.
//
// For output o, window position w, and input element i:
//      X[o]   = bias[o] + gain[o] * ( sum_{i,w} weights[o,i,w] * in[i,w] )
//      out[o] = (exp(X[o]) + pseudo[o]) / sum (exp(X) + pseudo)
//
// Note: the Gain parameter is redundant with the weights, and
// normalization of the network can set the gain to 1 by rescaling
// the weights.

class ActivationRecord;     // forward reference
class NeuralNet;            // forward reference

class NeuralLayer: public NamedClass, public NamedObject
{
  private:
    // For NamedClass
    static IdObject ID;
    virtual int read_knowing_type(istream &in);
    virtual void write_knowing_type(ostream &out) const;

    static NameToPtr *CommandTable;     // for reading input

    NeuralNet *Owner;
    NeuralLayer *NextTrainableLayer;
    int MyLayerNumber;

    int IsFrozen;       // set to 1 if you don't want to update
                        // during training
    int UseMultUpdate;  // set to 1 if you want to use multiplicative
                        // updates instead of additive ones.
                        // (requires all positive weights)

    int NumInputs;      // number of inputs in each position of sequence
    int WindowSize;     // how big is window
                        // True number of inputs is NumInputs*WindowSize
    int NumWeights;     // NumInputs*WindowSize
    int NumOutputs;     // how many outputs
    int Overhang;       // How far does layer extend before beginning
                        // and after end of sequence?
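    // The Weights array declared below is stored flat, with the input
    // element index varying fastest, then the window position, then the
    // output unit:
    //      Weights[in + NumInputs*(wind + WindowSize*out)]
    // (the same indexing used by the weight() and set_weight() accessors).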
    float *Weights;     // array of NumInputs*WindowSize*NumOutputs weights
    float *Bias;        // array of NumOutputs bias inputs
    float *Gain;        // array of NumOutputs gain parameters
    float *Pseudo;      // array of NumOutputs pseudocounts

    void Alloc(int in, int wind, int out);
    void Alloc_Weights(void);
    void Alloc_Pseudo(void);
    void Alloc_Gain(void);
    void Alloc_Bias(void);
    void Dealloc(void);

    // procedures for input commands.
    virtual NameToPtr *command_table(void) {return CommandTable;}
    virtual void init_command_table(void);

    inline float& weight(int in, int wind, int out)
    {
        return Weights[in+NumInputs*(wind + WindowSize*out)];
    }

    // input commands need access to private structure
    friend int ReadSizeParam(istream &in, int &param, NeuralLayer *change,
                NLInputCommand *self);
    typedef int NLcommandfcn(istream &in, NeuralLayer *change,
                NLInputCommand *self);
    friend NLcommandfcn ReadNumInputs;
    friend NLcommandfcn ReadWindowSize;
    friend NLcommandfcn ReadNumOutputs;
    friend NLcommandfcn ReadOverhang;
    friend NLcommandfcn ReadUseMultUpdate;
    friend NLcommandfcn ReadWeights;
    friend NLcommandfcn ReadBias;
    friend NLcommandfcn ReadGain;
    friend NLcommandfcn ReadPseudo;

  public:
    // Learning rates for the different types of weights.
    double WeightRate, BiasRate, GainRate, PseudoRate;

    double DesiredSq;   // target for Sums[o]*Sums[o] to
                        // keep range meaningful. (default=1)

    // multipliers for gain, bias, and pseudocount on each update
    double GainExtinction, BiasExtinction, PseudoExtinction;

    NeuralLayer(int in=0, int wind=0, int out=0)
    {
        IsFrozen=0;
        UseMultUpdate=0;
        Owner=0;
        MyLayerNumber=0;
        Weights=0;  Bias=0;  Pseudo=0;  Gain=0;
        Alloc(in,wind,out);
    }
    NeuralLayer(const NeuralLayer *old);        // copy constructor
    ~NeuralLayer(void)
    {
        Dealloc();
    }

    void copy_weights(const NeuralLayer *src);

    void initialize_learning_rates(int num_sequences, int distance_to_training);
        // Set the learning rates to appropriate defaults based on the
        // size of the training set and how far to the next trainable layer.
        // Network should be fully constructed before this initialization.

    inline void set_layer_number(int lay) {MyLayerNumber=lay;}
    inline int layer_number(void) const {return MyLayerNumber;}

    // NamedClass functions
    inline static IdObject* classID(void) {return &ID;}
    virtual IdObject* type(void) const {return &ID;}

    inline const NeuralNet *owner(void) const {return Owner;}
    inline void set_owner(NeuralNet *net) {Owner=net;}

    const InterfaceDescription *input_interface(void) const;
    const InterfaceDescription *output_interface(void) const;

    inline int num_in(void) const {return NumInputs;}
    inline int num_wind(void) const {return WindowSize;}
    inline int num_out(void) const {return NumOutputs;}

    inline int degrees_freedom(void) const
    {
        // degrees of freedom does not count
        //    Gain (can be subsumed into weights),
        //    Bias (can be subsumed into weights, given that a known
        //        subset of inputs sums to 1---true except for Dummy units),
        //    Pseudocounts, if they are all 0,
        //    One of the outputs (because of softmax rescaling).
        int numdeg=(NumOutputs-1)*WindowSize*NumInputs;
        for (int o=NumOutputs-1; o>=0; o--)
        {
            if (Pseudo[o]!=0) return numdeg+NumOutputs;
        }
        return numdeg;
    }

    inline int overhang(void) const {return Overhang;}
    inline void set_overhang(int o)
    {
        assert(o>=0);
        Overhang=o;
    }

    inline void freeze(int f=1) {IsFrozen=f;}
    inline void unfreeze(void) {IsFrozen=0;}
    inline int is_frozen(void) const {return IsFrozen;}

    inline void set_weight(int in, int wind, int out, float f)
    {
        Weights[in+NumInputs*(wind + WindowSize*out)] = f;
    }
    inline float weight(int in, int wind, int out) const
    {
        return Weights[in+NumInputs*(wind + WindowSize*out)];
    }

    inline float pseudo(int out) const {return Pseudo[out];}
    inline float bias(int out) const {return Bias[out];}
    inline float gain(int out) const {return Gain[out];}

    inline void set_bias(int out, float b) {Bias[out] = b;}
    inline void set_gain(int out, float b) {Gain[out] = b;}
    inline void set_pseudo(int out, float b) {Pseudo[out] = b;}

    // Initialization routines
    void initialize_weights(ostream &logfile);
    void initialize_pseudo(ostream &logfile, const TrainSet *training);
    void initialize_bias(ostream &logfile);
    void initialize_gain(ostream &logfile);

    inline void set_DesiredSq(int square)
    {
        DesiredSq = square;
    }

    // Set gains to 1,
    // adjusting weights to preserve meaning.
    void normalize(void);

    // For each set of inputs known to sum to one,
    // adjust weights for those inputs to sum to zero
    // (adjusting bias as needed to preserve meaning).
    void center_weights_input_1(void);

    // Center weights for each input position independently.
    // Meaning is NOT quite preserved, unless pseudocounts are 0.
    // Biases are adjusted to keep meaning roughly the same---do
    // center_biases after center_weights.
    void center_weights(void);

    // Adjust biases so min(bias) = -max(bias), adjusting
    // pseudocounts to preserve meaning.
    void center_biases(void);

    // Do updates based on an ActivationRecord that has already
    // had its share computed.
    // Dependencies:
    //    compute share and propagate to inputs before any updates
    //    update weights before gain or pseudo
    //    update gain before bias
    void update_weights(const ActivationRecord *act);
    void update_pseudo(const ActivationRecord *act);
    void update_gain(const ActivationRecord *act);
    void update_bias(const ActivationRecord *act);

    // Functions for changing the learning rates on the above parameters.
    void change_WeightRate(double old_TEC, double new_TEC);
    void change_BiasRate(double old_TEC, double new_TEC,
                double old_SS2, double new_SS2);
    void change_GainRate(double old_TEC, double new_TEC,
                double old_SS2MDes2, double new_SS2MDes2);
    void change_PseudoRate(double old_TEC, double new_TEC);

    // Functions to return the rate values
    inline double weight_rate(void) const {return WeightRate;}
    inline double bias_rate(void) const {return BiasRate;}
    inline double gain_rate(void) const {return GainRate;}
    inline double pseudo_rate(void) const {return PseudoRate;}
};

#endif

// CHANGE LOG:
// 23 March 1998 Kevin Karplus
//      Added Owner
// 26 March 1998 Kevin Karplus
//      Changed normalization to remove bias, as well as gain,
//      based on assumption that inputs of each windowframe sum to 1.
//      (Commented changes out again, as they didn't seem to help.)
// 30 March 1998 Kevin Karplus
//      Added IsFrozen to stop learning in individual layers.
// 14 April 1998 Kevin Karplus
//      Added copy constructor
// 13 May 1998 Kevin Karplus
//      Added input_interface and output_interface.
// 18 May 1998 Kevin Karplus
//      Added center_weights()
// 23 November 1999 Kevin Karplus
//      Changed num_param() so it doesn't count gain or bias.
// 7 January 2000 Kevin Karplus
//      Added center_biases
// 26 Nov 2001 Kevin Karplus
//      Changed num_params to degrees_freedom (and changed meaning)
// 21 Apr 2004 Sol Katzman
//      Improved comments.
// 24 May 2004 Sol Katzman
//      Inlined numerous simple functions.
// 25 May 2004 Kevin Karplus
//      Inlined more of the simple functions.
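
// ----------------------------------------------------------------------
// Illustrative sketch (not used by the library): one way to compute the
// layer output described in the comment block at the top of this file,
// using plain arrays.  The helper below and its argument names are
// hypothetical; nIn, nWind, and nOut stand in for NumInputs, WindowSize,
// and NumOutputs, the weights are indexed the same way as the weight()
// accessor, and the windowed input "in" is assumed to be laid out as
// in[i + nIn*w].  The per-input activation itself is kept outside
// NeuralLayer (see the ActivationRecord forward reference above).

#include <math.h>

static void example_layer_output(int nIn, int nWind, int nOut,
                                 const float *weights, const float *bias,
                                 const float *gain, const float *pseudo,
                                 const float *in, float *out)
{
    double total = 0.0;
    for (int o = 0; o < nOut; o++)
    {
        // X[o] = bias[o] + gain[o] * sum_{i,w} weights[o,i,w] * in[i,w]
        double dot = 0.0;
        for (int w = 0; w < nWind; w++)
        {
            for (int i = 0; i < nIn; i++)
                dot += weights[i + nIn*(w + nWind*o)] * in[i + nIn*w];
        }
        double x = bias[o] + gain[o] * dot;

        // unnormalized output: exp(X[o]) + pseudo[o]
        out[o] = exp(x) + pseudo[o];
        total += out[o];
    }
    // scale so the outputs sum to one (the soft-max variant with
    // pseudocounts described at the top of this file)
    for (int o = 0; o < nOut; o++)
        out[o] /= total;
}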