// NeuralNet.h
// copyright 30 July 1997 Kevin Karplus

#ifndef NeuralNet_H
#define NeuralNet_H

#include <iostream.h>	// for istream, ostream
#include <fstream.h>	// for ofstream
#include <assert.h>

#include "NeuralLayer.h"
#include "NamedClass/NamedObject.h"
#include "NamedClass/NamedClass.h"
#include "InterfaceDescription.h"
#include "LearningParam.h"	// included for "delete LearningParams"

// forward declarations
class TrainSet;
class NetActivation;
class NetQuality;
class NeuralNet;
class LearningParam;

// class for keywords that can be used in NeuralNet input
class NNInputCommand: public NamedObject
{
    typedef int (*fcn)(istream &in, NeuralNet *change, NNInputCommand *self);
    fcn CommandFunction;
	// This is the command to execute when its corresponding
	// keyword is found.
	// Reads from "in" into "change".
	// "this" is passed down to the function as the 3rd argument,
	// so that the function can report errors using self->name().
	// Returns 1 if the input process should continue,
	// 0 on error or end of input.
  public:
    NNInputCommand(const char *nm, fcn c=0)
    {
	set_name(nm);
	CommandFunction = c;
    }
    inline int execute(istream &in, NeuralNet *change)
    {
	return (*CommandFunction)(in, change, this);
    }
};

// function for NNInputCommand that treats the keyword as a comment
// and skips to the end of the line
int ReadComment(istream &in, NeuralNet *change, NNInputCommand *self);

class NeuralNet: public NamedClass, public NamedObject
{
    // for NamedClass
    static IdObject ID;
    virtual int read_knowing_type(istream &in);
    virtual void write_knowing_type(ostream &out) const;

    static NameToPtr *CommandTable;	// for reading input

    int NumLayers;	// >=1 for a legal network.
    int AllocLayers;	// size of the arrays.

    // Used to keep track of which NeuralLayer and which
    // InterfaceDescription is being added.
    int layer_counter, ifd_counter;

    NeuralLayer **Layers;

    // Interfaces contains an InterfaceDescription between every two
    // layers, so the job of TrainTo is now accomplished by a bit in
    // the IFD structure.
    InterfaceDescription **Interfaces;

    NetActivation *MyNetActivation;

    int EpochCounter;	// how many epochs of training have there been?

    double CenterWeight, RangeWeight;
	// relative importance of keeping sums centered and with the
	// desired range.

    LearningParam *LearningParams;

    void Alloc(int numlayers);
    void Dealloc(void);

    // Procedures for the input commands
    virtual NameToPtr *command_table(void) {return CommandTable;}
    virtual void init_command_table(void);

    void test_or_train(bool do_training, const TrainSet *train_set,
	    NetQuality *nq=0,
	    ofstream *report_summary=0,
	    ofstream *report_individually=0,
	    bool shuffle=true);
	// see test() and train() in the public interface for an
	// explanation of the parameters.

  public:
    NeuralNet(void);
    NeuralNet(const NeuralNet *old);
	// copy constructor: copies everything EXCEPT MyNetActivation
    ~NeuralNet(void);

    // copy weights, bias, gain, and pseudo from "from",
    // which should have the same structure.
    void copy_weights_from(const NeuralNet *from);

    inline NetActivation *net_activation(void) const {return MyNetActivation;}
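    // Usage sketch (illustrative only, not part of this interface):
    // the copy constructor plus copy_weights_from() can keep a
    // best-so-far snapshot during training, since the copy duplicates
    // the structure but not MyNetActivation.  The names "net", "best",
    // and "improved" below are hypothetical.
    //
    //	   NeuralNet *best = new NeuralNet(net); // structure, no activation
    //	   ...
    //	   if (improved)
    //	       best->copy_weights_from(net);	// record the better weights
    //	   else
    //	       net->copy_weights_from(best);	// roll back to the snapshot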
    // Returns a pointer to the next TrainTo NeuralLayer.
    // The search begins with the layer number that is the argument
    // to the function.
    NeuralLayer *next_TrainTo_layer(int start) const;

    inline int num_layers(void) const		{return NumLayers;}
    inline int num_interfaces(void) const	{return ifd_counter;}
    inline int num_defined_layers(void) const	{return layer_counter;}
    inline int num_epochs(void) const		{return EpochCounter;}
    inline double center_weight(void) const	{return CenterWeight;}
    inline double range_weight(void) const	{return RangeWeight;}

    inline const LearningParam *learning_params(void) const
    {
	return LearningParams;
    }
    // Takes ownership of lp, deleting any previous LearningParams.
    inline void set_learning_params(LearningParam *lp)
    {
	delete LearningParams;
	LearningParams = lp;
	CenterWeight = LearningParams->NetInitCenterWeight;
	RangeWeight = LearningParams->NetInitRangeWeight;
    }

    inline const NeuralLayer *layer(int lay) const
    {
	assert(0<=lay && lay<NumLayers);
	return Layers[lay];
    }

    // degrees of freedom of one layer (for lay>=0),
    // or of the whole network (for lay<0)
    inline int degrees_freedom(int lay) const
    {
	if (lay>=0) return Layers[lay]->degrees_freedom();
	int sum=0;
	for (int l=0; l<NumLayers; l++)
	    sum += Layers[l]->degrees_freedom();
	return sum;
    }

    inline const InterfaceDescription *interface(int ifd) const
    {
	assert(ifd>=0 && ifd<ifd_counter);
	return Interfaces[ifd];
    }
    inline InterfaceDescription *interface(int ifd)
    {
	assert(ifd>=0 && ifd<ifd_counter);
	return Interfaces[ifd];
    }

    // Is a layer hidden, or is it an output (TrainTo) layer?
    inline int is_layer_hidden(int lay) const
    {
	assert(0<=lay && lay<NumLayers);
	return interface(lay+1)->is_hidden();
    }

    inline void hide_interface(int inter)	{interface(inter)->hide();}
    inline void unhide_interface(int inter)	{interface(inter)->unhide();}

    inline unsigned int overhang(int lay) const
    {
	return layer(lay)->overhang();
    }

    inline InterfaceDescription **interfaces(void) const {return Interfaces;}

    int add_interface(InterfaceDescription *descr);
    int add_layer(NeuralLayer *layer);

    // Initializes any of the NN parameters that have not been set.
    void initialize_net(ostream &logfile, const TrainSet *training);

    // Remove MyNetActivation, so that the network can be safely
    // resized (say, changing the overhangs).
    // initialize_net() must be called again after resizing.
    void remove_activation(void);

    // This is the function that starts the show.  Since I don't know
    // what to use as a convergence/stopping criterion, the net just
    // cycles through num_iterations estimation cycles.
    void learning_loop(int num_iterations,
	    TrainSet *training,
	    ofstream *report_training,
	    ofstream *report_training_indiv,
	    TrainSet *cross_training,
	    ofstream *report_cross_training,
	    ofstream *report_cross_training_indiv,
	    ofstream *report_unit_usage,
	    ofstream *report_Q_vs_Phat);

    // For a NetActivation record that has already had both activate
    // and back_propagate done, update the weights in the network.
    void reest_weights(const NetActivation *app);

    // Activate the network for each of a set of data inputs, updating
    // a NetQuality record as you go.
    // For "train", also train the network after each activation
    // (which may result in somewhat inaccurate NetQuality
    // information--a purist would use
    //		train(train_set)
    //		test(train_set, nq)
    // though this takes almost twice as long).
    // An output stream can be specified for reporting once-per-epoch
    // quality results, or quality results for each individual chain.
    inline void train(const TrainSet *train_set, NetQuality *nq=0,
	    ofstream *report_summary=0,
	    ofstream *report_individually=0,
	    bool shuffle=true)
    {
	test_or_train(true, train_set, nq, report_summary,
		report_individually, shuffle);
    }
    inline void test(const TrainSet *test_set, NetQuality *nq=0,
	    ofstream *report_summary=0,
	    ofstream *report_individually=0)
    {
	test_or_train(false, test_set, nq, report_summary,
		report_individually, false);
    }

    // Modify the learning rates based on the change in quality.
    // Usually the quality is evaluated on a cross-training set before
    // and after a "train(train_set)" iteration.
    // Training is made faster when the network improves, and slowed
    // down when the network gets worse.
    void update_rates(const NetQuality *old_nq, const NetQuality *new_nq);
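    // Usage sketch (illustrative only): one adaptive training epoch
    // evaluated on a cross-training set.  The names "net", "training",
    // "cross_set", "old_nq", and "new_nq" below are hypothetical, not
    // part of this header.
    //
    //	   net->test(cross_set, old_nq);      // quality before training
    //	   net->train(training);	      // one shuffled epoch
    //	   net->test(cross_set, new_nq);      // quality after training
    //	   net->update_rates(old_nq, new_nq); // speed up or slow down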
    // Using the usage statistics in NQ, adjust the biases of each
    // layer and the weights of the next layer to get approximately
    // equal usage of each hidden unit.
    // Do not adjust the biases for layers with training data for the
    // outputs, nor for the layer before a frozen layer.
    void equilibrate(NetQuality *NQ);

    // Required NamedClass functions
    inline static IdObject *classID(void)	{return &ID;}
    virtual IdObject *type(void) const		{return &ID;}

    // Reads the basic NeuralNet structure, then assumes that the
    // individual layers' NeuralLayer and InterfaceDescription
    // definitions follow on the same stream.  These definitions are
    // also read in and added to the structure.  As a side effect,
    // this function also creates the global NetActivation structure.
    static NeuralNet *read_new(istream &in);

    // Input commands that need access to the private structure
    friend int ReadIntParam(istream &in, int &param,
	    NeuralNet *chg, NNInputCommand *self);

    typedef int NNcommandfcn(istream &in, NeuralNet *change,
	    NNInputCommand *self);
    friend NNcommandfcn ReadName;
    friend NNcommandfcn ReadNumLayers;
};

#endif

// CHANGE LOG:
// 14 April 1998 Kevin Karplus
//	Added copy constructor, eliminated incorrect constructor from int.
// 1 May 1998 Kevin Karplus
//	Added remove_activation()
// 10 Dec 1999 Kevin Karplus
//	Added equilibrate()
// 26 Nov 2001 Kevin Karplus
//	Changed num_params to degrees_freedom (and changed meaning)
// 12 Dec 2001 Kevin Karplus
//	Added copy_weights_from()
// 24 May 2004 Sol Katzman
//	inlined learning_params()
// 25 May 2004 Kevin Karplus
//	inlined more simple functions.
// Sat Jul 23 17:32:14 PDT 2005 Kevin Karplus
//	Fixed do_training and shuffle parameters of test_or_train to
//	be bool (not int).
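// Usage sketch (illustrative only): a typical life cycle for a
// NeuralNet read from a specification stream.  The stream, TrainSet,
// and report names below are hypothetical, not part of this header.
//
//	ifstream spec("net.spec");
//	NeuralNet *net = NeuralNet::read_new(spec);
//		// reads the structure, layers, and interfaces, and
//		// creates the NetActivation structure as a side effect
//	net->initialize_net(logfile, training);
//		// fills in any parameters not set in the spec
//	net->learning_loop(num_iter, training,
//		report_training, report_training_indiv,
//		cross_training, report_cross, report_cross_indiv,
//		report_usage, report_Q_vs_Phat);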