// InterfaceDescription.h
// copyright 30 July 1997 Kevin Karplus

#ifndef InterfaceDescription_H
#define InterfaceDescription_H

#include <iostream>
#include <cassert>
using namespace std;

#include "NamedClass/NamedObject.h"
#include "NamedClass/NamedClass.h"
#include "NamedClass/NameToPtr.h"
#include "SeqWeight.h"
#include "Regularizer/DirichletReg.h"
#include "AlphabetTuple/AlphabetTuple.h"

class InterfaceDescription;     // forward declaration

// To do:
//     make some parts private, so that consistency of Alpha and regularizers
//     and NumUnits and Alpha can be maintained.

class IFInputCommand: public NamedObject
{
    typedef int (*fcn)(istream &in, InterfaceDescription *chg,
            IFInputCommand *self);
    fcn CommandFunction;    // function to execute when the keyword is found,
            // reading from "in" into "chg".
            // "this" is passed down to the function as the 3rd arg,
            // so it can report errors using self->name().
            // Returns 1 if input should continue, 0 on error or end of input.
  public:
    IFInputCommand(const char *nm, fcn c=0)
    {
        set_name(nm);
        CommandFunction = c;
    }
    inline int execute(istream &in, InterfaceDescription *chg)
    {
        return (*CommandFunction)(in, chg, this);
    }
};

// function for IFInputCommand that treats the keyword as a comment and
// skips to the end of the line
int ReadComment(istream &in, InterfaceDescription *chg, IFInputCommand *self);

// UnitName is a simple NamedObject for naming the units of a layer
class UnitName: public NamedObject
{
  public:
    int number;
};

// InterfaceDescription provides information about how the input or output
// of the neural net is to be handled
class InterfaceDescription: public NamedClass, public NamedObject
{
    static IdObject ID;                 // for NamedClass
    static NameToPtr *CommandTable;     // for I/O

    virtual int read_knowing_type(istream &in);
    virtual void write_knowing_type(ostream &out) const;

    int NumUnits;   // number of units
        //  == UseAminoAcidProbs * size(Alphabet)^(1+TupleStop-TupleStart)
        //      + UseInsert + UseDelete
        //      + UseEntropy + UseProbOfGuide
        //      + UseGuide*size(Alphabet)
        //      + size(ComponentProbs)
        // (see compute_num_units below)

    bool UseInsert;     // 1 => neural net input or output includes the
                // probability that there is an insert before the position
    bool UseDelete;     // 1 => neural net input or output includes the
                // probability that there is a deletion in the position
    bool UseEntropy;    // 1 => neural net input includes the
                // entropy of the position
    bool UseProbOfGuide;    // 1 => neural net input includes the probability
                // from the profile of the character of the guide sequence

    bool TrainTo;       // set to 1 if this is an output layer with training data
    bool HideTemporarily;   // set to 1 if this layer should
                // (temporarily) ignore the TrainTo info

    UnitName *Names;        // Names[i] is the name of the ith unit
    NameToPtr UnitNumber;   // hash table to look up names

    const char *generate_unit_name(int i) const;

    inline int compute_num_units(void) const
    {
        assert(Alpha || (!UseAminoAcidProbs && !UseGuide));
        assert(ReRegularizer || !UseComponentProbs);
        assert(!(ReRegularizer && NetRegularizer));
        return (UseAminoAcidProbs? TupleStates: 0)
            + UseInsert + UseDelete
            + UseEntropy + UseProbOfGuide
            + (UseGuide? Alpha->num_normal(): 0)
            + (UseComponentProbs? ReRegularizer->num_components(): 0)
            + (NetRegularizer? NetRegularizer->num_components(): 0);
    }
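    // Worked example (illustrative only; all settings hypothetical):
    // with a 20-letter amino-acid alphabet, TupleStart==TupleStop==0
    // (so TupleStates==20), UseAminoAcidProbs, UseInsert, UseDelete,
    // and UseGuide all set, and a ReRegularizer with 9 components and
    // UseComponentProbs set (hence no NetRegularizer, per the asserts),
    // compute_num_units() returns
    //      20 + 1 + 1 + 0 + 0 + 20 + 9 + 0 == 51.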
  public:
    // On input or output, the data may be amino acid probabilities
    // and probabilities of insertion (before the position).
    // Or they may be from any other alphabet (such as secondary structure).
    // Or they may be simply numbered.

    AlphabetTuple *Alpha;   // 0 if purely numeric
            // Alphabet for each position in sequence

    int TupleStart, TupleStop;  // normally both 0
            // Instead of a single position (i),
            // can use a tuple (i+TupleStart .. i+TupleStop).
            // Currently not compatible with UseComponentProbs.
    int DefaultBaseIndex;
            // If TupleStart or TupleStop is non-zero, then we need to
            // know what label to impute to missing positions at the
            // ends of the sequence.
    int TupleStates;    // Alphabet size ^ (1+TupleStop-TupleStart):
            // the number of states the tuple of Alpha characters can have

    void set_unit_name(int i, const char *nm);
    inline const char *unit_name(int u) const
    {
        if (u<0 || u>=NumUnits) return "illegal_unit";
        if (Names) return Names[u].name();
        return generate_unit_name(u);
    }
    int unit_number(const char *nm);

    int UseLogOdds;     // 1 => use log(P(x|data)/P(x)) instead of P(x).
            // Note: currently only supported for input to the network;
            // will eventually be extended to hidden layers.

    // function for weighting sequences, and its parameters
    const SequenceWeightObject *SequenceWeighter;
    float SequenceWeightBitsToSave;
    float SequenceWeightParam;
    float ClipExponent;     // clip total weights to numseq^ClipExponent
            // (unless ClipExponent<0, in which case turn off clipping)

    Regularizer *WeightingRegularizer;
            // NOTE: NOT OWNED BY InterfaceDescription

    // The best estimate of the true probability is
    // computed from the observed weighted counts using
    // ReRegularizer.  If the pointer is 0, just normalize to sum to 1.
    // NOTE: NOT OWNED BY InterfaceDescription
    DirichletReg *ReRegularizer;
            // On input, inputs can be recoded to use component probabilities
            // of ReRegularizer instead of (or in addition to) amino acid
            // probabilities.
            // ?? Not designed or tested for use with TupleStart or TupleStop
            // non-zero.
            //
            // On output, neural net probabilities are used to set
            // mixture coefficients, and the modified DirichletReg is used
            // with the input counts (not probabilities, but counts)
            // to get probabilities for the outputs.

    // SUGATO: 7/1/99
    DirichletReg *NetRegularizer;
            // The output layer trains to the mixture coefficients of
            // the NetRegularizer.  Cost of encoding is calculated by
            // the cost_from_mix function, which takes as arguments the
            // input counts to the net, the output_counts from the training
            // set for the last layer, and the mixture coefficients output
            // from the net.

    int UseComponentProbs;  // 0 => inputs/outputs are probabilities
                            // 1 => use component probs
    int UseAminoAcidProbs;  // 1 => neural net input or output includes
            // probabilities over an alphabet.  On input,
            // the probabilities are calculated from a
            // multiple alignment over the alphabet.
    int UseGuide;   // 1 => neural net input includes
            // a guide sequence, which for each column
            // is a (nominally) one-hot vector over the
            // 'AminoAcidProbs' alphabet.  For columns with
            // a delete in the guide sequence, the vector is
            // all zeroes.  Inserts in the guide sequence
            // are ignored.
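    // Sketch (hypothetical caller code, not part of this class): building
    // the one-hot guide vector described above, assuming the caller knows
    // the number of normal alphabet characters A and the guide character's
    // index g in that alphabet (g<0 meaning a delete in the guide):
    //
    //      for (int a=0; a<A; a++)
    //          guide_probs[a] = 0.0;
    //      if (g >= 0)
    //          guide_probs[g] = 1.0;   // one-hot; stays all-zero for a delete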
    typedef enum {ALIGNMENT, SEQUENCE, VECTORS, NUMBERS} FormatType;
    FormatType InputFormat;

    InterfaceDescription(void);
    InterfaceDescription(const InterfaceDescription *old);      // copy
    ~InterfaceDescription(void);

    inline void hide(int h=1)   {HideTemporarily = h;}
    inline void unhide(void)    {HideTemporarily = 0;}
    inline int is_hidden(void) const
    {
        return HideTemporarily || !TrainTo;
    }
    inline int is_TrainTo(void) const   {return TrainTo;}
    inline int train_to_unique(void) const
    {
        return TrainTo && (InputFormat==SEQUENCE || InputFormat==NUMBERS);
    }

    inline int num_units(void) const    {return NumUnits;}

    // use assert(num_units_ok()) to check that NumUnits is properly set
    inline bool num_units_ok(void) const
    {
        return NumUnits == compute_num_units();
    }

    // fill the vector vect with the appropriate
    // values (depending on the Use... variables)
    void fill_vector(float *vect,
            const float *aa_probs,
            float insert_prob=0.0,
            float delete_prob=0.0,
            const float *guide_probs=NULL,
            const float *component_probs=NULL,
            float entropy=0.0,
            float prob_of_guide=0.0) const;

    // Functions to get the first and last unit numbers of the guide vector
    inline int guide_first_num(void) const
    {
        assert(UseGuide);
        return UseAminoAcidProbs*TupleStates + UseInsert + UseDelete;
    }
    inline int guide_last_num(void) const
    {
        assert(UseGuide);
        assert(Alpha);
        return guide_first_num() + Alpha->num_normal() - 1;
    }
    inline int profile_first_num(void) const
    {
        assert(UseAminoAcidProbs);
        return 0;
    }
    inline int profile_last_num(void) const
    {
        assert(UseAminoAcidProbs);
        return TupleStates-1;
    }

    // Functions for the input commands
    static NameToPtr *command_table(void)   {return CommandTable;}
    static void init_command_table(void);

    // NamedClass functions
    inline static IdObject *classID(void)   {return &ID;}
    virtual IdObject *type(void) const      {return &ID;}

    // Input commands that need to access private structure
    friend int ReadIntParam(istream &in, int &param,
            InterfaceDescription *chg, IFInputCommand *self);
    friend int ReadFloatParam(istream &in, float &param,
            InterfaceDescription *chg, IFInputCommand *self);

    typedef int IFCommandfcn(istream &in, InterfaceDescription *chg,
            IFInputCommand *self);
    friend IFCommandfcn ReadName;
    friend IFCommandfcn ReadTrainTo;
    friend IFCommandfcn ReadInsertUse;
    friend IFCommandfcn ReadDeleteUse;
    friend IFCommandfcn ReadEntropyUse;
    friend IFCommandfcn ReadGuideProbUse;
    friend IFCommandfcn ReadNumUnits;
    friend IFCommandfcn ReadCompUse;
    friend IFCommandfcn ReadAAUse;
    friend IFCommandfcn ReadGuideUse;
    friend IFCommandfcn ReadInputFormat;
    friend IFCommandfcn ReadAlphabetTuple;

    // Although the InterfaceDescription doesn't own the Regularizers,
    // in some cases all other pointers to them may be lost, so there
    // is an explicit way to delete the regularizers, which should NOT
    // be part of the normal destructor for InterfaceDescription.
    inline void delete_regularizers(void)
    {
        delete WeightingRegularizer;
        if (ReRegularizer != WeightingRegularizer)
            delete ReRegularizer;
        if (NetRegularizer != WeightingRegularizer
                && NetRegularizer != ReRegularizer)
            delete NetRegularizer;
        WeightingRegularizer = ReRegularizer = NetRegularizer = 0;
    }
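    // Sketch (hypothetical caller code; ifd, aa_probs, insert_prob,
    // delete_prob, and guide_probs are caller-supplied): the unit order
    // implied by the accessors above is profile units first, then the
    // insert and delete units, then the guide vector.  For example, with
    // UseAminoAcidProbs, UseInsert, UseDelete, and UseGuide all set:
    //
    //      float *vect = new float[ifd->num_units()];
    //      ifd->fill_vector(vect, aa_probs, insert_prob, delete_prob,
    //              guide_probs);
    //      assert(ifd->profile_first_num() == 0);
    //      assert(ifd->guide_first_num() == ifd->profile_last_num() + 3);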
    // check whether this is compatible with an existing interface
    // (useful for checking whether two neural nets can share a
    // common input).
    // Report a message if not compatible and err_log is not NULL,
    // with the names of the networks provided by old_name and new_name.
    bool is_compatible(const InterfaceDescription *old_ifd,
            const char *old_name="",
            const char *new_name="",
            ostream *err_log=NULL) const;

    // estimate approximate total weight of an input vector for
    // this Interface
    inline double num_ranges(void) const
    {
        if (Alpha==NULL) return 1.0;    // believed to be output of softmax
        double num_r = 0;
        if (UseAminoAcidProbs)  num_r++;
        if (UseGuide)           num_r++;
        if (NetRegularizer)     num_r++;
        if (UseComponentProbs)  num_r++;
        if (UseDelete)          num_r += 0.01;
        if (UseInsert)          num_r += 0.01;
        if (UseEntropy)         num_r += 0.9;
        if (UseProbOfGuide)     num_r += 0.3;
        return num_r;
    }
};

#endif

// CHANGE LOG:
// 25 March 1998 Kevin Karplus
//      Added UseLogOdds.
// 14 April 1998 Kevin Karplus
//      Added copy constructor.
// 10 May 1998 Melissa Cline
//      Modified the comments on the input probabilities, reflecting
//      how component probs and amino acid probs can both be used.
//      Added UseAminoAcidProbs.  Added function ReadAAUse.
// 27 December 1999 Kevin Karplus
//      Added TupleStart, TupleStop, and DefaultBaseIndex.
// 19 April 2004 Sol Katzman
//      Added UseGuide.
// 20 May 2004 Sol Katzman
//      Corrected guide_first_num, and added support for the case of
//      not UseAminoAcidProbs.
// 24 May 2004 Kevin Karplus
//      Inlined simple functions.
// Wed Jun 15 13:32:36 PDT 2005 Kevin Karplus
//      Added profile_first_num and profile_last_num.
// Fri Aug 12 14:46:15 PDT 2005 Kevin Karplus
//      Created is_compatible() from existing code in Globals::add_neural_net.
// Fri Aug 12 15:06:25 PDT 2005 Kevin Karplus
//      Created num_ranges() from code in NeuralLayer::center_weights.
// Fri Aug 12 15:26:28 PDT 2005 Kevin Karplus
//      Created num_units_ok() from code in OneChain.
// Fri Aug 12 17:06:20 PDT 2005 Kevin Karplus
//      Created fill_vector to fill an input vector in the right positions.
// Fri Aug 12 17:18:43 PDT 2005 Kevin Karplus
//      Made UseInsert and UseDelete private.
// Fri Aug 12 17:38:05 PDT 2005 Kevin Karplus
//      Added compute_num_units and used it in num_units_ok.
// Fri Aug 12 20:43:32 PDT 2005 Kevin Karplus
//      Added UseEntropy and UseProbOfGuide.
//      Changed several "Use" parameters to bool.
// Tue Oct 25 13:29:21 PDT 2005 Kevin Karplus
//      Added generate_unit_name and rewrote unit_name to be inline
//      and use generate_unit_name.
// Thu Jul 16 13:40:00 PDT 2009 Kevin Karplus
//      Made command_table and init_command_table static, as they
//      should have been all along.
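// Usage sketch (hypothetical caller, appended for illustration; the
// pointers stand in for interfaces obtained elsewhere): checking whether
// two networks can share a common input layer via is_compatible.
//
//      const InterfaceDescription *old_ifd = /* existing net's input */;
//      const InterfaceDescription *new_ifd = /* new net's input */;
//      if (! new_ifd->is_compatible(old_ifd, "old_net", "new_net", &cerr))
//      {
//          // incompatibility already reported to cerr with both names
//      }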