/* 
 * alignsummary.c - contains functions to process the alignSummay struct.
 *
 * This module contains functions to create, delete, and process the
 * alignSummary structure.  The global entry points to this module are
 * as follows:
 *     alignedSequenceStruct
 *     alignSequencesReviseAlignments
 *     findSequenceIndex
 *     freeAlignSummary
 *     printAlign
 *     readAlignment
 *     readReferenceCandidate
 *     sequencesIdentical
 *
 * Modifications:
 * 12/9/97  cline 	author, moved from measure_shift.c
 * 2/5/98   cline       added readReferenceCandidate
 * 2/11/98              added removeAlignmentColumn 
 *                      added restoreAlignmentColumn
 * 2/16/98              replaced getTemplate and getTarget  with
 *                      alignedSequenceStruct
 * 3/5/98	karplus	added echo of alignment summary info to stderr
 *			Tried to fix bug and simplify fixupColumns,
 *			but the code is so wrong it may be hopeless.
 *			Threw it out and did the conversion correctly
 *			in copyFromSeqNodeToAlignSeq.
 *			Eliminated MaxSeqLength
 *			Added count_match
 * 3/8/98	karplus	Changed strcmp to strcasecmp so that names needn't
 *			be converted to uppercase.
 * 3/9/98	karplus	Fixed my bugs in copyFromSeqNodeToAlignSeq.
 * 3/10/98	cline	moved printFinalA2m from a2mtrim, renamed as
 *			printAlign.
 * 4/7/98       cline   Fixed a bug in sequencesIdentical that was causing
 *                      residues tacked on the end of one sequence to be
 *                      missed.  Fixed readReferenceCandidate to check
 *                      for the presence of the template and target
 *                      residues in the two sequences before erroneously
 *                      assuming that they're there.
 * 4/16/98      cline   Fixed a bug in the error message that appears
 *                      when the alignment doesn't have the minumum
 *                      number of sequences to be interestint; stopped
 *                      the core dump.
 * 4/20/98	cline	Revised readAlignment and readReferenceCandidate
 *			to reflect the new handling of the command line
 *			parameters.
 * 4/21/98	cline	Added functionality to handle gzipped alignments
 * 5/20/98	cline	Removed bug that caused files to be left on /tmp
 *			after the program was run.
 * 5/22/98	cline	Trying again to fix the bug of 5/20.
 * 5/26/98	cline	Back again on this problem of temp file removal,
 *			renamed the temp files to make them distinct
 *			and traceable to a set of parameters.
 * 5/27/98	cline	Discovered the problem of the temp file removal.
 *			The files were not being removed because the
 *			parent program was dumping core.  This seems to
 *			be due to a memory management bug.  Changing the
 *			strcpy in nextSequence to strncpy seems to work
 *			around the problem.
 * 6/5/98       cline   Removed the unused isTemplate argument from
 *                      alignSequencesReviseAlignments.
 * 6/8/98	cline	Added functionality in readNextSequence to 
 *			remove whitespace added to the string for
 *			readability.
 * 6/9/98       cline   added function readStringFromFile
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "alignsummary.h"

/*
 * #DEFINES
 */
#define BUFSIZE	4095
#define LABEL_MARKER '>'

/*
 * TYEPDEFS
 *

/*
 * the aligned sequence data is initially read into a linked list,
 * a list of seqNode structures.  This is a convenient structure to
 * read the data into until we know details such as the number of
 * sequences and their maximum length, and can copy the whole
 * thing into the AlignSummary struct.
 */
typedef struct seqData{
    char label[MAX_LABEL_LENGTH]; 	/* sequence label */
    char *sequence;		        /* the aligned sequence itself */
    struct seqData *next;		/* next node in the list */
} seqNode;
    

/*
 * PROTOTYPES FOR LOCAL FUNCTIONS
 */
int count_match(AlignSummary *align);
	/* count the number of match columns in the first sequence of alignment
	 */

void addResidueToAlignedSeq(
    AlignSummary *align,		/* called with: the alignment to
					 * modify */
    int sequenceToModify,		/* called with: index of the sequence
					 * to be modified */
    int beforeSeqPosition,		/* called with: the sequence position
					 * (in sequenceToModify) before which
					 * the new column should be added */
    char newResidue);			/* called with: the new residue to
					 * add at this position. */    


int copyFromSeqNodeToAlignedSeq(
    seqNode *seqList,           /* called with: head of the linked list
                                 * of seqNode structures */
    int numberSequences,        /* called with: number of sequences in the
                                 * structure */
    AlignSummary *thisAlign);   /* called with: target data structure,
                                 * allocated but with empty alignment fields.
                                 * return with: structure with multiple
                                 * alignment fields filled in */

AlignSummary *getAlignSummary(
    char *a2mFilename,			/* called with: filename of the
					 * alignment file */
    char *templateName,			/* called with: name of the template
					 * sequence */
    char *targetName);			/* called with: anme of the target
					 * sequence */

int getMultipleAlignment(
    char *filename,             /* called with: name of the file
                                 * containing the alignment data */
    int minNumberAlignedSeqs,   /* the smallest number of aligned
                                 * sequences expected by the caller */
    AlignSummary *thisAlign);   /* called with: pointer to a struct
                                 * already allocated but with no alignment.
                                 * return with: struct with the 
                                 * alignment fields filled in */

int getSequenceAlignment(
    AlignSummary *align1,		/* called with: first alignment */
    AlignSummary *align2,		/* called with: second alignment */
    int sequenceIndex1,			/* called with: index of the sequence
					 * of interest within the first 
					 * alignment */ 
    int sequenceIndex2,			/* called with: index of the sequence
					 * of interest within the second 
					 * alignment */ 
    char *structName, 			/* called with: name of the structure
					 * of interest */
    AlignSummary *sequenceAlignment);   /* return with: pointer to the
                                         * alignment of the specified
                                         * sequences */

int inputs_okay( int argc, char *argv[]);

int hasSuffix(char *filename, char *suffix);

void makeSequenceAlignment(
    FILE *alignFile,		
    AlignSummary *align1,
    int templateSequenceIndex1,
    AlignSummary *align2,
    int templateSequenceIndex2);

void makeSequenceFile(
    FILE *alignFile,		
    AlignSummary *align1,
    int templateSequenceIndex1);

int readMultipleAlignment(
    char *filename,             /* called with: name of the file with the
                                 * alignment data */
    int minNumberAlignedSeqs,   /* called with: smallest number of
                                 * aligned sequences we could return 
                                 * with, and still have an alignment
                                 * of interest to the caller */
    seqNode **seqList,          /* return with: a linked list
                                 * representation of the sequence data */
    int *numberSequences);       /* return with: number of sequences read */


int readNextSeq(
    FILE *fp,           /* called with: the stream with the sequence
                         * data, open for reading */
    char *filename,     /* called with: name of the file, used in
                         * error messages */
    int seqNumber,      /* called with; what sequence we're on, used
                         * in error messages */
    seqNode *nextSeq);  /* called with: allocated but empty seqNode structure.  
                         * Return with: structure with fields filled in. */


void writeSequenceToFile(
    FILE *seqFile,			/* called with: file to write to */
    AlignSummary *align,		/* called with: alignment containing
					 * the sequence of interest */
    int sequenceIndex);			/* called with: row index of the 
					 * sequence of interest within
					 * the alignment */


/* 
 * CODE
 */

/*
 * hasSuffix - determine if the specified filename has the specified suffix
 */
int hasSuffix(char *filename, char *suffix)
{
    int ii;
    int suffixLength, filenameLength;
    int isSuffix = TRUE;

    filenameLength = strlen(filename);
    suffixLength = strlen(suffix);
    if (filenameLength < suffixLength) isSuffix = FALSE;
    for (ii = 0; ii < suffixLength && isSuffix; ii++) {
        if (toupper(filename[filenameLength - suffixLength + ii]) 
		    != toupper(suffix[ii])) {
	    isSuffix = FALSE;
        }
    }
    return(isSuffix);
}


/* count_match
 *	count the number of match columns in the first sequence of alignment
 */
int count_match(AlignSummary *align)
{   int num_match;
    int col;
    char c;
    
    if (!align || align->rows==0)
	return 0;
    num_match=0;
    for (col=0; col<align->columns; col++)
    {	c = align->sequence[col][0];
	if (isupper(c) || c=='-')
	    num_match++;
    }
    return num_match;
}


/* 
 * alignedSequenceStruct - get all the gory information relating
 * 				to the specified aligned sequence
 *				in the alignment.
 *
 * This function allocates and fills in the alignedSequence
 * struct for the specified sequence in the specified alignment.
 * If the sequnce is not found, it fills in the index as NOT_ALIGNED
 * and returns the otherwise empty structure.
 *
 * Return value - a pointer to the structure if all the mallocs and
 * 		everything went okay, else NULL.
 */
alignedSequence *alignedSequenceStruct(
    AlignSummary *alignment,	/* called with: the alignment in question */
    char *sequenceName)		/* called with: name of the sequence 
				 * in question */
{
    alignedSequence *sequenceData = NULL;
    int ii;
    int sequenceIndex, residueIndex;

    /*
     * first off, the overhead.  Allocate the structure and its
     * various substructures.  Look to see if this sequence is
     * even part of this alignment.
     */
    sequenceIndex = findSequenceIndex(sequenceName, alignment);
    if (sequenceIndex == NOT_ALIGNED) {
        fprintf(stderr, "Error: sequence %s not found in alignment %s\n",
		sequenceName, alignment->filename);
    } else {
    
        sequenceData = malloc(sizeof(alignedSequence));
        assert(sequenceData != NULL);
        sequenceData->index = sequenceIndex;
        sequenceData->seq_to_column = calloc(alignment->columns, sizeof(int));
        assert(sequenceData->seq_to_column != NULL);
        sequenceData->column_to_seq = calloc(alignment->columns, sizeof(int));
        assert(sequenceData->column_to_seq != NULL);

       /*
        * Now we begin with the two arrays to reference the alignment
	* data.  This alignment structure is a big matrix containing
	* inserts and deletions as well as matching columns.  For any
	* given residue in the sequence, we want something that tells
	* us where it is in the alignment.  For any given residue in
	* the alignment, we want to know which position it occupies in
	* the sequence.
        */
       residueIndex = 0;
       for (ii = 0; ii < alignment->columns; ii++) {
           if (isalpha(alignment->sequence[ii][sequenceData->index])) {
               sequenceData->seq_to_column[residueIndex] = ii;
               sequenceData->column_to_seq[ii] = residueIndex;
	       residueIndex++;
           } else {
               sequenceData->column_to_seq[ii] = NOTHING_ALIGNED;
           }
       } 
       sequenceData->sequence_length = residueIndex;
    }
    return(sequenceData);
}


/*
 * freeAlignSummary - free the align summary structure and its
 *		      substructures.
 */
void freeAlignSummary(
    AlignSummary *alignData)	/* called with: structure to be freed */
{
    int ii;
 
    for (ii = 0; ii < alignData->rows; ii++) free(alignData->label[ii]);
    for (ii = 0; ii < alignData->columns; ii++) free(alignData->sequence[ii]);
    free(alignData->sequence);
    free(alignData);
}


/*
 * freeAlignedSequence - free an aligned sequence struct
 */
void freeAlignedSequence(
    alignedSequence *sequenceStruct)	/* called with: a pointer
					 * to an allocated
					 * alignedSequence struct. */
{
    if (sequenceStruct != NULL) {
        free(sequenceStruct->seq_to_column);
        free(sequenceStruct->column_to_seq);
        free(sequenceStruct);
    }
}


/*
 * sequencesIndentical - check if two sequences are the same in the two
 *			alignments.
 *
 * This function checks to see whether or not the two versions of the
 * sequence from the two alignments are the same.  It checks all
 * amino acids, and if the amino acid sequence is different, it returns
 * FALSE.  If the amino acid sequence is the same, it returns TRUE.  It
 * doesn't matter if the characters are upper or lowercase, or
 * interspersed with inserts and so forth.
 */
int sequencesIdentical(
    AlignSummary *align1,	/* called with: first alignment */
    char *name1,		/* called with: name of the
				 * sequence of interest */
    AlignSummary *align2,	/* called with: second alignment */
    char *name2)		/* called with: name of the 
				 * sequence of interest */
{
    int identical = TRUE;
    int ii;
    int alignIndex1, alignIndex2;
    int seqlength1, seqlength2;
    int index1, index2;

    index1 = findSequenceIndex(name1, align1);
    if (index1 ==NOT_ALIGNED )
    {    fprintf(stderr, "Error: can't find sequence named %s in %s\n",
		name1, align1->filename);
    }
    assert(index1 >= 0);
    index2 = findSequenceIndex(name2, align2);
    if (index2 ==NOT_ALIGNED )
    {    fprintf(stderr, "Error: can't find sequence named %s in %s\n",
		name2, align2->filename);
    }
    assert(index2 >= 0);
    seqlength1 = seqlength2 = 0;
    for (alignIndex1 = alignIndex2 = 0;
         identical && alignIndex1 < align1->columns 
	           && alignIndex2 < align2->columns;
	 alignIndex1++, alignIndex2++) {
	/*
	 * In both alignment 1 and alignment 2, advance to either the
	 * next residue or the end of the alignment, whichever comes 
	 * first.
	 */
	while ((alignIndex1 < align1->columns) 
		&& (!isalpha(align1->sequence[alignIndex1][index1]))) {
	    alignIndex1++;
        }
	while ((alignIndex2 < align2->columns) 
		&& (!isalpha(align2->sequence[alignIndex2][index2]))) { 
	    alignIndex2++;
        }
        /*
         * After the while loops above, both sequence indices will
         * either be at a residue or past the end of the alignment.
         * If they're both at a residue, check whether the residues
         * are the same.
         */
	if (alignIndex1 < align1->columns 
	     && alignIndex2 < align2->columns) {
            if (toupper(align1->sequence[alignIndex1][index1]) 
		    != toupper(align2->sequence[alignIndex2][index2])) {
	        identical = FALSE;
            }
            seqlength1++;
	    seqlength2++;
        } else if (alignIndex1 < align1->columns) {
	    identical = FALSE;
	} else if (alignIndex2 < align2->columns) {
	    identical = FALSE;
	}
    }
    /*
     * At this point, either we've found differences in the sequences,
     * or we've reached the end of one of the alignments.  If we haven't
     * found any differences in the sequences yet, make sure that we
     * don't have a fe residues tacked on the end of one sequence but
     * not the other.  Check for this by seeing if there are any
     * residues left in either sequence.
     */
    if (identical) {
        while (alignIndex1 < align1->columns) {
            if (isalpha(align1->sequence[alignIndex1][index1])) {
                identical = FALSE;
            }
            alignIndex1++;
        }
        while (alignIndex2 < align2->columns) {
            if (isalpha(align2->sequence[alignIndex2][index2])) {
                identical = FALSE;
            }
            alignIndex2++;
        }
    }
    return(identical);
}

    
/* 
 * findSequenceIndex - find the index of the specified sequence
 *			in the alignment.
 *
 * This function looks through the labels of the specified alignment
 * and looks for the specified sequence label.  If it finds it, it
 * returns its row index.  Otherwise, it returns NOT_FOUND.
 */
int findSequenceIndex(
    char *target_label,			/* called with: the label we're
				 	 * looking for */
    AlignSummary *align)		/* called with: where we're
					 * looking for it */
{
    int ii;
    int target_index = NOT_FOUND;

    for (ii = 0; ii < align->rows && target_index == NOT_FOUND; ii++) {
	if (strcasecmp(target_label, align->label[ii]) == 0) {
	    target_index = ii;
        }
    }
    return(target_index);
}


/*
 * writeSequenceToFile - write the sequence data of the specified
 *			sequence to a .seq or .a2m file.
 *
 * This function writes out the sequence data from the indicated
 * aligned sequence to the specified file.  The sequence data it
 * writes out are the residues only, uppercased.  Note that this
 * is not the same as the aligned sequence.
 */
void writeSequenceToFile(
    FILE *seqFile,			/* called with: file to write to */
    AlignSummary *align,		/* called with: alignment containing
					 * the sequence of interest */
    int sequenceIndex)			/* called with: row index of the 
					 * sequence of interest within
					 * the alignment */
{
    int ii;

    fprintf(seqFile, ">%s1\n", align->label[sequenceIndex]);
    for (ii = 0; ii < align->columns; ii++) {
        if (isalpha(align->sequence[ii][sequenceIndex])) { 
            fputc(toupper(align->sequence[ii][sequenceIndex]), seqFile);
        } 
    }
    fputc('\n', seqFile);
}


/*
 * makeSequenceFile - to start the alignment process, make a file of
 * 			only one sequence, to which it and the other
 *			sequence will be aligned.
 *
 * This function writes out the data for the starting point of
 * modelfromalign - a copy of one of the sequences.  It doesn't
 * matter which sequence is used, as the other one should be
 * similar and should align to it easily.  This function differs from
 * makeAlignFile in that it only records one of the two sequences; the
 * other function makes the database which will later be used for
 * aligning both sequences to the model.  NOTE: modelfromalign won't
 * work on an .a2m file with a single sequence.  To get around that,
 * write out two copies of the same sequence. 
 */
void makeSequenceFile(
    FILE *seqFile,		
    AlignSummary *align1,
    int sequenceIndex)
{
    writeSequenceToFile(seqFile, align1, sequenceIndex);
    writeSequenceToFile(seqFile, align1, sequenceIndex); 
}


/*
 * makeSequenceAlignment - make an input alignment of the two template
 * 				sequences.
 *
 * This routine creates a dump alignment of the two template sequences
 * by writing out the sequence data for each sequence in .a2m format.
 * This alignment is to be used as input by such things as modelfromalign.
 */
void makeSequenceAlignment(
    FILE *alignFile,		
    AlignSummary *align1,
    int sequenceIndex1,
    AlignSummary *align2,
    int sequenceIndex2)
{
    int ii;
    int seq1Length, seq2Length;

    writeSequenceToFile(alignFile, align1, sequenceIndex1);
    writeSequenceToFile(alignFile, align2, sequenceIndex2);
}


/*
 * getSequenceAlignment - get the realignment of the two sequences
 *
 * This function takes as input two alignments containing two different 
 * versions of the same sequence, and returns an alignment of these two
 * different version to each other.
 *
 * return value: a boolean flag indicating if the operation was successful
 */
int  getSequenceAlignment(
    AlignSummary *align1,		/* called with: first alignment */
    AlignSummary *align2,		/* called with: second alignment */
    int sequenceIndex1,			/* called with: index of the sequence
					 * of interest within the first 
					 * alignment */ 
    int sequenceIndex2,			/* called with: index of the sequence
					 * of interest within the second 
					 * alignment */ 
    char *structName,			/* called with: name of the structure
					 * of interest */
    AlignSummary *sequenceAlignment)    /* return with: pointer to the 
					 * alignment of the specified
					 * sequences */
{
    FILE *alignFile;
    FILE *seqFile;
    char alignFilename[FILENAME_LENGTH + 1];
    char seqFilename[FILENAME_LENGTH + 1];
    char finalAlignFilename[FILENAME_LENGTH + 1];
    char modFileName[FILENAME_LENGTH + 1];
    char commandBuffer[COMMAND_LENGTH + 1];
    int returnValue;
				
    /*
     * First step: build a sequence file containing only
     * one of the two sequences.  Which one it is doesn't
     * matter.  This will be a starting point for the alignment
     * process. 
     */
    sprintf(seqFilename, "/tmp/%s.init.a2m", structName);
    seqFile = fopen(seqFilename, "w");
    assert(seqFile != NULL);
    makeSequenceFile(seqFile, align1, sequenceIndex1);
    fclose(seqFile);

    /*
     * Next step: build an alignment of the two template sequences.
     * This requires finding which sequences are the template sequences,
     * opening a file, and writing out the sequence data.
     */
    sprintf(alignFilename, "/tmp/%s.%s.a2m", structName, structName);
    alignFile = fopen(alignFilename, "w");
    assert(alignFile != NULL);
    makeSequenceAlignment(alignFile, align1, sequenceIndex1, 
			  align2, sequenceIndex2);
    fclose(alignFile);

    /*
     * construct and run the modelfromalign command 
     */
    sprintf(modFileName, "/tmp/%s.%s", structName, structName);
    sprintf(commandBuffer, "modelfromalign %s -alignfile %s",
		           modFileName, seqFilename);
    strcat(modFileName, ".mod");
    returnValue = system(commandBuffer);
    if (returnValue == 0) {

        /*
         * now, construct and run the align2model command.  When done, 
	 * delete the alignments and models.
         */ 
        sprintf(finalAlignFilename, "/tmp/%s.%s.final", structName, 
							structName);
        sprintf(commandBuffer, "align2model %s -i %s -db %s > /dev/null",
		finalAlignFilename, modFileName, alignFilename); 
        returnValue = system(commandBuffer);
        if (returnValue == 0) {
	    strcat(finalAlignFilename, ".a2m");
            returnValue = getMultipleAlignment(finalAlignFilename, 
				           2, sequenceAlignment );
            returnValue = unlink(finalAlignFilename);
            assert(returnValue == 0);
        }
        returnValue = unlink(modFileName);
        assert(returnValue == 0);
    }
    returnValue = unlink(alignFilename);
    assert(returnValue == 0);
    returnValue = unlink(seqFilename);
    assert(returnValue == 0);
    return(returnValue);
}


/*
 * addResidueToAlignedSeq - this function is called to add a residue to
 *			    an aligned sequence in the specified alignment.
 *
 * This function modifies an alignment by adding a residue to an aligned
 * sequence.  In most cases, this results in a new column added to the
 * alignment.  The particulars of what happens depend on the other sequence
 * of interest, and what if anything it has in the adjacent columns as the
 * column to be added:
 * - there's a deletion in that section of the sequence to be modified, and
 *   an aligned residue in the other sequence.  In that case, the new
 *   residue in the sequence to be modified is aligned to the residue in the
 *   other sequence.
 * - else, a new column is added to the alignment, and the other sequence
 *   has a deletion in this column.
 */
void addResidueToAlignedSeq(
    AlignSummary *align,	/* called with: the alignment to
				 * modify */
    int sequenceToModify,	/* called with: index of the sequence
				 * to be modified */
    int beforeSeqPosition,	/* called with: the sequence position
				 * (in sequenceToModify) before which
				 * the new column should be added */
    char newResidue)		/* called with: the new residue to
				 * add at this position. */    
{
    int seqPosition;
    int columnToAddBefore, columnToAddAfter;
    int ii;
    int columnFound;
    char **newData;

    /*
     * First, find what alignment columns this sequence position 
     * corresponds to.  As an exception case, if this sequence position
     * is at the end of the alignment, set the second marker for just
     * past the end of the alignment.
     */
    columnToAddAfter = -1;
    for (seqPosition = 0; seqPosition < beforeSeqPosition; ) {
	columnToAddAfter++;
        if (isalpha(align->sequence[columnToAddAfter][sequenceToModify])) {
           seqPosition++;
        }
    }
    columnToAddBefore = columnToAddAfter;
    while (seqPosition <= beforeSeqPosition 
	   && columnToAddBefore < align->columns-1) {
	columnToAddBefore++;
        if (isalpha(align->sequence[columnToAddBefore][sequenceToModify])) {
            seqPosition++;
        }        
    }

    if (columnToAddBefore == align->columns-1 
        && !isalpha(align->sequence[align->columns-1][sequenceToModify])) {
        columnToAddBefore = align->columns;
    }

    /*
     * It's time to add a column.  This alignment is in an array
     * that's indexed first by column and then by row.  The column field
     * is an array of pointers to all the row fields.  So, we've got to
     * allocate a new column field, one column larger, and move all the
     * pieces of row data into it, keeping a spot empty for the new column.
     */
    newData = calloc(align->columns+1, sizeof(char*));
    assert(newData != NULL);
    for (ii = 0; ii <= columnToAddAfter; ii++) {
        newData[ii] = align->sequence[ii];
    }
    newData[ii] = calloc(align->rows, sizeof(char));
    for (; ii < align->columns; ii++) {
        newData[ii+1] = align->sequence[ii];
    }
    free(align->sequence);
    align->sequence = newData;
    for (ii = 0; ii < align->rows; ii++) {
        align->sequence[columnToAddAfter+1][ii] = '-';
    }
    align->sequence[columnToAddAfter+1][sequenceToModify] = newResidue;
    align->columns++;
}


/*
 * removeAlignmentColumn - remove a column from the specified alignment
 *
 * This function "removes" a column by converting it to an insert.  All
 * aligned residues in the column are converted to inserts,
 */
void removeAlignmentColumn(
    AlignSummary *thisAlign,    /* called with: structure detailing the
                                 * alignment to update */
    int column)                 /* called with: column index for the
                                 * column to delete */
{
    int ii;

    for (ii = 0; ii < thisAlign->rows; ii++) {
        assert(isupper(thisAlign->sequence[column][ii])
               || thisAlign->sequence[column][ii] == '-');
        if (isupper(thisAlign->sequence[column][ii])) {
            thisAlign->sequence[column][ii]
                    = tolower(thisAlign->sequence[column][ii]);
       } else {
            thisAlign->sequence[column][ii] = '.';
       }
    }
    return;
}


/*
 * restoreAlignmentColumn - restore a column that was deleted previously
 *
 * This function "restores" a column that had been deleted previously
 * by converting it from an indel column to a match column.
 */
void restoreAlignmentColumn(
    AlignSummary *thisAlign,    /* called with: structure detailing the
                                * alignment to update */
    int column)                 /* called with: column index for the
                                 * column to delete */
{
    int ii;

    for (ii = 0; ii < thisAlign->rows; ii++) {
        assert(islower(thisAlign->sequence[column][ii])
               || thisAlign->sequence[column][ii] == '.');
        if (islower(thisAlign->sequence[column][ii])) {
            thisAlign->sequence[column][ii]
                    = toupper(thisAlign->sequence[column][ii]);
        } else {
            thisAlign->sequence[column][ii] = '-';
        }
    }
}


/*
 * alignSequencesReviseAlignments - align the two copies of the one sequence
 * 			  to each other, and insert any missing pieces into 
 * 			  the alignment.
 *
 * This function goes into the two alignments and looks at the two versions
 * of the specified sequence.  If the two versions are different, it creates
 * an alignment of the two sequences.  Then, using this alignment, it 
 * identifies any regions missing from one of the input alignments.  It adds
 * these regions to the input alignment as additional columns: the sequence
 * specified matches these columns, all others have a delete in that position.
 */
void alignSequencesReviseAlignments(
    AlignSummary *align1,     /* called with: one of the two alignments */
    AlignSummary *align2,     /* called with: the other of the two alignments */
    char *structName1,        /* called with: name in the first alignment 
			       * of the sequence whose alignment is being 
			       * checked */
    char *structName2)        /* called with: name in the second alignment 
			       * of the sequence whose alignment is being 
			       * checked */
{
    AlignSummary *sameSeqAlignment;
    int column_index, seq_index;
    int is_okay = TRUE;
    int sequenceIndex1, sequenceIndex2;

    sequenceIndex1 = findSequenceIndex(structName1, align1);
    assert(sequenceIndex1 >= 0);
    sequenceIndex2 = findSequenceIndex(structName2, align2);
    assert(sequenceIndex2 >= 0);
    /*
     * First off, we want to know if the template sequences are
     * identical.  If they are, we can shortcut this process
     * significantly.
     */
    if (!sequencesIdentical(align1, structName1, align2, structName2)) {
        sameSeqAlignment = malloc(sizeof(AlignSummary));
        assert(sameSeqAlignment != NULL);
        is_okay = getSequenceAlignment( align1, align2, sequenceIndex1, 
				sequenceIndex2, structName1, sameSeqAlignment);
        assert(is_okay = TRUE);
        
        /*
         * Go through this alignment.  Wherever the sequence in align1 has
	 * positions missing from align2 (or vise versa), add the missing
	 * positions to the alignment as new columns.  The way the missing
	 * positions will be noted is this.  In the same-sequence alignment,
	 * there will be three possibilities: 
	 * (1) both sequences will have the same residue.  All is well, 
	 *     we continue.
	 * (2) Both sequence have residues, but they're different
	 *     residues.  What's necessary then is to take the residue
	 *     from the first sequence and add it to the second, then 
	 *     take the residue from the second sequence and add it to
	 *     the first.
	 * (2) One seqeunce will insert or delete a residue relative
	 *     to the other sequence.  In that case, one sequence will
	 *     have an alphabetic character, and the other won't.  The
	 *     character will be inserted in whatever sequence doesn't
	 *     have it.
         */
	seq_index = 0;
        for (column_index = 0; 
	     column_index < sameSeqAlignment->columns; column_index++) {
            if (isalpha(sameSeqAlignment->sequence[column_index][0]) 
		&& !isalpha(sameSeqAlignment->sequence[column_index][1])) {
                addResidueToAlignedSeq(align2, sequenceIndex2, seq_index,
		     toupper(sameSeqAlignment->sequence[column_index][0]));
            } else if (isalpha(sameSeqAlignment->sequence[column_index][1]) &&
		      !isalpha(sameSeqAlignment->sequence[column_index][0])) {
                   addResidueToAlignedSeq(align1, sequenceIndex1, seq_index,
		         toupper(sameSeqAlignment->sequence[column_index][1]));
            } else if (toupper(sameSeqAlignment->sequence[column_index][0]) 
		      != toupper(sameSeqAlignment->sequence[column_index][1])) {
		addResidueToAlignedSeq(align2, sequenceIndex2, seq_index,
		          toupper(sameSeqAlignment->sequence[column_index][0]));
		seq_index++;
                addResidueToAlignedSeq(align1, sequenceIndex1, seq_index,	
		          toupper(sameSeqAlignment->sequence[column_index][1]));
            } 
	    seq_index++;
        } 
        freeAlignSummary(sameSeqAlignment);
    }
    return;
}


/*
 * readStringFromFile - read from the specified file a string of 
 *		        unknown length, allocating memory as you go.
 *
 * This function reads and allocates memory for a string terminated by
 * the carriage return character.
 *
 * return value: TRUE if the read went okay, FALSE if there was an 
 *		 an I/O or memory error.
 */
int readStringFromFile(
    FILE *fp,			/* called with: the input file, open for
				 * reading, and advanced to the beginning
				 * of the string. */
    char *filename,		/* name of the file, for error reporting
				 * purposes */
    char **stringFromFile) 	/* return with: the string as read
				 * from the file */
{
    int stringLength, oldStringLength;
    int readingString;
    char *oldString = NULL;
    char *newString = "";
    char buffer[BUFSIZE];
    char *crPtr;
    int is_okay = TRUE;

    readingString = TRUE;
    stringLength = 0;
    while (is_okay && readingString) {
        if (fgets(buffer, BUFSIZE, fp) == NULL) {
            fprintf(stderr, "Error: premature EOF in file %s\n", filename);
            is_okay = FALSE;
        } else {
            if ((crPtr = index(buffer, '\n')) != NULL) {
                *crPtr = '\0';
            }
	    if (strlen(buffer) > 0) {
                oldStringLength = stringLength;
                stringLength += strlen(buffer);
                if (oldString != NULL) free(oldString);
                oldString = newString;
                newString = calloc(stringLength, sizeof(char));
                assert(newString != NULL);
                strncpy(newString, oldString, oldStringLength);
                strcat(newString, buffer);
	    }
            if (strlen(buffer) < BUFSIZE - 1) {
                readingString = FALSE;
            }
        }
    }
    *stringFromFile = newString;
    return(is_okay);
}


/*
 * readNextSeq - read the data on the next sequence 
 *
 * This function reads the data on the next sequence from the
 * specified stream, and fills in the data in the passed seqNode
 * struct.
 *
 * return value: boolean flag set to TRUE if the data was read 
 * 	successfully, FALSE if any errors were encountered.
 */
int readNextSeq(
    FILE *fp,		/* called with: the stream with the sequence
			 * data, open for reading */
    char *filename,	/* called with: name of the file, used in
			 * error messages */
    int seqNumber,	/* called with; what sequence we're on, used
			 * in error messages */
    seqNode *nextSeq)	/* called with: allocated but empty seqNode
			 * structure.  
			 * Return with: structure with fields filled in. */
{
    char buffer[BUFSIZE];
    int is_okay = TRUE;
    int ii;
    char *spacePtr;
    char *oldSeq, *newSeq, *nextSubSeq;
    int seqLength, oldSeqLength;
    char nextChar;

    /* 
     * read the label line.  Check that it is a label line, and extract
     * and save the label.
     */
    if (fgets(buffer, BUFSIZE-1, fp) == NULL) {
        fprintf(stderr, "Error: premature EOF on sequence %d, file %s\n",
			seqNumber, filename);
        is_okay = FALSE;
    } else {
        if (buffer[0] != LABEL_MARKER) {
            fprintf(stderr, 
		    "Error: alignment file %s is missing sequence label %d\n",
		    filename, seqNumber);
            is_okay = FALSE;
        } else {
            for (ii = 1; isalnum(buffer[ii]); ii++) {
                nextSeq->label[ii-1] = buffer[ii];
            }
            nextSeq->label[ii-1] = '\0';
        }
    } 

    /*
     * read the next sequence from the input file.  The sequence
     * will be one or more strings of indefinite length, each terminated
     * by a carriage return.  When the next character in the file is
     * finally a '>', then we've reached the end of the sequence.
     */
    newSeq = oldSeq = NULL;
    seqLength = 0;
    nextChar = fgetc(fp);
    ungetc(nextChar, fp);
    while (is_okay && !feof(fp) && nextChar != '>') {
        if (oldSeq != NULL) free(oldSeq);
	oldSeq = newSeq;
	oldSeqLength = seqLength;
        is_okay = readStringFromFile(fp, filename, &nextSubSeq);
	if (is_okay) {
	    seqLength += strlen(nextSubSeq);
	    newSeq = calloc(seqLength, sizeof(char));
	    strncpy(newSeq, oldSeq, oldSeqLength);
	    strcat(newSeq, nextSubSeq);
	    free(nextSubSeq);
            nextChar = fgetc(fp);
	    if (!feof(fp)) ungetc(nextChar, fp);
	}
    }
    nextSeq->sequence = newSeq;

    /*
     * sometimes, a string is delimited with some blanks for readability.
     * If the string has any spaces in it, remove them.  
     */
    while ((spacePtr = index(nextSeq->sequence, ' ')) != NULL) {
	strcpy(spacePtr, spacePtr+1);
    }
    return(is_okay);
}

					
/*
 * readMultipleAlignment - read the multiple alignment data from
 *		the specified file into a linked list structure
 *
 * This function reads a FASTA-format multiple alignment from
 * the specified file into a linked list structure.  Additionally,
 * this module checks that there is a minimum number of aligned
 * sequences, the number specified by the calling program.
 *
 * Return value: flag set to TRUE if the data could be read
 *	successfully, the file was in the expected format, and
 *      there were at least two aligned sequences;
 *      FALSE otherwise.
 */
int readMultipleAlignment(
    char *filename,             /* called with: name of the file with the
                                 * alignment data, used in error messages */
    int minNumberAlignedSeqs,	/* called with: smallest number of
				 * aligned sequences we could return 
				 * with, and still have an alignment
				 * of interest to the caller */
    seqNode **seqList,		/* return with: a linked list
				 * representation of the sequence
				 * data */
    int *numberSequences)	/* return with: number of sequences
				 * read */
{
    FILE *fp;
    int isOkay = TRUE;
    seqNode *firstSeq, *lastSeq,  *nextSeq;
    int numberSeqsRead = 0;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        fprintf(stderr, "Error: could not read alignment file %s\n", filename);
        isOkay = FALSE;
    } 
    firstSeq = lastSeq = (seqNode *) NULL;
    while (isOkay && !feof(fp)) {
        nextSeq = (seqNode *) malloc(sizeof(seqNode));
        assert(nextSeq != NULL);
	nextSeq->sequence = NULL;
        nextSeq->next = (seqNode *) NULL;
        isOkay = readNextSeq(fp, filename, numberSeqsRead+1, nextSeq);

        /* 
         * now for the bookkeeping: update the linked list, the number
         * of sequences, and the length of the longest sequence 
         */
        if (isOkay) {
            numberSeqsRead++;
            if (lastSeq == NULL) {
                firstSeq = lastSeq = nextSeq;
            } else {
               lastSeq->next = nextSeq;
               lastSeq = lastSeq->next;
            }
        }
    }

    /*
     * and now for the cleanup.  If the file was opened, close it and
     * look at how many sequences were read.  If this number is less
     * than the minimum number to make this alignment interesting, 
     * print and error message and set the return value to indicate
     * an error.
     */
    if (fp != NULL) {
        fclose(fp);
        if (numberSeqsRead < minNumberAlignedSeqs) {
            fprintf(stderr, 
            "Error: alignment %s has only %d sequences, at least %d expected\n",
                filename, numberSeqsRead, minNumberAlignedSeqs);
            isOkay = FALSE;
        }
    }
    *seqList = firstSeq;
    *numberSequences = numberSeqsRead;
    return(isOkay);
}


/*
 * copyFromSeqNodeToAlignedSeq - copy the alignment data from a linked
 * 	list of seqNode structures to the fields of the alignedSeq struct.
 *
 * Make sure all sequences have same number of match columns 
 * and figure out where they go in the final alignment.
 *
 * copy the data from the temporary linked list to the permanent data
 * structure, and free the nodes of the linked list.
 *
 *
 * return value: boolean value set to TRUE if the operation was successful,
 *		FALSE otherwise. 
 */
int copyFromSeqNodeToAlignedSeq(
    seqNode *seqList,		/* called with: head of the linked list
				 * of seqNode structures */
    int numberSequences,	/* called with: number of sequences in the
				 * structure */
    AlignSummary *thisAlign)	/* called with: target data structure,
				 * allocated but with empty alignment fields.
				 * return with: structure with multiple
				 * alignment fields filled in */
{
    seqNode *lastSeqNode, *nextSeqNode;
    int ii, jj; 
    int is_okay = TRUE;

    int row, col;	/* counters for sequence and position in sequence */
    char *seq;
    int num_match=0;	/* number of match columns in first sequence */
    int check_match;	/* number of match columns in sequence to
			 * compare to num_match
			 */
    
    int num_before;	/* number of insertions immediately before current
			 * position in current row.
			 */
    int *before_match;	/* before_match[i] is max number of insertions
			 * immediately before match column i.
			 * One extra position for final inserts.
			 */
    int *where_match;	/* where_match[i] is location for ith match column
			 * (cumulative sum of before_match +i)
			 * One extra position for full alignment width.
			 */
    int TotalColumns;	/* number of columns needed when insertions
			 * are all padded to make match columns align.
			 */
			 
    /* count the number of match columns in the first sequence */
    assert(seqList);
    seq = seqList->sequence;
    assert(seq);
    for (col=0; seq[col]; col++)
    {    if (isupper(seq[col]) || seq[col] == '-')
	    num_match++;
    }
    /* every sequence must have num_match match columns */
    
    before_match = calloc(num_match+1, sizeof(int));
    where_match = calloc(num_match+1, sizeof(int));
    
    /* verify number of match columns
     * count insertions before each match position for each sequence 
     */
    for (nextSeqNode = seqList, row = 0; nextSeqNode != NULL; 
			nextSeqNode = nextSeqNode->next, row++)
    {	seq = nextSeqNode->sequence;
	assert(seq);
	check_match=0;
	num_before=0;
	for (col=0; seq[col]; col++)
	{   if (islower(seq[col]) || seq[col] == '.')
		num_before++;
	    else if (isupper(seq[col]) || seq[col] == '-')
	    {   if (check_match >= num_match)
		{  fprintf(stderr, 
			"Error: row %d (%s) has more match columns than row 0\n",
			row, nextSeqNode->label);
		    return FALSE;
		}
		if (num_before>before_match[check_match])
		    before_match[check_match] = num_before;
		num_before=0;
		check_match++;
	    }
	}
	if (check_match!=num_match)
	{  fprintf(stderr, 
		"Error: row %d (%s) has fewer match columns than row 0\n",
		row, nextSeqNode->label);
	    return FALSE;
	}
	if (num_before>before_match[check_match])
	    before_match[check_match] = num_before;
    }

    /* Now have number of insertions needed before each match column
     * Add them up to figure out where matches go.
     */
    num_before=0;	/* cumulative number before match */
    for(col=0; col<=num_match; col++)
    {   where_match[col] = num_before + before_match[col];
        num_before = where_match[col] + 1;
    }
    /* Note: where_match[num_match] is the total number of columns needed
     */
    TotalColumns = where_match[num_match];
    
    /*
     * now, allocate the target fields
     */
    thisAlign->label = calloc(numberSequences, sizeof(char *));
    assert(thisAlign->label != NULL);
    thisAlign->sequence = calloc(TotalColumns, sizeof(char *));
    assert(thisAlign->sequence != NULL);
    for (ii = 0; ii < TotalColumns; ii++) 
    {
        thisAlign->sequence[ii] = calloc(numberSequences, sizeof(char));
        assert(thisAlign->sequence[ii] != NULL);
    }
    /* 
     * now, go through the nodes of the linked list.  At each node,
     * copy the data into the target struct and free the node.
     */
    for (nextSeqNode = seqList, row = 0; nextSeqNode != NULL;  row++) 
    {
        assert(row < numberSequences);
	thisAlign->label[row] = strdup(nextSeqNode->label);
        assert(thisAlign->label[row] != NULL);
	
	/* go through sequence filling in position */
	col=0;	check_match=0;
	for(seq  = nextSeqNode->sequence;	*seq; seq++)
   	{   assert(col <= TotalColumns);
	    if (islower(*seq) || *seq == '.')
	    {	thisAlign->sequence[col++][row] = *seq;
	    }
	    else if (isupper(*seq) || *seq == '-')
	    {   while (col<where_match[check_match])
		{    thisAlign->sequence[col++][row] = '.';
		}
		thisAlign->sequence[col++][row] = *seq;
		check_match++;
	    }
	}
	while (col<TotalColumns)
	{   thisAlign->sequence[col++][row] = '.';
	}
	/* make sure that the entire sequence properly copied */
	assert(col==TotalColumns);
	assert(check_match == num_match);

	lastSeqNode = nextSeqNode;
	nextSeqNode = nextSeqNode->next;
	free(lastSeqNode->sequence);
        free(lastSeqNode);
    }
    assert(row == numberSequences);
    thisAlign->columns = TotalColumns;
    thisAlign->rows = numberSequences;
    free(before_match);
    free(where_match);
    return(is_okay);
}

/*
 * getMultipleAlignment - read a multiple alignment into the
 *			   alignStruct fields
 *
 * This function reads an alignment from the specified input file,
 * and fills in the relevant fields in the alignSummary
 * struct.  It assumes that the alignment is in FASTA format.
 *
 * Return value: boolean flag set to TRUE if the operation was
 * successful, FALSE otherwise.
 */
int getMultipleAlignment(
    char *filename,		/* called with: name of the file
				 * containing the alignment data */
    int minNumberAlignedSeqs,	/* the smallest number of aligned
				 * sequences expected by the caller */
    AlignSummary *thisAlign)	/* called with: pointer to a struct
				 * already allocated but with no alignment.
				 * return with: struct with the 
				 * alignment fields filled in */
{
    int is_okay = FALSE;
    int numberSequences = 0;
    seqNode *seqList = NULL;
    seqNode *seqPtr, *nextSeqPtr;

    /*
     * a linked list makes sequence data easy to read, but hard to
     * work with afterwards.  An array is easy to work with, but hard
     * to read into in the case of sequence data.  So, we will read
     * the sequence data into a linked list structure, and then
     * copy the data into an array structure and delete the linked 
     * list.
     */
    is_okay = readMultipleAlignment(filename, minNumberAlignedSeqs,
				    &seqList, &numberSequences);
    if (is_okay) {
        is_okay = copyFromSeqNodeToAlignedSeq(seqList, numberSequences, 
					      thisAlign); 
    }
    return(is_okay);
}    


/*
 * readAlignment - read the alignment, filling in the target and
 * 		template fields if specified.
 *
 * This function takes as input a filename containing an alignment
 * and returns with an AlignSummary struct representing the alignment.
 * If the gotTemplate and gotTarget flags are TRUE, it will
 * require that the specified target and template sequences exist in
 * the alignment, and will fill in the target and template fields in
 * the structure.  If the flags are FALSE, it will leave the
 * target and template structure fields NULL.
 *
 * If template and/or target is not specified, the code will check
 * if either alignment is pairwise and will assign template and/or
 * target by the sequence names in the pairwise alignment.  If neither
 * template nor target were specified and the alignment is pairwise,
 * it will choose one at random to be target and one to be template
 *
 */
int readAlignment(
    char *alignFilename,	/* called with: file containing the
				 * alignment */
    char **templateName,        /* called with: pointer to the name of
                                 * the template sequence, or to NULL if
                                 * the sequence name is not yet known */
    char **targetName,          /* called with: pointer to the name of
                                 * the target sequence, or to NULL if
                                 * the sequence name is not yet known.  */
    int minNumberAlignedSeqs,	/* called with: fewest number of sequences
				 * expected in the alignment by the caller */
    AlignSummary **align)  	/* return with: pointer to the 
				 * filled-in alignment struct */
{
    AlignSummary *thisAlign;
    int is_okay = TRUE;
    int returnValue;
    char *baseFilenamePtr;
    char baseFilename[FILENAME_LENGTH + 1];
    char tmpFilename[FILENAME_LENGTH + 1];
    char cmdBuffer[COMMAND_LENGTH + 1];

    thisAlign = malloc(sizeof(AlignSummary));
    assert(thisAlign != NULL);
    thisAlign->rows = 0;
    thisAlign->columns = 0;
    thisAlign->label = NULL;
    thisAlign->sequence = NULL;
    strcpy(thisAlign->filename, alignFilename);
    if (hasSuffix(alignFilename, ".gz")) {
	/*
	 * If the alignment is gzipped, make a gunzipped copy of it on
	 * /tmp.  Identify it with the process ID to make it unique
         * and the filename of the alignment, to make it traceable
	 * if something blows up.  Note that we want the filename but
	 * not the pathname, so strip out any directory specification.
	 */
	if (strchr(alignFilename, '/') != NULL) {
	    baseFilenamePtr = strrchr(alignFilename, '/');
	    baseFilenamePtr++;
            strcpy(baseFilename, baseFilenamePtr);
        } else {
	    strcpy(baseFilename, alignFilename);
	}
	sprintf(tmpFilename, "/tmp/%s.%d", baseFilename, getpid());
	sprintf(cmdBuffer, "gunzip -c %s > %s", alignFilename, tmpFilename);
        system(cmdBuffer);
        is_okay = getMultipleAlignment(tmpFilename,
				       minNumberAlignedSeqs, thisAlign);
	returnValue = unlink(tmpFilename);
	assert(returnValue == 0);
    } else {
        is_okay = getMultipleAlignment(alignFilename, 
				       minNumberAlignedSeqs, thisAlign);
    }
    if (is_okay) {
        /*
         * If the template and/or target names were not specified,
         * if the alignment is pairwise, then deduce the template
	 * and target names from the sequence names.
         */
        if (*templateName == NULL) {
            if (thisAlign->rows == 2) {
                if (*targetName == NULL) {
                    *templateName = strdup(thisAlign->label[0]);
                } else if (strcasecmp(thisAlign->label[0], *targetName) == 0) {
                    *templateName = strdup(thisAlign->label[1]);
                } else {
                    *templateName = strdup(thisAlign->label[0]);
                }
            }
        }
        if (*targetName == NULL) {
            if (thisAlign->rows == 2) {
                if (*templateName == NULL) {
                    *targetName = strdup(thisAlign->label[0]);
                } else if (strcasecmp(thisAlign->label[0], *templateName)==0) {
                    *targetName = strdup(thisAlign->label[1]);
                } else {
                    *targetName = strdup(thisAlign->label[0]);
                }
            }
        }
    }
    if (!is_okay) freeAlignSummary(thisAlign);
    *align = thisAlign;
    return(is_okay);
}


/*
 * readReferenceCandidate - read the reference and candidate alignments,
 *			make the alignments consistent with each other
 *			if necessary, and return with two aligned
 *			sequence structs.
 *
 * This function makes calls to read the reference and candidate 
 * alignments, given their pathnames and optionally given the
 * template and target names.  
 *
 * return value: TRUE if the entire operation went okay, FALSE otherwise.
 */
int readReferenceCandidate(
    char *referenceA2mName,	/* called with: filename of the
				 * reference alignment */
    char *candidateA2mName,	/* called with: filename of the 
				 * candidate alignment */
    char **templateName,        /* called with: pointer to the name of
                                 * the template sequence, or to NULL if
                                 * the sequence name is not yet known */
    char **targetName,          /* called with: pointer to the name of
                                 * the target sequence, or to NULL if
                                 * the sequence name is not yet known.  */
    char *workingCandidateFileName,
				/* called with: name of a file for
				 * printing out the working version of
				 * the candidate alignment, NULL if
				 * no owrking version is requested */
    int minNumberAlignedSeqs,	/* called with: the fewest aligned
				 * sequences that would still make this
				 * alignment interesting to the caller */
    AlignSummary **reference,   /* return with: reference alignment data */
    AlignSummary **candidate)	/* return with: candidate alignment data */
{
    int is_okay;

    is_okay = readAlignment(referenceA2mName, templateName, 
		    targetName, minNumberAlignedSeqs, reference);
    if (is_okay) {

	is_okay = readAlignment(candidateA2mName, templateName,
			targetName, minNumberAlignedSeqs, candidate);

         /*
          * If we're claiming to have a target and / or template
          * sequence in both alignments, make sure that the sequence
          * truly exists in both alignments.
          */
         if (is_okay && *templateName != NULL) {
             if (findSequenceIndex(*templateName, *reference) == NOT_FOUND) {
                 fprintf(stderr,
                         "Error: sequence %s not found in alignment %s\n",
                         *templateName, (*reference)->filename);
                 is_okay = FALSE;
             }
             if (findSequenceIndex(*templateName, *candidate) == NOT_FOUND) {
                 fprintf(stderr,
                         "Error: sequence %s not found in alignment %s\n",
                         *templateName, (*candidate)->filename);
                 is_okay = FALSE;
             }
         }
         if (is_okay && *targetName != NULL) {
             if (findSequenceIndex(*targetName, *reference) == NOT_FOUND) {
                 fprintf(stderr,
                         "Error: sequence %s not found in alignment %s\n",
                         *targetName, (*reference)->filename);
                 is_okay = FALSE;
             }
             if (findSequenceIndex(*targetName, *candidate) == NOT_FOUND) {
                 fprintf(stderr,
                         "Error: sequence %s not found in alignment %s\n",
                         *targetName, (*candidate)->filename);
                 is_okay = FALSE;
             }
         }


	 /*
	  * check that the two versions of the target and template
	  * sequences are identical, and resolve any differences.
	  * but first, make sure we know what the target and template
	  * sequences are.  If we don't, we need to go yell at the
	  * user about the arguments specified.
	  */
         if (is_okay && *targetName != NULL && *templateName != NULL) {
	     if (!sequencesIdentical(*reference, *templateName,
				    *candidate, *templateName)) {
		alignSequencesReviseAlignments(*reference, *candidate, 
					*templateName, *templateName); 
	    }
	    if (!sequencesIdentical(*reference, *targetName,
				    *candidate, *targetName)) {
		alignSequencesReviseAlignments(*reference, *candidate,
						*targetName, *targetName); 
            }

	    /*
	     * At this point, we have modified the candidate alignment
	     * to resolve any differences between the two copies of the
	     * template and target sequences.  If the caller has
	     * requested a dump of the working version of the candidate
	     * alignment, here's the spot.
	     */
	    if (workingCandidateFileName != NULL) {
		printAlign(*candidate, workingCandidateFileName);
	    }
	}
    }
#ifdef DEBUG    
    if (is_okay)
    {	fprintf(stderr, "target=%s, template=%s\n", *targetName, *templateName);
	fprintf(stderr, "reference alignment has %d columns (%d match)\n",
		(*reference)->columns, count_match(*reference)); 
        fprintf(stderr, "candidate alignment has %d columns (%d match)\n",
		(*candidate)->columns, count_match(*candidate)); 
    }
#endif    
    return(is_okay);
}


/*
 * printAlign - print out the alignment in an AlignSummary struct,
 *              in FASTA format.
 */
void printAlign(
    AlignSummary *thisAlign,    /* called with: alignment in question */
    char *a2mName)              /* called with: filename for the alignment
                                 * file */
{
    FILE *fp;
    int ii,jj;

    fp = fopen(a2mName, "w");
    if (fp == NULL) {
        fprintf(stderr, "Error: cannot open %s for writing!", a2mName);
   } else {
        for (ii = 0; ii < thisAlign->rows; ii++) {
            fprintf(fp, ">%s\n", thisAlign->label[ii]);
            for (jj = 0; jj < thisAlign->columns; jj++) {
                fputc(thisAlign->sequence[jj][ii], fp);
                if ((jj + 1) % RESIDUES_PER_LINE == 0) {
                    fputc('\n', fp);
                }
            }
            fprintf(fp, "\n");
        }
        fclose(fp);
    }
}