/* * pdbsummary.c - contains functions to read a PDB summary file * * Update history: * 6/9/98 cline author */ #include #include #include #include #include "alignsummary.h" #include "compute_shift.h" #include "pdbsummary.h" /* * These constants represent strings in the PDB summary file */ #define PDBID_STRING "PDBID" #define SEQRES_STRING "SEQRES" #define SEQSEQ_STRING "SEQSEQ" /* * the constant below marks PDB sequence positions with no PDB numbers */ #define NO_PDB_NUMBER "_" /* * Below are constants for lengths of filenames, command buffers, etc. */ #define PATHNAME_LENGTH 256 #define COMMAND_BUFFER_LENGTH 256 #define INPUT_BUFFER_LENGTH 337 /* * PROTOTYPES */ /* * CODE */ /* * readPdbId - read the PDB ID line and check it against the expected PDBID * * return value: TRUE if we read the expected PDBID, FALSE otherwise. */ int readPdbId( FILE *fp, /* called with: pointer to the PDB summary * input stream, open for reading */ char *templateName) /* called with: expected pdbID */ { char input_buffer[INPUT_BUFFER_LENGTH]; int isOkay = TRUE; fscanf(fp, "%s ", input_buffer); if (strcmp(input_buffer, PDBID_STRING) != 0) { fprintf(stderr, "Error in the PDB summary: read %s, expected %s\n", input_buffer, PDBID_STRING); isOkay = FALSE; } else { fscanf(fp, "%s ", input_buffer); if (strcmp(input_buffer, templateName) != 0) { fprintf(stderr, "Error in the PDB summary: ID %s read, expected ID %s\n", input_buffer, templateName); isOkay = FALSE; } } return(isOkay); } /* * readSeqres - read the SEQRES line of the PDB summary file * * Read the SEQRES line and return with the sequence it labels * * Return value: TRUE if we could read and allocate memory for the * sequence, FALSE otherwise. */ int readSeqres( FILE *fp, /* called with: pointer to the PDB summary * data, open for reading */ char **templateSeq) /* return with: the PDB seqres sequence, * allocated in memory. */ { char input_buffer[INPUT_BUFFER_LENGTH]; int isOkay = TRUE; fscanf(fp, "%s ", input_buffer); if (strcmp(input_buffer, SEQRES_STRING) != 0) { fprintf(stderr, "Error in the PDB summary: read %s, expected %s\n", input_buffer, SEQRES_STRING); isOkay = FALSE; } else { isOkay = readStringFromFile(fp, "pdb-get-summary pipe", templateSeq); } return(isOkay); } /* * readSeqseq - read the seqseq record of the PDB file * * Read the SEQSEQ record, which contains one PDB number entry for each * sequence position in the SEQRES record. * * Return value: TRUE if we could read and allocate memory for the * sequence, FALSE otherwise. */ int readSeqseq( FILE *fp, /* called with: pointer to the * pdbsummary input stream */ int templateSeqLength, /* called with: length of the * template sequence */ char ***pdbNumbers) /* return with: pointer to an * array of strings, one cell * per template seq position, * containing the PDB number * for that position. */ { char input_buffer[INPUT_BUFFER_LENGTH]; int isOkay = TRUE; char **pdbArray = NULL; char *pdbnum; char *allPdbnumString; int pdbnumIndex; fscanf(fp, "%s ", input_buffer); if (strcmp(input_buffer, SEQSEQ_STRING) != 0) { fprintf(stderr, "Error in the PDB summary: read %s, expected %s\n", input_buffer, SEQSEQ_STRING); isOkay = FALSE; } else { isOkay = readStringFromFile(fp, "pdb-get-summary pipe", &allPdbnumString); if (isOkay) { pdbArray = calloc(templateSeqLength, sizeof(char *)); pdbnum = strtok(allPdbnumString, ","); pdbArray[0] = pdbnum; pdbnumIndex = 1; while (isOkay && (pdbnum = strtok(NULL, ",")) != NULL) { if (pdbnumIndex > templateSeqLength) { fprintf(stderr, "PDB summary error: %s and %s records are of different size\n", SEQRES_STRING, SEQSEQ_STRING); isOkay = FALSE; } else { pdbArray[pdbnumIndex] = pdbnum; pdbnumIndex++; } } } } *pdbNumbers = pdbArray; return(isOkay); } /* * createAlignSummary - make an empty AlignSummary structure to hold * the specified template sequence. */ void createAlignSummary( char *templateName, /* called with: name of the template * sequence */ char *templateSeq, /* called with: the new template sequence */ char *filename, /* called with: "filename" for the new * alignment */ AlignSummary **newAlign) /* return with: pointer to a pointer to * the new AlignSummary struct. */ { AlignSummary *alignTemplate = NULL; int ii; alignTemplate = malloc(sizeof(AlignSummary)); assert(alignTemplate != NULL); alignTemplate->columns = strlen(templateSeq);; alignTemplate->rows = 1; alignTemplate->label = calloc(alignTemplate->rows, sizeof(char *)); assert(alignTemplate->label != NULL); alignTemplate->label[0] = strdup(templateName); alignTemplate->sequence = calloc(alignTemplate->columns, sizeof(char *)); assert(alignTemplate->sequence != NULL); for (ii = 0; ii < alignTemplate->columns; ii++) { alignTemplate->sequence[ii] = calloc(alignTemplate->rows, sizeof(char)); assert(alignTemplate->sequence[ii] != NULL); alignTemplate->sequence[ii][0] = templateSeq[ii]; } *newAlign = alignTemplate; } /* * convertNonNumberedPositionsToInserts - for all residues that don't * have real PDB numbers, convert the positions to inserts. */ void convertNonNumberedPositionsToInserts( int pdbListSize, /* called with: number of PDB numbers / sequence * positions here */ char **pdbNumList, /* called with: list of the PDB numbers */ char *seqFromPdb) /* called with: the sequence as it was read * from PDB. Return with: same sequence, but * with certain states switched to lowercase. */ { int ii; for (ii = 0; ii < pdbListSize; ii++) { if (strcmp(pdbNumList[ii], NO_PDB_NUMBER) == 0) { seqFromPdb[ii] = tolower(seqFromPdb[ii]); } } } /* * getPdbsumAlignment - read the pdb summary for the specified template * structure. Store the SEQRES sequence in an AlignSummary structure, * and the list of PDB "numbers" in 2D string array. * * return value: TRUE if everything worked successfully. FALSE if * there was a pipe failure, memory failure, pdb-get-summary failure, etc. */ int getPdbsumAlignment( char *templateName, /* called with: name of the template * structure */ char ***pdbNumberList, /* return with: pointer to an array * of PDB numbers */ int *pdbListSize, /* return with: number of template * sequence residues and PDB numbers, * will be the same (assumed & tested) */ char **seqReadFromPdb, /* return with: the sequence as read * from the PDB, for later error checking */ AlignSummary **templateSeq) /* return with: an alignsummary structure * with the template seq in one row, PDB * numbers in the other row. */ { FILE *fp; char command[COMMAND_BUFFER_LENGTH]; int isOkay = TRUE; char **pdbNumbers; /* * create a shell command for "pdb-get-summary -cat