From 2327244873a9cc88120433bd1c68ea3757d839ea Mon Sep 17 00:00:00 2001 From: gb10 <gb10> Date: Tue, 5 Oct 2010 15:51:21 +0000 Subject: [PATCH] Added a type for BlxSequence --- blxFetch.c | 14 +++++----- blxparser.c | 6 ++--- blxview.c | 78 ++++++++++++++++++++++++++++++++--------------------- blxview.h | 20 +++++++++++--- utilities.c | 18 ++++++++++--- utilities.h | 2 ++ 6 files changed, 91 insertions(+), 47 deletions(-) diff --git a/blxFetch.c b/blxFetch.c index be818c57..78a784f7 100644 --- a/blxFetch.c +++ b/blxFetch.c @@ -38,7 +38,7 @@ * HISTORY: * Last edited: Aug 21 17:34 2009 (edgrif) * Created: Tue Jun 17 16:20:26 2008 (edgrif) - * CVS info: $Id: blxFetch.c,v 1.39 2010-10-05 15:23:02 gb10 Exp $ + * CVS info: $Id: blxFetch.c,v 1.40 2010-10-05 15:51:21 gb10 Exp $ *------------------------------------------------------------------- */ @@ -928,8 +928,8 @@ gboolean populateFastaDataPfetch(GList *seqsToFetch, const char *pfetchIP, int p * - can be called after the fasta sequence data is already populated: * in that case it will ignore the sequence data and just populate * the additional data. - * - sequence data will also be ignored for sequences that have the - * sequenceReqd flag set to false + * - sequence data will also be ignored for sequences that do not + * require sequence data */ gboolean populateFullDataPfetch(GList *seqsToFetch, const char *pfetchIP, int port, gboolean External, const BlxSeqType seqType, GError **error) { @@ -2155,7 +2155,7 @@ static gboolean pfetchGetParserStateFromId(const char *sectionId, if (stringsEqual(sectionId, "SQ", TRUE)) { /* Skip the sequence section if the sequence is already populated or not required. */ - if (currentSeq->sequence && (currentSeq->sequence->str || !currentSeq->sequenceReqd)) + if (currentSeq->sequence && (currentSeq->sequence->str || !blxSequenceRequiresSeqData(currentSeq))) { *parserState = PARSING_IGNORE; } @@ -2164,15 +2164,15 @@ static gboolean pfetchGetParserStateFromId(const char *sectionId, *parserState = PARSING_SEQUENCE_HEADER; } } - else if (stringsEqual(sectionId, "OS", TRUE) && currentSeq->optionalDataReqd) + else if (stringsEqual(sectionId, "OS", TRUE) && blxSequenceRequiresOptionalData(currentSeq)) { *parserState = PARSING_ORGANISM; } - else if (stringsEqual(sectionId, "GN", TRUE) && currentSeq->optionalDataReqd) + else if (stringsEqual(sectionId, "GN", TRUE) && blxSequenceRequiresOptionalData(currentSeq)) { *parserState = PARSING_GENE_NAME; } - else if (stringsEqual(sectionId, "FT", TRUE) && currentSeq->optionalDataReqd) + else if (stringsEqual(sectionId, "FT", TRUE) && blxSequenceRequiresOptionalData(currentSeq)) { if (tagName && tagName->len) { diff --git a/blxparser.c b/blxparser.c index 63700ad9..c02b0593 100644 --- a/blxparser.c +++ b/blxparser.c @@ -34,7 +34,7 @@ * * 98-02-19 Changed MSP parsing to handle all SFS formats. * * 99-07-29 Added support for SFS type=HSP and GFF. * Created: 93-05-17 - * CVS info: $Id: blxparser.c,v 1.38 2010-08-27 12:25:14 gb10 Exp $ + * CVS info: $Id: blxparser.c,v 1.39 2010-10-05 15:51:21 gb10 Exp $ *------------------------------------------------------------------- */ @@ -903,7 +903,7 @@ static void parseEXBLXSEQBLExtended(MSP **lastMsp, MSP **mspList, BlxParserState g_error("Bad description data\n"); } } - else if ((parserState == SEQBL_X_BODY || mspIsSnp(msp)) && (strstr(seq_pos, BLX_SEQUENCE_TAG))) + else if ((parserState == SEQBL_X_BODY || mspIsVariation(msp)) && (strstr(seq_pos, BLX_SEQUENCE_TAG))) { if (*opts == 'L') { @@ -1782,7 +1782,7 @@ static BlxMspType getMspTypeFromScore(const int score) } else if (score == -3) { - result = BLXMSP_SNP; + result = BLXMSP_VARIATION; } return result; diff --git a/blxview.c b/blxview.c index 9799e137..86fce988 100644 --- a/blxview.c +++ b/blxview.c @@ -88,7 +88,7 @@ 01-10-05 Added getsseqsPfetch to fetch all missing sseqs in one go via socket connection to pfetch [RD] * Created: Thu Feb 20 10:27:39 1993 (esr) - * CVS info: $Id: blxview.c,v 1.73 2010-10-05 15:23:02 gb10 Exp $ + * CVS info: $Id: blxview.c,v 1.74 2010-10-05 15:51:21 gb10 Exp $ *------------------------------------------------------------------- */ @@ -811,6 +811,28 @@ static gint compareFuncMspPos(gconstpointer a, gconstpointer b) return result; } + +static BlxSequenceType getBlxSequenceTypeForMsp(const MSP const *msp) +{ + BlxSequenceType result = BLXSEQUENCE_UNSET; + + if (mspIsBlastMatch(msp)) + { + result = BLXSEQUENCE_MATCH; + } + else if (mspIsExon(msp) || mspIsIntron(msp)) + { + result = BLXSEQUENCE_TRANSCRIPT; + } + else if (mspIsVariation(msp)) + { + result = BLXSEQUENCE_VARIATION; + } + + return result; +} + + /* Add or create a BlxSequence struct, creating the BlxSequence if one does not * already exist for the MSP's sequence name. Seperate BlxSequence structs are created * for the forward and reverse strands of the same sequence. The passed-in sequence @@ -839,12 +861,7 @@ BlxSequence* addBlxSequence(const char *name, const char *idTag, BlxStrand stran /* Create a new BlxSequence, and take ownership of the passed in sequence (if any) */ blxSeq = createEmptyBlxSequence(seqName, idTag, NULL); *seqList = g_list_prepend(*seqList, blxSeq); - blxSeq->strand = strand; - - /* Set whether the sequence data is required by any of this sequence's MSPs */ - blxSeq->sequenceReqd |= mspIsBlastMatch(msp) || mspIsVariation(msp); - blxSeq->optionalDataReqd |= mspIsBlastMatch(msp); } if (seqName && !blxSeq->fullName) @@ -859,6 +876,15 @@ BlxSequence* addBlxSequence(const char *name, const char *idTag, BlxStrand stran /* Add the MSP to the BlxSequence's list. Keep it sorted by position. */ blxSeq->mspList = g_list_insert_sorted(blxSeq->mspList, msp, compareFuncMspPos); msp->sSequence = blxSeq; + + if (blxSeq->type == BLXSEQUENCE_UNSET) + { + blxSeq->type = getBlxSequenceTypeForMsp(msp); + } + else if (blxSeq->type != getBlxSequenceTypeForMsp(msp)) + { + g_warning("Adding MSP of type %d to parent of type %d (expected parent type to be %d)\n", msp->type, blxSeq->type, getBlxSequenceTypeForMsp(msp)); + } } /* Add the sequence data */ @@ -1366,7 +1392,7 @@ void addBlxSequenceData(BlxSequence *blxSeq, char *sequence, GError **error) gboolean sequenceUsed = FALSE; - if (blxSeq && blxSeq->sequenceReqd) + if (blxSeq && blxSequenceRequiresSeqData(blxSeq)) { if (!blxSeq->sequence) { @@ -1644,31 +1670,23 @@ GList* getSeqsToPopulate(GList *inputList, const gboolean getSequenceData, const { BlxSequence *blxSeq = (BlxSequence*)(inputItem->data); - gboolean getSeq = FALSE; + /* Check if sequence data was requested and is not already set. */ + gboolean getSeq = (blxSequenceRequiresSeqData(blxSeq) && getSequenceData && blxSeq->sequence == NULL); + + /* Check if optional data was requested and is not already set. We can assume that + * if any of the data fields is set then the parsing has been done for all of them + * (and any remaining empty fields just don't have that data available) */ + getSeq |= (blxSequenceRequiresOptionalData(blxSeq) && + getOptionalData && + !blxSeq->organism && + !blxSeq->geneName && + !blxSeq->tissueType && + !blxSeq->strain); - /* We only want to get data for blast matches, which have the sequenceReqd flag set. */ - if (blxSeq->sequenceReqd) + if (getSeq) { - /* Check if sequence data was requested and is not already set. */ - getSeq = (getSequenceData && blxSeq->sequence == NULL); - - if (blxSeq->optionalDataReqd) - { - /* Check if optional data was requested and is not already set. We can assume that - * if any of the data fields is set then the parsing has been done for all of them - * (and any remaining empty fields just don't have that data available) */ - getSeq |= (getOptionalData && - !blxSeq->organism && - !blxSeq->geneName && - !blxSeq->tissueType && - !blxSeq->strain); - } - - if (getSeq) - { - resultList = g_list_prepend(resultList, blxSeq); - } - } + resultList = g_list_prepend(resultList, blxSeq); + } } return resultList; diff --git a/blxview.h b/blxview.h index 1b0029e1..97b7ae45 100644 --- a/blxview.h +++ b/blxview.h @@ -27,7 +27,7 @@ * Last edited: Aug 21 13:57 2009 (edgrif) * * Aug 26 16:57 1999 (fw): added this header * Created: Thu Aug 26 16:57:17 1999 (fw) - * CVS info: $Id: blxview.h,v 1.37 2010-10-05 15:23:02 gb10 Exp $ + * CVS info: $Id: blxview.h,v 1.38 2010-10-05 15:51:21 gb10 Exp $ *------------------------------------------------------------------- */ #ifndef DEF_BLXVIEW_H @@ -86,9 +86,23 @@ typedef enum } BlxStrand ; -/* Structure that contains information about a sequence */ +/* Type definition for BlxSequences */ +typedef enum +{ + BLXSEQUENCE_UNSET, + BLXSEQUENCE_TRANSCRIPT, /* transcript (i.e. collection of exons and introns) */ + BLXSEQUENCE_MATCH, /* match sequence (i.e. collection of matches) */ + BLXSEQUENCE_VARIATION /* variation (i.e. insertion, deletion or substitution) */ +} BlxSequenceType; + + +/* Structure that contains information about a "sequence" (bit of a misnomer because it also includes + * transcripts etc. for which it doesn't actually contain any sequence data. Really this is just a + * parent object for collections of MSPs). */ typedef struct _BlxSequence { + BlxSequenceType type; /* What type of collection of MSPs this is */ + char *idTag; /* Unique identifier e.g. from ID tag in GFF files */ char *fullName; /* full name of the sequence and variant, including prefix characters, e.g. EM:AV274505.2 */ @@ -102,8 +116,6 @@ typedef struct _BlxSequence BlxStrand strand; /* which strand of the sequence this is */ GString *sequence; /* the actual sequence data */ - gboolean sequenceReqd; /* whether the sequence data is required (e.g. it is not needed for exons/introns etc.) */ - gboolean optionalDataReqd; /* whether the optional data is required (e.g. it is not applicable to exons/snps etc.) */ IntRange qRange; /* the extent of the sequence on the ref sequence */ GList *mspList; /* list of MSPs from this sequence */ diff --git a/utilities.c b/utilities.c index 80f09fd2..ddc575ea 100644 --- a/utilities.c +++ b/utilities.c @@ -1794,7 +1794,8 @@ BlxSequence* createEmptyBlxSequence(const char *fullName, const char *idTag, GEr } BlxSequence *seq = g_malloc(sizeof(BlxSequence)); - + + seq->type = BLXSEQUENCE_UNSET; seq->idTag = idTag ? g_strdup(idTag) : NULL; seq->fullName = NULL; @@ -1804,8 +1805,6 @@ BlxSequence* createEmptyBlxSequence(const char *fullName, const char *idTag, GEr seq->mspList = NULL; seq->sequence = NULL; - seq->sequenceReqd = FALSE; - seq->optionalDataReqd = FALSE; seq->organism = NULL; seq->geneName = NULL; @@ -1816,6 +1815,19 @@ BlxSequence* createEmptyBlxSequence(const char *fullName, const char *idTag, GEr } +/* Returns true if the given BlxSequence requires sequence data to be set. */ +gboolean blxSequenceRequiresSeqData(const BlxSequence *blxSeq) +{ + return (blxSeq->type == BLXSEQUENCE_MATCH || blxSeq->type == BLXSEQUENCE_VARIATION); +} + +/* Returns true if the given BlxSequence uses optional data. */ +gboolean blxSequenceRequiresOptionalData(const BlxSequence *blxSeq) +{ + return (blxSeq->type == BLXSEQUENCE_MATCH); +} + + /* Round to nearest int (needed because there is no round() function in ISO C90) */ int roundNearest(const double val) { diff --git a/utilities.h b/utilities.h index 04d25cac..41d28f1c 100644 --- a/utilities.h +++ b/utilities.h @@ -221,6 +221,8 @@ BlxStrand mspGetMatchStrand(const MSP const *msp); const char* mspGetMatchSeq(const MSP const *msp); const char* mspGetSName(const MSP *msp); char* blxSequenceGetSummaryInfo(const BlxSequence const *blxSeq); +gboolean blxSequenceRequiresSeqData(const BlxSequence *blxSeq); +gboolean blxSequenceRequiresOptionalData(const BlxSequence *blxSeq); char* mspGetExonTranscriptName(const MSP *msp); const IntRange const* mspGetRefCoords(const MSP const *msp); const IntRange const* mspGetMatchCoords(const MSP const *msp); -- GitLab