From 2327244873a9cc88120433bd1c68ea3757d839ea Mon Sep 17 00:00:00 2001
From: gb10 <gb10>
Date: Tue, 5 Oct 2010 15:51:21 +0000
Subject: [PATCH] Added a type for BlxSequence

---
 blxFetch.c  | 14 +++++-----
 blxparser.c |  6 ++---
 blxview.c   | 78 ++++++++++++++++++++++++++++++++---------------------
 blxview.h   | 20 +++++++++++---
 utilities.c | 18 ++++++++++---
 utilities.h |  2 ++
 6 files changed, 91 insertions(+), 47 deletions(-)

diff --git a/blxFetch.c b/blxFetch.c
index be818c57..78a784f7 100644
--- a/blxFetch.c
+++ b/blxFetch.c
@@ -38,7 +38,7 @@
  * HISTORY:
  * Last edited: Aug 21 17:34 2009 (edgrif)
  * Created: Tue Jun 17 16:20:26 2008 (edgrif)
- * CVS info:   $Id: blxFetch.c,v 1.39 2010-10-05 15:23:02 gb10 Exp $
+ * CVS info:   $Id: blxFetch.c,v 1.40 2010-10-05 15:51:21 gb10 Exp $
  *-------------------------------------------------------------------
  */
 
@@ -928,8 +928,8 @@ gboolean populateFastaDataPfetch(GList *seqsToFetch, const char *pfetchIP, int p
  *  - can be called after the fasta sequence data is already populated:
  *    in that case it will ignore the sequence data and just populate
  *    the additional data.
- *  - sequence data will also be ignored for sequences that have the
- *    sequenceReqd flag set to false
+ *  - sequence data will also be ignored for sequences that do not
+ *    require sequence data (see blxSequenceRequiresSeqData)
  */
 gboolean populateFullDataPfetch(GList *seqsToFetch, const char *pfetchIP, int port, gboolean External, const BlxSeqType seqType, GError **error)
 {
@@ -2155,7 +2155,7 @@ static gboolean pfetchGetParserStateFromId(const char *sectionId,
   if (stringsEqual(sectionId, "SQ", TRUE))
     {
       /* Skip the sequence section if the sequence is already populated or not required. */
-      if (currentSeq->sequence && (currentSeq->sequence->str || !currentSeq->sequenceReqd))
+      if (currentSeq->sequence && (currentSeq->sequence->str || !blxSequenceRequiresSeqData(currentSeq)))
         {
           *parserState = PARSING_IGNORE;
         }
@@ -2164,15 +2164,15 @@ static gboolean pfetchGetParserStateFromId(const char *sectionId,
           *parserState = PARSING_SEQUENCE_HEADER;
         }
     }
-  else if (stringsEqual(sectionId, "OS", TRUE) && currentSeq->optionalDataReqd)
+  else if (stringsEqual(sectionId, "OS", TRUE) && blxSequenceRequiresOptionalData(currentSeq))
     {
       *parserState = PARSING_ORGANISM;
     }
-  else if (stringsEqual(sectionId, "GN", TRUE) && currentSeq->optionalDataReqd)
+  else if (stringsEqual(sectionId, "GN", TRUE) && blxSequenceRequiresOptionalData(currentSeq))
     {
       *parserState = PARSING_GENE_NAME;
     }
-  else if (stringsEqual(sectionId, "FT", TRUE) && currentSeq->optionalDataReqd)
+  else if (stringsEqual(sectionId, "FT", TRUE) && blxSequenceRequiresOptionalData(currentSeq))
     {
       if (tagName && tagName->len)
         {
diff --git a/blxparser.c b/blxparser.c
index 63700ad9..c02b0593 100644
--- a/blxparser.c
+++ b/blxparser.c
@@ -34,7 +34,7 @@
  * * 98-02-19  Changed MSP parsing to handle all SFS formats.
  * * 99-07-29  Added support for SFS type=HSP and GFF.
  * Created: 93-05-17
- * CVS info:   $Id: blxparser.c,v 1.38 2010-08-27 12:25:14 gb10 Exp $
+ * CVS info:   $Id: blxparser.c,v 1.39 2010-10-05 15:51:21 gb10 Exp $
  *-------------------------------------------------------------------
  */
 
@@ -903,7 +903,7 @@ static void parseEXBLXSEQBLExtended(MSP **lastMsp, MSP **mspList, BlxParserState
                       g_error("Bad description data\n");
                     }
 		}
-	      else if ((parserState == SEQBL_X_BODY || mspIsSnp(msp)) && (strstr(seq_pos, BLX_SEQUENCE_TAG)))
+	      else if ((parserState == SEQBL_X_BODY || mspIsVariation(msp)) && (strstr(seq_pos, BLX_SEQUENCE_TAG)))
 		{
 		  if (*opts == 'L')
 		    {
@@ -1782,7 +1782,7 @@ static BlxMspType getMspTypeFromScore(const int score)
     }
   else if (score == -3)
     {
-      result = BLXMSP_SNP;
+      result = BLXMSP_VARIATION;
     }
 
   return result;  
diff --git a/blxview.c b/blxview.c
index 9799e137..86fce988 100644
--- a/blxview.c
+++ b/blxview.c
@@ -88,7 +88,7 @@
 01-10-05	Added getsseqsPfetch to fetch all missing sseqs in one go via socket connection to pfetch [RD]
 
  * Created: Thu Feb 20 10:27:39 1993 (esr)
- * CVS info:   $Id: blxview.c,v 1.73 2010-10-05 15:23:02 gb10 Exp $
+ * CVS info:   $Id: blxview.c,v 1.74 2010-10-05 15:51:21 gb10 Exp $
  *-------------------------------------------------------------------
  */
 
@@ -811,6 +811,28 @@ static gint compareFuncMspPos(gconstpointer a, gconstpointer b)
   return result;
 }
 
+/* Returns the BlxSequenceType that the parent BlxSequence of the given MSP should have: MATCH for blast matches, TRANSCRIPT for exons/introns, VARIATION for variations. */
+static BlxSequenceType getBlxSequenceTypeForMsp(const MSP const *msp)
+{
+  BlxSequenceType result = BLXSEQUENCE_UNSET;
+  /* The result is left as BLXSEQUENCE_UNSET if the MSP is none of the kinds tested below */
+  if (mspIsBlastMatch(msp))
+    {
+      result = BLXSEQUENCE_MATCH;
+    }
+  else if (mspIsExon(msp) || mspIsIntron(msp))
+    {
+      result = BLXSEQUENCE_TRANSCRIPT;
+    }
+  else if (mspIsVariation(msp))
+    {
+      result = BLXSEQUENCE_VARIATION;
+    }
+
+  return result;
+}
+
+
 /* Add or create a BlxSequence struct, creating the BlxSequence if one does not
  * already exist for the MSP's sequence name. Seperate BlxSequence structs are created
  * for the forward and reverse strands of the same sequence. The passed-in sequence 
@@ -839,12 +861,7 @@ BlxSequence* addBlxSequence(const char *name, const char *idTag, BlxStrand stran
           /* Create a new BlxSequence, and take ownership of the passed in sequence (if any) */
           blxSeq = createEmptyBlxSequence(seqName, idTag, NULL);
           *seqList = g_list_prepend(*seqList, blxSeq);
-          
           blxSeq->strand = strand;
-          
-          /* Set whether the sequence data is required by any of this sequence's MSPs */
-          blxSeq->sequenceReqd |= mspIsBlastMatch(msp) || mspIsVariation(msp);
-          blxSeq->optionalDataReqd |= mspIsBlastMatch(msp);
         }
       
       if (seqName && !blxSeq->fullName)
@@ -859,6 +876,15 @@ BlxSequence* addBlxSequence(const char *name, const char *idTag, BlxStrand stran
           /* Add the MSP to the BlxSequence's list. Keep it sorted by position. */
           blxSeq->mspList = g_list_insert_sorted(blxSeq->mspList, msp, compareFuncMspPos);
           msp->sSequence = blxSeq;
+          
+          if (blxSeq->type == BLXSEQUENCE_UNSET)
+            {
+              blxSeq->type = getBlxSequenceTypeForMsp(msp);
+            }
+          else if (blxSeq->type != getBlxSequenceTypeForMsp(msp))
+            {
+              g_warning("Adding MSP of type %d to parent of type %d (expected parent type to be %d)\n", msp->type, blxSeq->type, getBlxSequenceTypeForMsp(msp));
+            }
         }
       
       /* Add the sequence data */
@@ -1366,7 +1392,7 @@ void addBlxSequenceData(BlxSequence *blxSeq, char *sequence, GError **error)
   
   gboolean sequenceUsed = FALSE;
   
-  if (blxSeq && blxSeq->sequenceReqd)
+  if (blxSeq && blxSequenceRequiresSeqData(blxSeq))
     {
       if (!blxSeq->sequence)
         {
@@ -1644,31 +1670,23 @@ GList* getSeqsToPopulate(GList *inputList, const gboolean getSequenceData, const
     {
       BlxSequence *blxSeq = (BlxSequence*)(inputItem->data);
       
-      gboolean getSeq = FALSE;
+      /* Check if sequence data was requested and is not already set. */
+      gboolean getSeq = (blxSequenceRequiresSeqData(blxSeq) && getSequenceData && blxSeq->sequence == NULL);
+
+      /* Check if optional data was requested and is not already set. We can assume that
+       * if any of the data fields is set then the parsing has been done for all of them
+       * (and any remaining empty fields just don't have that data available) */
+      getSeq |= (blxSequenceRequiresOptionalData(blxSeq) &&
+                 getOptionalData && 
+                 !blxSeq->organism &&
+                 !blxSeq->geneName &&
+                 !blxSeq->tissueType &&
+                 !blxSeq->strain);
       
-      /* We only want to get data for blast matches, which have the sequenceReqd flag set. */
-      if (blxSeq->sequenceReqd)
+      if (getSeq)
         {
-          /* Check if sequence data was requested and is not already set. */
-          getSeq = (getSequenceData && blxSeq->sequence == NULL);
-
-          if (blxSeq->optionalDataReqd)
-            {
-              /* Check if optional data was requested and is not already set. We can assume that
-               * if any of the data fields is set then the parsing has been done for all of them
-               * (and any remaining empty fields just don't have that data available) */
-              getSeq |= (getOptionalData && 
-                         !blxSeq->organism &&
-                         !blxSeq->geneName &&
-                         !blxSeq->tissueType &&
-                         !blxSeq->strain);
-            }
-          
-          if (getSeq)
-            {
-              resultList = g_list_prepend(resultList, blxSeq);
-            }
-          }
+          resultList = g_list_prepend(resultList, blxSeq);
+        }
     }
 
   return resultList;
diff --git a/blxview.h b/blxview.h
index 1b0029e1..97b7ae45 100644
--- a/blxview.h
+++ b/blxview.h
@@ -27,7 +27,7 @@
  * Last edited: Aug 21 13:57 2009 (edgrif)
  * * Aug 26 16:57 1999 (fw): added this header
  * Created: Thu Aug 26 16:57:17 1999 (fw)
- * CVS info:   $Id: blxview.h,v 1.37 2010-10-05 15:23:02 gb10 Exp $
+ * CVS info:   $Id: blxview.h,v 1.38 2010-10-05 15:51:21 gb10 Exp $
  *-------------------------------------------------------------------
  */
 #ifndef DEF_BLXVIEW_H
@@ -86,9 +86,23 @@ typedef enum
   } BlxStrand ;
 
 
-/* Structure that contains information about a sequence */
+/* Type of a BlxSequence, i.e. what kind of MSPs it groups together. BLXSEQUENCE_UNSET is the value a newly created BlxSequence starts with. */
+typedef enum
+{
+  BLXSEQUENCE_UNSET,
+  BLXSEQUENCE_TRANSCRIPT,         /* transcript (i.e. collection of exons and introns) */
+  BLXSEQUENCE_MATCH,              /* match sequence (i.e. collection of matches) */
+  BLXSEQUENCE_VARIATION           /* variation (i.e. insertion, deletion or substitution) */
+} BlxSequenceType;
+
+
+/* Structure that contains information about a "sequence". (The name is a bit of a misnomer because it
+ * also covers transcripts etc., for which it doesn't actually contain any sequence data. Really this
+ * is just a parent object for a collection of MSPs.) */
 typedef struct _BlxSequence
 {
+  BlxSequenceType type;            /* What type of collection of MSPs this is */
+
   char *idTag;			   /* Unique identifier e.g. from ID tag in GFF files */
 
   char *fullName;                  /* full name of the sequence and variant, including prefix characters, e.g. EM:AV274505.2 */
@@ -102,8 +116,6 @@ typedef struct _BlxSequence
   
   BlxStrand strand;                /* which strand of the sequence this is */
   GString *sequence;               /* the actual sequence data */
-  gboolean sequenceReqd;           /* whether the sequence data is required (e.g. it is not needed for exons/introns etc.) */
-  gboolean optionalDataReqd;       /* whether the optional data is required (e.g. it is not applicable to exons/snps etc.) */
   IntRange qRange;		   /* the extent of the sequence on the ref sequence */ 
   
   GList *mspList;                  /* list of MSPs from this sequence */
diff --git a/utilities.c b/utilities.c
index 80f09fd2..ddc575ea 100644
--- a/utilities.c
+++ b/utilities.c
@@ -1794,7 +1794,8 @@ BlxSequence* createEmptyBlxSequence(const char *fullName, const char *idTag, GEr
     }
   
   BlxSequence *seq = g_malloc(sizeof(BlxSequence));
-  
+
+  seq->type = BLXSEQUENCE_UNSET;
   seq->idTag = idTag ? g_strdup(idTag) : NULL;
 
   seq->fullName = NULL;
@@ -1804,8 +1805,6 @@ BlxSequence* createEmptyBlxSequence(const char *fullName, const char *idTag, GEr
 
   seq->mspList = NULL;
   seq->sequence = NULL;
-  seq->sequenceReqd = FALSE;
-  seq->optionalDataReqd = FALSE;
   
   seq->organism = NULL;
   seq->geneName = NULL;
@@ -1816,6 +1815,19 @@ BlxSequence* createEmptyBlxSequence(const char *fullName, const char *idTag, GEr
 }
 
 
+/* Returns true if the given BlxSequence requires sequence data to be set, i.e. if its type is match or variation. blxSeq must not be NULL (it is dereferenced unconditionally). */
+gboolean blxSequenceRequiresSeqData(const BlxSequence *blxSeq)
+{
+  return (blxSeq->type == BLXSEQUENCE_MATCH || blxSeq->type == BLXSEQUENCE_VARIATION);
+}
+
+/* Returns true if the given BlxSequence uses optional data (e.g. organism, gene name), i.e. if its type is match. blxSeq must not be NULL (it is dereferenced unconditionally). */
+gboolean blxSequenceRequiresOptionalData(const BlxSequence *blxSeq)
+{
+  return (blxSeq->type == BLXSEQUENCE_MATCH);
+}
+
+
 /* Round to nearest int (needed because there is no  round() function in ISO C90) */
 int roundNearest(const double val)
 {
diff --git a/utilities.h b/utilities.h
index 04d25cac..41d28f1c 100644
--- a/utilities.h
+++ b/utilities.h
@@ -221,6 +221,8 @@ BlxStrand	      mspGetMatchStrand(const MSP const *msp);
 const char*           mspGetMatchSeq(const MSP const *msp);
 const char*	      mspGetSName(const MSP *msp);
 char*		      blxSequenceGetSummaryInfo(const BlxSequence const *blxSeq);
+gboolean              blxSequenceRequiresSeqData(const BlxSequence *blxSeq);
+gboolean              blxSequenceRequiresOptionalData(const BlxSequence *blxSeq);
 char*                 mspGetExonTranscriptName(const MSP *msp);
 const IntRange const* mspGetRefCoords(const MSP const *msp);
 const IntRange const* mspGetMatchCoords(const MSP const *msp);
-- 
GitLab