From 95538e7220fb434d8e05d37dec38d67a14676daa Mon Sep 17 00:00:00 2001
From: edgrif <edgrif>
Date: Wed, 18 Aug 2010 11:31:41 +0000
Subject: [PATCH] add function to return cds/phase of a given exon.

---
 src/include/ZMap/zmapFeature.h     |   9 +-
 src/zmapFeature/zmapFeatureUtils.c | 304 ++++++++++++++++++++++-------
 2 files changed, 241 insertions(+), 72 deletions(-)

diff --git a/src/include/ZMap/zmapFeature.h b/src/include/ZMap/zmapFeature.h
index f16a85996..26b42c99e 100755
--- a/src/include/ZMap/zmapFeature.h
+++ b/src/include/ZMap/zmapFeature.h
@@ -25,9 +25,9 @@
  * Description: Data structures describing a sequence feature.
  *
  * HISTORY:
- * Last edited: Aug 17 08:35 2010 (edgrif)
+ * Last edited: Aug 18 12:30 2010 (edgrif)
  * Created: Fri Jun 11 08:37:19 2004 (edgrif)
- * CVS info:   $Id: zmapFeature.h,v 1.179 2010-08-18 09:16:53 edgrif Exp $
+ * CVS info:   $Id: zmapFeature.h,v 1.180 2010-08-18 11:31:41 edgrif Exp $
  *-------------------------------------------------------------------
  */
 #ifndef ZMAP_FEATURE_H
@@ -995,6 +995,11 @@ ZMapFrame zMapFeatureSubPartFrame(ZMapFeature feature, int coord);
 gboolean zMapFeatureWorld2CDS(ZMapFeature feature,
 			      int exon1, int exon2,
 			      int *cds1, int *cds2);
+gboolean zMapFeatureExon2CDS(ZMapFeature feature,
+			     int exon_start, int exon_end,
+			     int *exon_cds_start, int *exon_cds_end, int *phase_out) ;
+
+
 
 /* ================================================================= */
 /* functions in zmapFeatureFormatInput.c */
diff --git a/src/zmapFeature/zmapFeatureUtils.c b/src/zmapFeature/zmapFeatureUtils.c
index b7bdc2e8f..9eb38e47c 100755
--- a/src/zmapFeature/zmapFeatureUtils.c
+++ b/src/zmapFeature/zmapFeatureUtils.c
@@ -27,9 +27,9 @@
  *
  * Exported functions: See ZMap/zmapFeature.h
  * HISTORY:
- * Last edited: Sep  2 09:42 2009 (edgrif)
+ * Last edited: Aug 16 14:39 2010 (edgrif)
  * Created: Tue Nov 2 2004 (rnc)
- * CVS info:   $Id: zmapFeatureUtils.c,v 1.72 2010-06-14 15:40:13 mh17 Exp $
+ * CVS info:   $Id: zmapFeatureUtils.c,v 1.73 2010-08-18 11:31:41 edgrif Exp $
  *-------------------------------------------------------------------
  */
 
@@ -68,6 +68,9 @@ static void addTypeQuark(gpointer style_id, gpointer data, gpointer user_data) ;
 static void map_parent2child(gpointer exon_data, gpointer user_data);
 static int sortGapsByTarget(gconstpointer a, gconstpointer b) ;
 
+static int findExon(ZMapFeature feature, int exon_start, int exon_end) ;
+static gboolean calcExonPhase(ZMapFeature feature, int exon_index,
+			      int *exon_cds_start, int *exon_cds_end, int *phase_out) ;
 
 /*!
  * Function to do some validity checking on a ZMapFeatureAny struct. Always more you
@@ -740,6 +743,71 @@ gboolean zMapFeatureGetFeatureListExtent(GList *feature_list, int *start_out, in
   return done;
 }
 
+
+
+GArray *zMapFeatureWorld2TranscriptArray(ZMapFeature feature)
+{
+  GArray *t_array = NULL, *exon_array;
+
+  if(ZMAPFEATURE_HAS_EXONS(feature))
+    {
+      ZMapSpanStruct span;
+      int i;
+
+      t_array    = g_array_sized_new(FALSE, FALSE, sizeof(ZMapSpanStruct), 128);
+      exon_array = feature->feature.transcript.exons;
+
+      for(i = 0; i < exon_array->len; i++)
+	{
+	  span = g_array_index(exon_array, ZMapSpanStruct, i);
+	  zMapFeatureWorld2Transcript(feature, span.x1, span.x2, &(span.x1), &(span.x2));
+	  t_array = g_array_append_val(t_array, span);
+	}
+    }
+
+  return t_array;
+}
+
+
+gboolean zMapFeatureWorld2Transcript(ZMapFeature feature,
+				     int w1, int w2,
+				     int *t1, int *t2)
+{
+  gboolean is_transcript = FALSE;
+
+  if(feature->type == ZMAPSTYLE_MODE_TRANSCRIPT)
+    {
+      if(feature->x1 > w2 || feature->x2 < w1)
+	is_transcript = FALSE;
+      else
+	{
+	  SimpleParent2ChildDataStruct parent_data = { NULL };
+	  ZMapMapBlockStruct map_data = { 0 };
+	  map_data.p1 = w1;
+	  map_data.p2 = w2;
+
+	  parent_data.map         = &map_data;
+	  parent_data.limit_start = feature->x1;
+	  parent_data.limit_end   = feature->x2;
+	  parent_data.counter     = 0;
+
+	  zMapFeatureTranscriptExonForeach(feature, map_parent2child,
+					   &parent_data);
+	  if(t1)
+	    *t1 = map_data.c1;
+	  if(t2)
+	    *t2 = map_data.c2;
+
+	  is_transcript = TRUE;
+	}
+    }
+  else
+    is_transcript = FALSE;
+
+  return is_transcript;
+}
+
+
 void zMapFeatureTranscriptExonForeach(ZMapFeature feature, GFunc function, gpointer user_data)
 {
   GArray *exons;
@@ -785,64 +853,56 @@ void zMapFeatureTranscriptExonForeach(ZMapFeature feature, GFunc function, gpoin
   return ;
 }
 
-gboolean zMapFeatureWorld2Transcript(ZMapFeature feature,
-				     int w1, int w2,
-				     int *t1, int *t2)
+
+
+
+GArray *zMapFeatureWorld2CDSArray(ZMapFeature feature)
 {
-  gboolean is_transcript = FALSE;
+  GArray *cds_array = NULL ;
 
-  if(feature->type == ZMAPSTYLE_MODE_TRANSCRIPT)
+  if (ZMAPFEATURE_HAS_CDS(feature) && ZMAPFEATURE_HAS_EXONS(feature))
     {
-      if(feature->x1 > w2 || feature->x2 < w1)
-	is_transcript = FALSE;
-      else
-	{
-	  SimpleParent2ChildDataStruct parent_data = { NULL };
-	  ZMapMapBlockStruct map_data = { 0 };
-	  map_data.p1 = w1;
-	  map_data.p2 = w2;
-
-	  parent_data.map         = &map_data;
-	  parent_data.limit_start = feature->x1;
-	  parent_data.limit_end   = feature->x2;
-	  parent_data.counter     = 0;
+      GArray *exon_array ;
+      ZMapSpanStruct span;
+      int i;
 
-	  zMapFeatureTranscriptExonForeach(feature, map_parent2child,
-					   &parent_data);
-	  if(t1)
-	    *t1 = map_data.c1;
-	  if(t2)
-	    *t2 = map_data.c2;
+      cds_array  = g_array_sized_new(FALSE, FALSE, sizeof(ZMapSpanStruct), 128);
+      exon_array = feature->feature.transcript.exons;
 
-	  is_transcript = TRUE;
+      for(i = 0; i < exon_array->len; i++)
+	{
+	  span = g_array_index(exon_array, ZMapSpanStruct, i);
+	  zMapFeatureWorld2CDS(feature, span.x1, span.x2, &(span.x1), &(span.x2));
+	  cds_array = g_array_append_val(cds_array, span);
 	}
     }
-  else
-    is_transcript = FALSE;
 
-  return is_transcript;
+  return cds_array ;
 }
 
+
+
 gboolean zMapFeatureWorld2CDS(ZMapFeature feature,
-			      int exon1, int exon2,
-			      int *cds1, int *cds2)
+			      int exon1, int exon2, int *cds1, int *cds2)
 {
   gboolean cds_exon = FALSE;
 
-  if(feature->type == ZMAPSTYLE_MODE_TRANSCRIPT &&
-     feature->feature.transcript.flags.cds)
+  if (feature->type == ZMAPSTYLE_MODE_TRANSCRIPT && feature->feature.transcript.flags.cds)
     {
       int cds_start, cds_end;
 
       cds_start = feature->feature.transcript.cds_start;
       cds_end   = feature->feature.transcript.cds_end;
 
-      if(cds_start > exon2 || cds_end < exon1)
-	cds_exon = FALSE;
+      if (cds_start > exon2 || cds_end < exon1)
+	{
+	  cds_exon = FALSE;
+	}
       else
 	{
 	  SimpleParent2ChildDataStruct exon_cds_data = { NULL };
 	  ZMapMapBlockStruct map_data = { 0 };
+
 	  map_data.p1 = exon1;
 	  map_data.p2 = exon2;
 
@@ -853,64 +913,64 @@ gboolean zMapFeatureWorld2CDS(ZMapFeature feature,
 
 	  zMapFeatureTranscriptExonForeach(feature, map_parent2child,
 					   &exon_cds_data);
-	  if(cds1)
+	  if (cds1)
 	    *cds1 = map_data.c1;
-	  if(cds2)
+	  if (cds2)
 	    *cds2 = map_data.c2;
 
 	  cds_exon = TRUE;
 	}
     }
-  else
-    cds_exon = FALSE;
 
   return cds_exon;
 }
 
-GArray *zMapFeatureWorld2CDSArray(ZMapFeature feature)
+
+/* Given a transcript feature and the start/end of an exon within that transcript feature,
+ * returns TRUE and gives the coords (in reference sequence space) of the CDS section
+ * of the exon and also it's phase. Returns FALSE if the exon is not in the transcript
+ * or if the exon has no cds section. */
+gboolean zMapFeatureExon2CDS(ZMapFeature feature,
+			     int exon_start, int exon_end, int *exon_cds_start, int *exon_cds_end, int *phase_out)
 {
-  GArray *cds_array = NULL, *exon_array;
+  gboolean is_cds_exon = FALSE ;
+  int exon_index ;
 
-  if(ZMAPFEATURE_HAS_CDS(feature) && ZMAPFEATURE_HAS_EXONS(feature))
+  if (feature->type == ZMAPSTYLE_MODE_TRANSCRIPT && feature->feature.transcript.flags.cds
+      && (exon_index = findExon(feature, exon_start, exon_end)) > -1)
     {
-      ZMapSpanStruct span;
-      int i;
+      int cds_start, cds_end ;
 
-      cds_array  = g_array_sized_new(FALSE, FALSE, sizeof(ZMapSpanStruct), 128);
-      exon_array = feature->feature.transcript.exons;
+      cds_start = feature->feature.transcript.cds_start ;
+      cds_end = feature->feature.transcript.cds_end ;
 
-      for(i = 0; i < exon_array->len; i++)
+      if (cds_start > exon_end || cds_end < exon_start)
 	{
-	  span = g_array_index(exon_array, ZMapSpanStruct, i);
-	  zMapFeatureWorld2CDS(feature, span.x1, span.x2, &(span.x1), &(span.x2));
-	  cds_array = g_array_append_val(cds_array, span);
+	  ;
+	}
+      else
+	{
+	  /* Exon has a cds section so calculate it and find the exons phase. */
+	  int start, end, phase ;
+
+	  if (calcExonPhase(feature, exon_index, &start, &end, &phase))
+	    {
+	      *exon_cds_start = start ;
+	      *exon_cds_end = end ;
+	      *phase_out = phase ;
+
+	      is_cds_exon = TRUE ;
+	    }
 	}
     }
-  return cds_array;
+
+  return is_cds_exon ;
 }
 
-GArray *zMapFeatureWorld2TranscriptArray(ZMapFeature feature)
-{
-  GArray *t_array = NULL, *exon_array;
 
-  if(ZMAPFEATURE_HAS_EXONS(feature))
-    {
-      ZMapSpanStruct span;
-      int i;
 
-      t_array    = g_array_sized_new(FALSE, FALSE, sizeof(ZMapSpanStruct), 128);
-      exon_array = feature->feature.transcript.exons;
 
-      for(i = 0; i < exon_array->len; i++)
-	{
-	  span = g_array_index(exon_array, ZMapSpanStruct, i);
-	  zMapFeatureWorld2Transcript(feature, span.x1, span.x2, &(span.x1), &(span.x2));
-	  t_array = g_array_append_val(t_array, span);
-	}
-    }
 
-  return t_array;
-}
 
 ZMapFrame zMapFeatureFrame(ZMapFeature feature)
 {
@@ -1132,3 +1192,107 @@ static int sortGapsByTarget(gconstpointer a, gconstpointer b)
   return (alignA->t1 == alignB->t1 ? 0 : (alignA->t1 > alignB->t1 ? 1 : -1));
 }
 
+
+/* Find an exon in a features list given it's start/end coords, returns -1
+ * if exon not found, otherwise returns index of exon in exon array. */
+static int findExon(ZMapFeature feature, int exon_start, int exon_end)
+{
+  int exon_index = -1 ;
+  GArray *exons ;
+  int i ;
+  
+  exons = feature->feature.transcript.exons ;
+
+  for (i = 0 ; i < exons->len ; i++)
+    {
+      ZMapSpan next_exon ;
+
+      next_exon = &(g_array_index(exons, ZMapSpanStruct, i)) ;
+
+      if (next_exon->x1 == exon_start && next_exon->x2 == exon_end)
+	{
+	  exon_index = i ;
+	  break ;
+	}
+    }
+
+  return exon_index ;
+}
+
+
+/* Returns the coords (in reference coords) of the cds section of the given exon
+ * and also it's phase. */
+static gboolean calcExonPhase(ZMapFeature feature, int exon_index,
+			      int *exon_cds_start_out, int *exon_cds_end_out, int *phase_out)
+{
+  gboolean result = FALSE ;
+  int cds_start, cds_end ;
+  GArray *exons ;
+  int i ;
+  int cds_bases ;
+
+  cds_start = feature->feature.transcript.cds_start ;
+  cds_end = feature->feature.transcript.cds_end ;
+
+  exons = feature->feature.transcript.exons ;
+  cds_bases = 0 ;
+  for (i = 0 ; i < exons->len && i <= exon_index ; i++)
+    {
+      ZMapSpan next_exon ;
+
+      next_exon = &(g_array_index(exons, ZMapSpanStruct, i)) ;
+
+      if (cds_start > next_exon->x2 || cds_end < next_exon->x1)
+	{
+	  continue ;
+	}
+      else
+	{
+	  int start, end, phase ;
+	  gboolean first_exon = FALSE ;
+
+	  start = next_exon->x1 ;
+	  end = next_exon->x2 ;
+
+	  if (cds_start >= start && cds_start <= end)
+	    {
+	      start = cds_start ;
+	      first_exon = TRUE ;
+	    }
+
+	  if (cds_end >= start && cds_end <= end)
+	    end = cds_end ;
+
+	  if (i == exon_index)
+	    {
+	      if (first_exon)
+		{
+		  /* The first exon must have phase 0 unless it has been annotated as
+		   * starting with a different phase. */
+		  if (feature->feature.transcript.flags.start_not_found)
+		    phase = feature->feature.transcript.start_phase ;
+		  else
+		    phase = 0 ;
+		}
+	      else
+		{
+		  phase = cds_bases % 3 ;
+		}
+
+	      *exon_cds_start_out = start ;
+	      *exon_cds_end_out = end ;
+	      *phase_out = phase ;
+
+	      result = TRUE ;
+
+	      break ;
+	    }
+
+	  /* Keep a running count of bases so far, caculate phase of next exon from this. */
+	  cds_bases += (end - start) + 1 ;
+	}
+
+    }
+
+  return result ;
+}
-- 
GitLab