Skip to content
Snippets Groups Projects
Commit 9e37d80d authored by rnc's avatar rnc
Browse files

added homology gap handling for calling blixem

parent ce13e063
No related branches found
No related tags found
No related merge requests found
......@@ -25,9 +25,9 @@
* Description: Data structures describing a genetic feature.
*
* HISTORY:
* Last edited: May 31 20:48 2005 (rds)
* Last edited: Jun 23 09:37 2005 (rnc)
* Created: Fri Jun 11 08:37:19 2004 (edgrif)
* CVS info: $Id: zmapFeature.h,v 1.27 2005-06-01 11:17:26 rds Exp $
* CVS info: $Id: zmapFeature.h,v 1.28 2005-06-24 12:07:12 rnc Exp $
*-------------------------------------------------------------------
*/
#ifndef ZMAP_FEATURE_H
......@@ -395,7 +395,8 @@ gboolean zmapFeatureAugmentData(ZMapFeature feature, char *feature_name_id, char
char *sequence, ZMapFeatureType feature_type,
int start, int end, double score, ZMapStrand strand,
ZMapPhase phase,
ZMapHomolType homol_type_out, int start_out, int end_out) ;
ZMapHomolType homol_type_out, int start_out, int end_out,
GArray *gaps) ;
GQuark zMapFeatureGetStyleQuark(ZMapFeature feature) ;
ZMapFeatureTypeStyle zMapFeatureGetStyle(ZMapFeature feature) ;
void zmapFeatureDestroy(ZMapFeature feature) ;
......
......@@ -27,9 +27,9 @@
*
* Exported functions: See zmapView_P.h
* HISTORY:
* Last edited: May 25 11:38 2005 (edgrif)
* Last edited: Jun 23 20:02 2005 (rnc)
* Created: Fri Jul 16 13:05:58 2004 (edgrif)
* CVS info: $Id: zmapFeature.c,v 1.14 2005-05-27 15:14:02 edgrif Exp $
* CVS info: $Id: zmapFeature.c,v 1.15 2005-06-24 12:08:30 rnc Exp $
*-------------------------------------------------------------------
*/
......@@ -122,6 +122,7 @@ ZMapFeature zmapFeatureCreateEmpty(void)
}
/*!
* Adds data to a feature which may be "NULL" or may already have partial features,
* e.g. transcript that does not yet have all its exons.
......@@ -133,7 +134,8 @@ gboolean zmapFeatureAugmentData(ZMapFeature feature, char *feature_name_id, char
char *sequence, ZMapFeatureType feature_type,
int start, int end, double score, ZMapStrand strand,
ZMapPhase phase,
ZMapHomolType homol_type, int query_start, int query_end)
ZMapHomolType homol_type, int query_start, int query_end,
GArray *gaps)
{
gboolean result = FALSE ;
......@@ -216,14 +218,11 @@ gboolean zmapFeatureAugmentData(ZMapFeature feature, char *feature_name_id, char
}
else if (feature_type == ZMAPFEATURE_HOMOL)
{
/* Note that we do not put the extra "align" information for gapped alignments into the
* GFF files from acedb so no need to worry about it just now...... */
feature->feature.homol.type = homol_type ;
feature->feature.homol.y1 = query_start ;
feature->feature.homol.y2 = query_end ;
feature->feature.homol.score = score ;
feature->feature.homol.align = NULL ; /* not supported currently.... */
feature->feature.homol.align = gaps;
}
return result ;
......@@ -248,6 +247,11 @@ void zmapFeatureDestroy(ZMapFeature feature)
if (feature->feature.transcript.introns)
g_array_free(feature->feature.transcript.introns, TRUE) ;
}
else if (feature->type == ZMAPFEATURE_HOMOL)
{
if (feature->feature.homol.align)
g_array_free(feature->feature.homol.align, TRUE) ;
}
g_free(feature) ;
......
......@@ -26,9 +26,9 @@
*
* Exported functions: See ZMap/zmapGFF.h
* HISTORY:
* Last edited: May 27 14:56 2005 (edgrif)
* Last edited: Jun 23 20:00 2005 (rnc)
* Created: Fri May 28 14:25:12 2004 (edgrif)
* CVS info: $Id: zmapGFF2parser.c,v 1.23 2005-05-27 15:15:48 edgrif Exp $
* CVS info: $Id: zmapGFF2parser.c,v 1.24 2005-06-24 12:09:16 rnc Exp $
*-------------------------------------------------------------------
*/
......@@ -54,7 +54,7 @@ static gboolean parseBodyLine(ZMapGFFParser parser, char *line) ;
static gboolean makeNewFeature(ZMapGFFParser parser, char *sequence, char *source,
ZMapFeatureType feature_type,
int start, int end, double score, ZMapStrand strand,
ZMapPhase phase, char *attributes) ;
ZMapPhase phase, char *attributes, GArray *gaps) ;
static gboolean getFeatureName(char *sequence, char *attributes, ZMapFeatureType feature_type,
ZMapStrand strand, int start, int end, int query_start, int query_end,
char **feature_name, char **feature_name_id) ;
......@@ -68,6 +68,7 @@ static gboolean formatPhase(char *phase_str, ZMapPhase *phase_out) ;
static void getFeatureArray(GQuark key_id, gpointer data, gpointer user_data) ;
static void destroyFeatureArray(gpointer data) ;
static void loadGaps(char *currentPos, GArray *gaps);
static void printSource(GQuark key_id, gpointer data, gpointer user_data) ;
......@@ -145,6 +146,7 @@ gboolean zMapGFFParseLine(ZMapGFFParser parser, char *line)
{
gboolean result = FALSE ;
parser->line_count++ ;
/* Look for the header information. */
......@@ -191,6 +193,7 @@ gboolean zMapGFFParseLine(ZMapGFFParser parser, char *line)
}
}
return result ;
}
......@@ -549,14 +552,38 @@ static gboolean parseBodyLine(ZMapGFFParser parser, char *line)
int start = 0, end = 0 ;
double score = 0 ;
char *format_str = "%50s%50s%50s%d%d%50s%50s%50s %1000[^#] %1000c" ;
int fields ;
char *format_str_gaps = "%50s%50s%50s%d%d%50s%50s%50s %n" ;
int fields, charsRead, attsLen ;
char *attsPos, *gapsPos;
GArray *gaps = g_array_new(FALSE, FALSE, sizeof(ZMapAlignBlockStruct));
if (((fields = sscanf(line, format_str,
&sequence[0], &source[0], &feature_type[0],
&start, &end, &score_str[0], &strand_str[0], &phase_str[0],
&attributes[0], &comments[0]))
< GFF_MANDATORY_FIELDS)
gapsPos = strstr(line, " Gaps ");
if (gapsPos == NULL)
{
fields = sscanf(line, format_str,
&sequence[0], &source[0], &feature_type[0],
&start, &end, &score_str[0], &strand_str[0], &phase_str[0],
&attributes[0], &comments[0]);
}
else
{
fields = sscanf(line, format_str_gaps,
&sequence[0], &source[0], &feature_type[0],
&start, &end, &score_str[0], &strand_str[0], &phase_str[0],
&charsRead);
/* The hard bit here is to distinguish the attributes field from any following
* gaps pairs, so for now I'm just saying copy from where the sscanf ended
* up to the Gaps tag, then go and do the gaps. */
attsPos = line + charsRead;
attsLen = gapsPos - attsPos;
strncpy(attributes, attsPos, attsLen);
loadGaps(gapsPos, gaps);
}
if (fields < GFF_MANDATORY_FIELDS
|| (g_ascii_strcasecmp(source, ".") == 0)
|| (g_ascii_strcasecmp(feature_type, ".") == 0))
{
......@@ -627,18 +654,54 @@ static gboolean parseBodyLine(ZMapGFFParser parser, char *line)
{
result = makeNewFeature(parser, sequence, source, type,
start, end, score, strand, phase,
attributes) ;
attributes, gaps) ;
}
if (source_lower)
g_free(source_lower) ;
}
return result ;
}
/* Reads any gaps which are present on the gff line and appends them to gaps.
 *
 * The gaps section of the attributes has the form
 *
 *      Gaps "Qstart Qend Tstart Tend, Qstart Qend Tstart Tend, ..."
 *
 * i.e. space-delimited groups of 4 integers, consecutive groups being
 * comma-delimited.
 *
 * gapsPos  must point at the leading space of the " Gaps " tag (this is what
 *          the strstr() for " Gaps " in parseBodyLine() yields).
 * gaps     array of ZMapAlignBlockStruct to which each parsed gap is appended.
 *
 * Parsing stops at the first group that does not yield exactly 4 integers,
 * when there are no more commas, or when the next comma lies beyond the
 * closing quote of the Gaps string (so that commas in any text following
 * the gaps are never mistaken for gap separators). */
static void loadGaps(char *gapsPos, GArray *gaps)
{
  /* Length of the leading tag: space, "Gaps", space, opening quote. */
  enum { GAPS_TAG_LEN = 7 } ;
  char *gaps_format_str = "%d%d%d%d," ;
  char *gaps_end ;

  if (gapsPos == NULL || gaps == NULL)
    return ;

  /* A truncated line may end inside the tag, stepping over it blindly would
   * move us past the terminating NUL. */
  if (strlen(gapsPos) < (size_t)GAPS_TAG_LEN)
    return ;

  gapsPos += GAPS_TAG_LEN ;                                 /* skip over Gaps tag */

  /* Closing quote of the gaps string, NULL if the line has none in which
   * case we fall back to scanning to the end of the line. */
  gaps_end = strchr(gapsPos, '"') ;

  while (gapsPos != NULL)
    {
      ZMapAlignBlockStruct gap = {0} ;
      char *comma ;

      /* anything other than 4 is not a gap */
      if (sscanf(gapsPos, gaps_format_str, &gap.q1, &gap.q2, &gap.t1, &gap.t2) != 4)
        break ;

      gaps = g_array_append_val(gaps, gap) ;

      /* no more commas inside the quoted string means we're at the end */
      comma = strchr(gapsPos, ',') ;
      if (comma == NULL || (gaps_end != NULL && comma > gaps_end))
        break ;

      gapsPos = comma + 1 ;
    }

  return ;
}
static void printSource(GQuark key_id, gpointer data, gpointer user_data)
{
......@@ -654,7 +717,7 @@ static void printSource(GQuark key_id, gpointer data, gpointer user_data)
static gboolean makeNewFeature(ZMapGFFParser parser, char *sequence, char *source,
ZMapFeatureType feature_type,
int start, int end, double score, ZMapStrand strand,
ZMapPhase phase, char *attributes)
ZMapPhase phase, char *attributes, GArray *gaps)
{
gboolean result = FALSE ;
char *feature_name_id = NULL, *feature_name = NULL ;
......@@ -755,12 +818,10 @@ static gboolean makeNewFeature(ZMapGFFParser parser, char *sequence, char *sourc
/* we need to give it proper unique style name... */
style_id = zMapStyleCreateID(source) ;
#endif /* ED_G_NEVER_INCLUDE_THIS_CODE */
result = zmapFeatureAugmentData(feature, feature_name_id, feature_name, sequence,
feature_type, start, end, score, strand,
phase, homol_type, query_start, query_end) ;
phase, homol_type, query_start, query_end, gaps) ;
g_free(feature_name) ;
g_free(feature_name_id) ;
......@@ -863,7 +924,7 @@ static gboolean getFeatureName(char *sequence, char *attributes, ZMapFeatureType
*
* Format of similarity/homol attribute section is:
*
* Target "class:obj_name" start end
* Target "class:obj_name" start end [Gaps "Qstart Qend Tstart Tend, ..."]
*
* Format string extracts class:obj_name and start and end.
*
......@@ -1177,6 +1238,7 @@ gboolean formatPhase(char *phase_str, ZMapPhase *phase_out)
}
/* This is a GDataForeachFunc() and is called for each element of a GData list as a result
* of a call to zmapGFFGetFeatures(). The function adds the feature array returned
* in the GData element to the GArray in user_data. */
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment