Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Z
zmap
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Jira
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ensembl-gh-mirror
zmap
Commits
e17ed1c5
Commit
e17ed1c5
authored
20 years ago
by
edgrif
Browse files
Options
Downloads
Patches
Plain Diff
Add SO term stuff to GFF parser code.
parent
3cf73701
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/include/ZMap/zmapGFF.h
+8
-2
8 additions, 2 deletions
src/include/ZMap/zmapGFF.h
src/zmapGFF/zmapGFF2parser.c
+189
-45
189 additions, 45 deletions
src/zmapGFF/zmapGFF2parser.c
src/zmapGFF/zmapGFF_P.h
+19
-24
19 additions, 24 deletions
src/zmapGFF/zmapGFF_P.h
with
216 additions
and
71 deletions
src/include/ZMap/zmapGFF.h
+
8
−
2
View file @
e17ed1c5
...
...
@@ -28,9 +28,9 @@
* of ZMapFeatureStruct's, one for each GFF source.
*
* HISTORY:
* Last edited: Jun 1
5
1
5:20
2004 (edgrif)
* Last edited: Jun 1
7
1
6:33
2004 (edgrif)
* Created: Sat May 29 13:18:32 2004 (edgrif)
* CVS info: $Id: zmapGFF.h,v 1.
1
2004-06-1
5
1
4:30
:2
5
edgrif Exp $
* CVS info: $Id: zmapGFF.h,v 1.
2
2004-06-1
8
1
1:03
:2
1
edgrif Exp $
*-------------------------------------------------------------------
*/
#ifndef ZMAP_GFF_H
...
...
@@ -60,10 +60,16 @@ ZMapGFFParser zMapGFFCreateParser(void) ;
/* Feed one line of GFF text to the parser. If the call fails, zMapGFFGetError()
 * gives a description of the problem. */
gboolean zMapGFFParseLine(ZMapGFFParser parser, char *line);

/* If stop_on_error is TRUE the parser will not parse any further lines after
 * the first error it encounters. */
void zMapGFFSetStopOnError(ZMapGFFParser parser, gboolean stop_on_error);

/* If SO_compliant is TRUE the parser will only accept SO terms for feature types. */
void zMapGFFSetSOCompliance(ZMapGFFParser parser, gboolean SO_compliant);

/* Return the features held by the parser as a GArray.
 * NOTE(review): this name starts "zmap" rather than "zMap" like its siblings. */
GArray *zmapGFFGetFeatures(ZMapGFFParser parser);

/* Return the GFF version the parser is using (taken from the header comments
 * of the GFF input stream). */
int zMapGFFGetVersion(ZMapGFFParser parser);

/* Return the line number of the last line processed. */
int zMapGFFGetLineNumber(ZMapGFFParser parser);

/* Return a description of the last failure in glib GError format, or NULL if
 * there has been no error. */
GError *zMapGFFGetError(ZMapGFFParser parser);

/* If free_on_destroy is TRUE all feature arrays are freed when the parser is
 * destroyed. */
void zMapGFFSetFreeOnDestroy(ZMapGFFParser parser, gboolean free_on_destroy);
...
...
This diff is collapsed.
Click to expand it.
src/zmapGFF/zmapGFF2parser.c
+
189
−
45
View file @
e17ed1c5
...
...
@@ -26,13 +26,14 @@
*
* Exported functions: See ZMap/zmapGFF.h
* HISTORY:
* Last edited: Jun 1
5
1
5:27
2004 (edgrif)
* Last edited: Jun 1
7
1
6:34
2004 (edgrif)
* Created: Fri May 28 14:25:12 2004 (edgrif)
* CVS info: $Id: zmapGFF2parser.c,v 1.
1
2004-06-1
5
1
4:37:35
edgrif Exp $
* CVS info: $Id: zmapGFF2parser.c,v 1.
2
2004-06-1
8
1
1:03:20
edgrif Exp $
*-------------------------------------------------------------------
*/
#include
<stdio.h>
#include
<strings.h>
#include
<errno.h>
#include
<glib.h>
#include
<ZMap/zmapFeature.h>
...
...
@@ -56,7 +57,7 @@ static char *getFeatureName(char *attributes) ;
static
gboolean
getHomolAttrs
(
char
*
attributes
,
ZMapHomolType
*
homol_type_out
,
int
*
start_out
,
int
*
end_out
)
;
static
gboolean
formatType
(
char
*
feature_type
,
ZMapFeatureType
*
type_out
)
;
static
gboolean
formatType
(
gboolean
SO_compliant
,
char
*
feature_type
,
ZMapFeatureType
*
type_out
)
;
static
gboolean
formatScore
(
char
*
score_str
,
gdouble
*
score_out
)
;
static
gboolean
formatStrand
(
char
*
strand_str
,
ZMapStrand
*
strand_out
)
;
static
gboolean
formatPhase
(
char
*
phase_str
,
ZMapPhase
*
phase_out
)
;
...
...
@@ -66,6 +67,8 @@ static void getFeatureArray(GQuark key_id, gpointer data, gpointer user_data) ;
void
destroyFeatureArray
(
gpointer
data
)
;
ZMapGFFParser
zMapGFFCreateParser
(
void
)
{
ZMapGFFParser
parser
;
...
...
@@ -75,10 +78,14 @@ ZMapGFFParser zMapGFFCreateParser(void)
parser
->
state
=
ZMAPGFF_PARSE_HEADER
;
parser
->
error
=
NULL
;
parser
->
error_domain
=
g_quark_from_string
(
ZMAP_GFF_ERROR
)
;
parser
->
stop_on_error
=
FALSE
;
parser
->
line_count
=
0
;
parser
->
SO_compliant
=
FALSE
;
parser
->
done_version
=
FALSE
;
parser
->
gff_version
=
0
;
parser
->
gff_version
=
-
1
;
parser
->
done_source
=
FALSE
;
parser
->
source_name
=
parser
->
source_version
=
NULL
;
...
...
@@ -117,6 +124,8 @@ gboolean zMapGFFParseLine(ZMapGFFParser parser, char *line)
{
gboolean
result
=
FALSE
;
parser
->
line_count
++
;
/* Look for the header information. */
if
(
parser
->
state
==
ZMAPGFF_PARSE_HEADER
)
{
...
...
@@ -124,8 +133,9 @@ gboolean zMapGFFParseLine(ZMapGFFParser parser, char *line)
{
/* returns FALSE for two reasons: there was a parse error, or the header section has
* finished, for the latter we need to cancel the error. */
if
(
parser
->
error
)
if
(
parser
->
error
&&
parser
->
stop_on_error
)
{
result
=
FALSE
;
parser
->
state
=
ZMAPGFF_PARSE_ERROR
;
}
else
...
...
@@ -150,7 +160,7 @@ gboolean zMapGFFParseLine(ZMapGFFParser parser, char *line)
/* THIS NEEDS WORK, ONCE I'VE SORTED OUT ALL THE PARSING STUFF...... */
if
(
!
(
result
=
parseBodyLine
(
parser
,
line
)))
{
if
(
parser
->
error
)
if
(
parser
->
error
&&
parser
->
stop_on_error
)
{
result
=
FALSE
;
parser
->
state
=
ZMAPGFF_PARSE_ERROR
;
...
...
@@ -178,6 +188,25 @@ GArray *zmapGFFGetFeatures(ZMapGFFParser parser)
}
/* Record the caller's error policy in the parser: when stop_on_error is TRUE
 * the parser will not parse any further lines once it has encountered the
 * first error in the GFF file. */
void zMapGFFSetStopOnError(ZMapGFFParser parser, gboolean stop_on_error)
{
  /* Plain store of the flag; it is consulted when a line fails to parse. */
  parser->stop_on_error = stop_on_error;
}
/* Select how strictly feature types are matched: when SO_compliant is TRUE the
 * parser will only accept SO terms for feature types. */
void zMapGFFSetSOCompliance(ZMapGFFParser parser, gboolean SO_compliant)
{
  /* Plain store of the flag; it is handed to the feature type check for each body line. */
  parser->SO_compliant = SO_compliant;
}
/* Return the GFF version which the parser is using. This is determined from the GFF
* input stream from the header comments. */
int
zMapGFFGetVersion
(
ZMapGFFParser
parser
)
...
...
@@ -186,6 +215,13 @@ int zMapGFFGetVersion(ZMapGFFParser parser)
}
/* Return the line number of the last line processed (this is the same as the
 * number of lines processed so far). */
int zMapGFFGetLineNumber(ZMapGFFParser parser)
{
  int lines_seen = parser->line_count;

  return lines_seen;
}
/* If a zMapGFFNNN function has failed then this function returns a description of the error
* in the glib GError format. If there has been no error then NULL is returned. */
GError
*
zMapGFFGetError
(
ZMapGFFParser
parser
)
...
...
@@ -287,7 +323,8 @@ static gboolean parseHeaderLine(ZMapGFFParser parser, char *line)
if
((
fields
=
sscanf
(
line
,
format_str
,
&
version
))
!=
1
)
{
parser
->
error
=
g_error_new
(
parser
->
error_domain
,
ZMAP_GFF_ERROR_HEADER
,
"Bad ##gff-version line:
\"
%s
\"
"
,
line
)
;
"Bad ##gff-version line %d:
\"
%s
\"
"
,
parser
->
line_count
,
line
)
;
result
=
FALSE
;
}
else
...
...
@@ -307,7 +344,8 @@ static gboolean parseHeaderLine(ZMapGFFParser parser, char *line)
if
((
fields
=
sscanf
(
line
,
format_str
,
&
program
[
0
],
&
version
[
0
]))
!=
2
)
{
parser
->
error
=
g_error_new
(
parser
->
error_domain
,
ZMAP_GFF_ERROR_HEADER
,
"Bad ##source-version line:
\"
%s
\"
"
,
line
)
;
"Bad ##source-version line %d:
\"
%s
\"
"
,
parser
->
line_count
,
line
)
;
result
=
FALSE
;
}
else
...
...
@@ -329,7 +367,8 @@ static gboolean parseHeaderLine(ZMapGFFParser parser, char *line)
if
((
fields
=
sscanf
(
line
,
format_str
,
&
sequence_name
[
0
],
&
start
,
&
end
))
!=
3
)
{
parser
->
error
=
g_error_new
(
parser
->
error_domain
,
ZMAP_GFF_ERROR_HEADER
,
"Bad ##sequence-region line:
\"
%s
\"
"
,
line
)
;
"Bad ##sequence-region line %d:
\"
%s
\"
"
,
parser
->
line_count
,
line
)
;
result
=
FALSE
;
}
else
...
...
@@ -361,7 +400,7 @@ static gboolean parseHeaderLine(ZMapGFFParser parser, char *line)
* a number. The only tricky bit is to get at the attributes and comments which have
* white space in them, this scanf format string seems to do it:
*
* format_str = "%
49s%49s%49s%d%d%49s%49s%49
s %999[^#] %999c"
* format_str = "%
50s%50s%50s%d%d%50s%50s%50
s %999[^#] %999c"
*
* " %999[^#]" Jumps white space after the last mandatory field and then gets everything up to
* the next "#", so this will fail if people put a "#" in their attributes !
...
...
@@ -380,10 +419,10 @@ static gboolean parseBodyLine(ZMapGFFParser parser, char *line)
source
[
GFF_MAX_FIELD_CHARS
+
1
]
=
{
''
},
feature_type
[
GFF_MAX_FIELD_CHARS
+
1
]
=
{
''
},
score_str
[
GFF_MAX_FIELD_CHARS
+
1
]
=
{
''
},
strand_str
[
GFF_MAX_FIELD_CHARS
+
1
]
=
{
''
},
phase_str
[
GFF_MAX_FIELD_CHARS
+
1
]
=
{
''
},
attributes
[
GFF_MAX_F
IELD
_CHARS
+
1
]
=
{
''
},
comments
[
GFF_MAX_F
IELD
_CHARS
+
1
]
=
{
''
}
;
attributes
[
GFF_MAX_F
REETEXT
_CHARS
+
1
]
=
{
''
},
comments
[
GFF_MAX_F
REETEXT
_CHARS
+
1
]
=
{
''
}
;
int
start
=
0
,
end
=
0
;
double
score
=
0
;
char
*
format_str
=
"%
49s%49s%49s%d%d%49s%49s%49s %999
[^#] %
999
c"
;
char
*
format_str
=
"%
50s%50s%50s%d%d%50s%50s%50s %1000
[^#] %
1000
c"
;
int
fields
;
...
...
@@ -394,7 +433,8 @@ static gboolean parseBodyLine(ZMapGFFParser parser, char *line)
<
GFF_MANDATORY_FIELDS
)
{
parser
->
error
=
g_error_new
(
parser
->
error_domain
,
ZMAP_GFF_ERROR_BODY
,
"Mandatory fields missing in GFF line:
\"
%s
\"
"
,
line
)
;
"Mandatory fields missing in GFF line %d:
\"
%s
\"
"
,
parser
->
line_count
,
line
)
;
result
=
FALSE
;
}
else
...
...
@@ -403,15 +443,26 @@ static gboolean parseBodyLine(ZMapGFFParser parser, char *line)
ZMapStrand
strand
;
ZMapPhase
phase
;
/* I'm afraid I'm not doing assembly stuff at the moment, it's not worth it....if I need
* to change this decision I can just change this section.....
* Code just silently drops these lines.
* */
if
(
g_ascii_strcasecmp
(
source
,
"assembly_tag"
)
==
0
)
{
return
TRUE
;
}
if
(
strlen
(
sequence
)
==
GFF_MAX_FREETEXT_CHARS
||
strlen
(
source
)
==
GFF_MAX_FREETEXT_CHARS
||
(
strlen
(
feature_type
)
==
GFF_MAX_FREETEXT_CHARS
||
!
formatType
(
feature_type
,
&
type
))
||
(
strlen
(
feature_type
)
==
GFF_MAX_FREETEXT_CHARS
||
!
formatType
(
parser
->
SO_compliant
,
feature_type
,
&
type
))
||
!
formatScore
(
score_str
,
&
score
)
||
!
formatStrand
(
strand_str
,
&
strand
)
||
!
formatPhase
(
phase_str
,
&
phase
))
{
parser
->
error
=
g_error_new
(
parser
->
error_domain
,
ZMAP_GFF_ERROR_BODY
,
"Bad format GFF line:
\"
%s
\"
"
,
line
)
;
"Bad format GFF line %d:
\"
%s
\"
"
,
parser
->
line_count
,
line
)
;
result
=
FALSE
;
}
else
...
...
@@ -438,13 +489,15 @@ static gboolean makeNewFeature(ZMapGFFParser parser, char *sequence, char *sourc
ZMapFeature
feature
=
NULL
;
char
*
first_attr
=
NULL
;
ZMapGFFParserFeatureSet
feature_set
=
NULL
;
;
gboolean
has_name
=
TRUE
;
/* Look for an explicit feature name for the GFF record, if none exists use the sequence
* name itself. */
if
(
!
(
feature_name
=
getFeatureName
(
attributes
)))
feature_name
=
sequence
;
{
feature_name
=
sequence
;
has_name
=
FALSE
;
}
/* Check if the "source" for this feature is already known, if it is then check if there
* is already a multiline feature with the same name as we will need to augment it with this data. */
...
...
@@ -497,8 +550,13 @@ static gboolean makeNewFeature(ZMapGFFParser parser, char *sequence, char *sourc
/* THIS PIECE OF CODE WILL NEED TO BE CHANGED AS I DO MORE TYPES..... */
/* If the feature is one that must be built up from several GFF lines then add it to
* our set of such features. */
if
(
feature_type
!=
ZMAPFEATURE_HOMOL
)
* our set of such features. There are arcane/adhoc rules in action here, any features
* that do not have their own feature_name _cannot_ be multiline features as such features
* can _only_ be identified if they do have their own name. */
if
(
has_name
&&
(
feature_type
==
ZMAPFEATURE_SEQUENCE
||
feature_type
==
ZMAPFEATURE_TRANSCRIPT
||
feature_type
==
ZMAPFEATURE_EXON
||
feature_type
==
ZMAPFEATURE_INTRON
))
{
g_datalist_set_data
(
&
(
feature_set
->
multiline_features
),
feature_name
,
feature
)
;
}
...
...
@@ -639,7 +697,7 @@ static char *getFeatureName(char *attributes)
{
char
*
feature_name
=
NULL
;
int
attr_fields
;
char
*
attr_format_str
=
"%
49
s %*[
\"
]%50[^
\"
]%*[
\"
]%*s"
;
char
*
attr_format_str
=
"%
50
s %*[
\"
]%50[^
\"
]%*[
\"
]%*s"
;
char
class
[
GFF_MAX_FIELD_CHARS
+
1
]
=
{
''
},
name
[
GFF_MAX_FIELD_CHARS
+
1
]
=
{
''
}
;
...
...
@@ -716,51 +774,65 @@ static gboolean getHomolAttrs(char *attributes, ZMapHomolType *homol_type_out,
* Not all of these are below because I'm not sure what type they should be classified
* in to.
*
*
* If SO_compliant is TRUE then only recognised SO terms will be accepted for feature
* types, if FALSE then both SO terms and the earlier, more adhoc names will be accepted.
*
* */
gboolean
formatType
(
char
*
feature_type
,
ZMapFeatureType
*
type_out
)
gboolean
formatType
(
gboolean
SO_compliant
,
char
*
feature_type
,
ZMapFeatureType
*
type_out
)
{
gboolean
result
=
FALSE
;
ZMapFeatureType
type
=
ZMAPFEATURE_INVALID
;
if
(
g_ascii_strcasecmp
(
feature_type
,
"Clone"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"Clone_right_end"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"Clone_left_end"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"utr"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"experimental"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"reagent"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"repeat"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"structural"
)
==
0
)
/* Is feature_type a SO term. */
if
(
g_ascii_strcasecmp
(
feature_type
,
"trans_splice_acceptor_site"
)
==
0
)
{
type
=
ZMAPFEATURE_B
ASIC
;
type
=
ZMAPFEATURE_B
OUNDARY
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"
Pseudogen
e"
)
==
0
)
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"
transposable_element_insertion_sit
e"
)
==
0
)
{
/* REALLY NOT SURE ABOUT THIS CLASSIFICATION......SHOULD IT BE A TRANSCRIPT ? */
type
=
ZMAPFEATURE_BASIC
;
type
=
ZMAPFEATURE_VARIATION
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"
SNP
"
)
==
0
)
if
(
g_ascii_strcasecmp
(
feature_type
,
"
region
"
)
==
0
)
{
type
=
ZMAPFEATURE_
VARIATION
;
type
=
ZMAPFEATURE_
BASIC
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"
S
equence"
)
==
0
)
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"
virtual_s
equence"
)
==
0
)
{
type
=
ZMAPFEATURE_SEQUENCE
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"transcript"
)
==
0
)
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"reagent"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"clone"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"clone_end"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"clone_end"
)
==
0
)
{
type
=
ZMAPFEATURE_BASIC
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"UTR"
)
==
0
)
{
/* this should in the end be part of a transcript..... */
type
=
ZMAPFEATURE_BASIC
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"pseudogene"
)
==
0
)
{
/* In SO terms this is a region but we don't have a basic "region" type that includes
* exons like structure...suggests we need to remodel our feature struct.... */
type
=
ZMAPFEATURE_TRANSCRIPT
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"similarity"
)
==
0
)
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"experimental_result_region"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"chromosomal_structural_element"
)
==
0
)
{
type
=
ZMAPFEATURE_
HOMOL
;
type
=
ZMAPFEATURE_
BASIC
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"trans-splice_acceptor"
)
==
0
)
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"transcript"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"protein_coding_primary_transcript"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"CDS"
)
==
0
)
{
type
=
ZMAPFEATURE_
BOUNDARY
;
type
=
ZMAPFEATURE_
TRANSCRIPT
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"coding_exon"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"exon"
)
==
0
)
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"exon"
)
==
0
)
{
type
=
ZMAPFEATURE_EXON
;
}
...
...
@@ -768,7 +840,79 @@ gboolean formatType(char *feature_type, ZMapFeatureType *type_out)
{
type
=
ZMAPFEATURE_INTRON
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"nucleotide_match"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"expressed_sequence_match"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"EST_match"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"cDNA_match"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"translated_nucleotide_match"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"protein_match"
)
==
0
)
{
type
=
ZMAPFEATURE_HOMOL
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"repeat_region"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"inverted_repeat"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"tandem_repeat"
)
==
0
)
{
type
=
ZMAPFEATURE_BASIC
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"SNP"
)
==
0
)
{
type
=
ZMAPFEATURE_VARIATION
;
}
if
(
!
SO_compliant
)
{
if
(
g_ascii_strcasecmp
(
feature_type
,
"Clone_right_end"
)
==
0
)
{
type
=
ZMAPFEATURE_BASIC
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"Clone"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"Clone_left_end"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"utr"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"experimental"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"reagent"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"repeat"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"structural"
)
==
0
)
{
type
=
ZMAPFEATURE_BASIC
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"Pseudogene"
)
==
0
)
{
/* REALLY NOT SURE ABOUT THIS CLASSIFICATION......SHOULD IT BE A TRANSCRIPT ? */
type
=
ZMAPFEATURE_TRANSCRIPT
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"SNP"
)
==
0
)
{
type
=
ZMAPFEATURE_VARIATION
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"Sequence"
)
==
0
)
{
type
=
ZMAPFEATURE_SEQUENCE
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"transcript"
)
==
0
)
{
type
=
ZMAPFEATURE_TRANSCRIPT
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"similarity"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"transcription"
)
==
0
)
{
type
=
ZMAPFEATURE_HOMOL
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"trans-splice_acceptor"
)
==
0
)
{
type
=
ZMAPFEATURE_BOUNDARY
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"coding_exon"
)
==
0
||
g_ascii_strcasecmp
(
feature_type
,
"exon"
)
==
0
)
{
type
=
ZMAPFEATURE_EXON
;
}
else
if
(
g_ascii_strcasecmp
(
feature_type
,
"intron"
)
==
0
)
{
type
=
ZMAPFEATURE_INTRON
;
}
}
if
(
type
!=
ZMAPFEATURE_INVALID
)
...
...
This diff is collapsed.
Click to expand it.
src/zmapGFF/zmapGFF_P.h
+
19
−
24
View file @
e17ed1c5
...
...
@@ -25,9 +25,9 @@
* Description:
* Exported functions: See XXXXXXXXXXXXX.h
* HISTORY:
* Last edited: Jun 1
5
1
5:29
2004 (edgrif)
* Last edited: Jun 1
7
1
3:52
2004 (edgrif)
* Created: Sat May 29 13:18:32 2004 (edgrif)
* CVS info: $Id: zmapGFF_P.h,v 1.
1
2004-06-1
5
1
4:37:35
edgrif Exp $
* CVS info: $Id: zmapGFF_P.h,v 1.
2
2004-06-1
8
1
1:03:21
edgrif Exp $
*-------------------------------------------------------------------
*/
#ifndef ZMAP_GFF_P_H
...
...
@@ -37,7 +37,7 @@
/* Some defines for parsing stuff....my need v2 and v3 versions of these. */
/* Some defines for parsing stuff....m
a
y need v2 and v3 versions of these. */
/* Limits used when scanning GFF lines.
 * NOTE: the scanf format strings in the parser hard-code matching widths
 * (e.g. "%50s" and "%1000[^#]"), so change those together with these values. */
enum
{
  /* Fewer scanned fields than this in a body line is reported as an error. */
  GFF_MANDATORY_FIELDS = 8,

  /* Size (excluding the terminator) of the fixed field buffers such as
   * source and feature type. */
  GFF_MAX_FIELD_CHARS = 50,

  /* Size (excluding the terminator) of the attributes and comments buffers. */
  GFF_MAX_FREETEXT_CHARS = 1000
};
...
...
@@ -60,6 +60,9 @@ typedef enum
/* Some features need to be built up from multiple GFF lines so we keep associations
* of these features in arrays. The arrays are indexed via sources. These arrays are only used
* while building up the final arrays of features. */
/* For each set of features that come from a single source, we keep an array of those features
* but also a list of features that need to be built up from several GFF lines. */
...
...
@@ -86,8 +89,15 @@ typedef struct ZMapGFFParserFeatureSetStruct_
typedef
struct
ZMapGFFParserStruct_
{
ZMapGFFParseState
state
;
GError
*
error
;
/* Holds
recoverable
error. */
GError
*
error
;
/* Holds
last parser
error. */
GQuark
error_domain
;
gboolean
stop_on_error
;
/* Stop parsing if there is an error. */
int
line_count
;
/* Contains number of lines processed. */
gboolean
SO_compliant
;
/* TRUE => use only SO terms for
feature types. */
/* Header data, need to find all this for parsing to be valid. */
gboolean
done_version
;
...
...
@@ -101,30 +111,15 @@ typedef struct ZMapGFFParserStruct_
char
*
sequence_name
;
int
sequence_start
,
sequence_end
;
/* Some features need to be built up from multiple GFF lines so we keep associations
* of these features in arrays. The arrays are indexed via sources. These arrays are only used
* while building up the final arrays of features. */
GData
*
feature_sets
;
/* A list of arrays of features, the
arrays are indexed via their
"source". Hence each array contains
features from a single source. */
GData
*
feature_sets
;
/* A list of ZMapGFFParserFeatureSetStruct.
There is one of these structs per
"source". The struct contains among
other things an array of all
features for that source. */
gboolean
free_on_destroy
;
/* TRUE => free all feature arrays
when parser is destroyed. */
#ifdef ED_G_NEVER_INCLUDE_THIS_CODE
/* Let's not worry about this for now.........we could even just return the array of arrays
* dynamically by zooming through the sources and extracting all the sources.... */
/* The final array of arrays of features. Each sub-array contains features that all have the
* same source. */
GArray
*
feature_sets
;
#endif
/* ED_G_NEVER_INCLUDE_THIS_CODE */
}
ZMapGFFParserStruct
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment