From 5b4b6c9121a7488525f5243690ff7bfb9c0613ef Mon Sep 17 00:00:00 2001 From: edgrif <edgrif> Date: Mon, 30 Nov 2009 10:46:59 +0000 Subject: [PATCH] add code to allow separate parsing of sequence from file. --- src/zmapGFF/zmapGFF_P.h | 46 +++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/src/zmapGFF/zmapGFF_P.h b/src/zmapGFF/zmapGFF_P.h index 815e86074..d08fe6253 100755 --- a/src/zmapGFF/zmapGFF_P.h +++ b/src/zmapGFF/zmapGFF_P.h @@ -25,9 +25,9 @@ * Description: Internal types, functions etc. for the GFF parser, * currently this parser only does GFF v2. * HISTORY: - * Last edited: Jul 2 12:06 2009 (rds) + * Last edited: Nov 20 18:14 2009 (edgrif) * Created: Sat May 29 13:18:32 2004 (edgrif) - * CVS info: $Id: zmapGFF_P.h,v 1.19 2009-07-02 22:22:08 rds Exp $ + * CVS info: $Id: zmapGFF_P.h,v 1.20 2009-11-30 10:46:59 edgrif Exp $ *------------------------------------------------------------------- */ #ifndef ZMAP_GFF_P_H @@ -43,7 +43,8 @@ enum {GFF_MANDATORY_FIELDS = 8, GFF_MAX_FIELD_CHARS = 50, GFF_MAX_FREETEXT_CHARS /* possible states for parsing GFF file, rather trivial in fact.... */ -typedef enum {ZMAPGFF_PARSE_HEADER, ZMAPGFF_PARSE_BODY, ZMAPGFF_PARSE_ERROR} ZMapGFFParseState ; +typedef enum {ZMAPGFF_PARSE_HEADER, ZMAPGFF_PARSE_BODY, + ZMAPGFF_PARSE_SEQUENCE, ZMAPGFF_PARSE_ERROR} ZMapGFFParseState ; /* We follow glib convention in error domain naming: @@ -106,20 +107,30 @@ typedef struct ZMapGFFParserStruct_ int clip_start, clip_end ; /* Coords used for clipping. */ - /* Header data, need to find all this for parsing to be valid. */ - gboolean done_header ; - gboolean done_version ; + /* Parsing header data, need to find all this for parsing to be valid. */ + struct + { + unsigned int done_header : 1 ; + unsigned int done_version : 1 ; + unsigned int done_source : 1 ; + unsigned int done_date : 1 ; + unsigned int done_type : 1 ; + unsigned int done_sequence_region : 1 ; + } header_flags ; + int gff_version ; - gboolean done_source ; /* Not sure if we need this... */ char *source_name ; char *source_version ; - gboolean done_sequence_region ; + char *date ; + char *sequence_name ; int features_start, features_end ; + + /* Parsing feature data. */ ZMapFeatureTypeStyle locus_set_style ; /* cached locus style. */ GQuark locus_set_id ; /* If not zero then make a locus set from locus tags in sequence objects. */ @@ -128,26 +139,29 @@ typedef struct ZMapGFFParserStruct_ GFF records with a source from this list. */ - GData *feature_sets ; /* A list of ZMapGFFParserFeatureSetStruct. There is one of these structs per "source". The struct contains among other things an array of all features for that source. */ - /* These two are used for holding the attributes and comments fields of a GFF line, * these can be very long so need dynamic allocation. */ GString *attributes_str ; GString *comments_str ; - struct + + + /* Parsing DNA sequence data, used when DNA sequence is embedded in the file. */ + struct { - GString *raw_line_data; - ZMapSequenceStruct seq_data; - unsigned int in_sequence_block : 1; - unsigned int finished :1; - }parsed_sequence; + unsigned int done_start : 1 ; + unsigned int in_sequence_block : 1 ; + unsigned int done_finished :1 ; + } sequence_flags ; + GString *raw_line_data ; + ZMapSequenceStruct seq_data ; + } ZMapGFFParserStruct ; -- GitLab