diff --git a/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm b/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm index 26811e8cb755334df710bd779dfebeaf6651ae82..cc73066371260582c271351e851acf80015fa251 100644 --- a/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm +++ b/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm @@ -124,6 +124,10 @@ sub print_feature { # Column 3 - feature, the ontology term for the kind of feature this row is my $so_term = eval { $so_mapper->to_name($feature); }; $@ and throw sprintf "Unable to map feature %s to SO term.\n$@", $summary{ID}; + if ($so_term eq 'protein_coding_gene') { +# Special treatment for protein_coding_gene, as more commonly expected term is 'gene' + $so_term = 'gene'; + } $row .= $so_term."\t"; # Column 4 - start, the start coordinate of the feature, here shifted to chromosomal coordinates @@ -137,6 +141,14 @@ sub print_feature { # non-circular, but end still before start else {$summary{'end'} = $summary{'start'};} } + if ($feature->slice()) { + if ($summary{'start'} < $feature->slice->start()) { + $summary{'start'} = $feature->slice->start(); + } + if ($summary{'end'} > $feature->slice->end()) { + $summary{'end'} = $feature->slice->end(); + } + } $row .= $summary{'start'} . "\t"; # Column 5 - end, coordinates (absolute) for the end of this feature @@ -173,6 +185,7 @@ sub print_feature { delete $summary{'start'}; delete $summary{'end'}; delete $summary{'strand'}; + delete $summary{'phase'}; delete $summary{'score'}; delete $summary{'source'}; # Slice the hash for specific keys in GFF-friendly order @@ -181,6 +194,24 @@ sub print_feature { while (my $key = shift @ordered_keys) { my $value = shift @ordered_values; if ($value) { + if ($key eq 'ID') { + if ($feature->isa('Bio::EnsEMBL::Transcript')) { + $value = 'transcript:' . $value; + } elsif ($feature->isa('Bio::EnsEMBL::Gene')) { + $value = 'gene:' . $value; + } else { + $value = $so_term . ':' . $value; + } + } + if ($key eq 'Parent') { + if ($feature->isa('Bio::EnsEMBL::Transcript')) { + $value = 'gene:' . $value; + } elsif ($feature->isa('Bio::EnsEMBL::Exon')) { + $value = 'transcript:' . $value; + } elsif ($so_term eq 'CDS') { + $value = 'transcript:' . $value; + } + } $row .= $key."=".uri_escape($value,'\t\n\r;=%&,'); delete $summary{$key}; $row .= ';' if scalar(@ordered_keys) > 0 || scalar(keys %summary) > 0; @@ -195,7 +226,7 @@ sub print_feature { $data_written = 1; } else { - if ($summary{$attribute}) { + if (defined $summary{$attribute}) { $row .= $attribute."=".uri_escape($summary{$attribute},'\t\n\r;=%&,'); $data_written = 1; }