Skip to content
Snippets Groups Projects
Commit d8e7a9d6 authored by Magali Ruffier's avatar Magali Ruffier
Browse files

Update the GFF3 output format:

- for the ID field, prepend the object type to the stable ID
- if the gene biotype is protein_coding, the SO term is replaced by 'gene'
parent 0c238223
No related branches found
No related tags found
3 merge requests: !25 "GFF3 export: unstranded features and using analysis.gff_source", !25 "GFF3 export: unstranded features and using analysis.gff_source", !25 "GFF3 export: unstranded features and using analysis.gff_source"
......@@ -124,6 +124,10 @@ sub print_feature {
# Column 3 - feature, the ontology term for the kind of feature this row is
my $so_term = eval { $so_mapper->to_name($feature); };
$@ and throw sprintf "Unable to map feature %s to SO term.\n$@", $summary{ID};
if ($so_term eq 'protein_coding_gene') {
# Special treatment for protein_coding_gene, as more commonly expected term is 'gene'
$so_term = 'gene';
}
$row .= $so_term."\t";
# Column 4 - start, the start coordinate of the feature, here shifted to chromosomal coordinates
......@@ -137,6 +141,14 @@ sub print_feature {
# non-circular, but end still before start
else {$summary{'end'} = $summary{'start'};}
}
if ($feature->slice()) {
if ($summary{'start'} < $feature->slice->start()) {
$summary{'start'} = $feature->slice->start();
}
if ($summary{'end'} > $feature->slice->end()) {
$summary{'end'} = $feature->slice->end();
}
}
$row .= $summary{'start'} . "\t";
# Column 5 - end, coordinates (absolute) for the end of this feature
......@@ -173,6 +185,7 @@ sub print_feature {
delete $summary{'start'};
delete $summary{'end'};
delete $summary{'strand'};
delete $summary{'phase'};
delete $summary{'score'};
delete $summary{'source'};
# Slice the hash for specific keys in GFF-friendly order
......@@ -181,6 +194,24 @@ sub print_feature {
while (my $key = shift @ordered_keys) {
my $value = shift @ordered_values;
if ($value) {
if ($key eq 'ID') {
if ($feature->isa('Bio::EnsEMBL::Transcript')) {
$value = 'transcript:' . $value;
} elsif ($feature->isa('Bio::EnsEMBL::Gene')) {
$value = 'gene:' . $value;
} else {
$value = $so_term . ':' . $value;
}
}
if ($key eq 'Parent') {
if ($feature->isa('Bio::EnsEMBL::Transcript')) {
$value = 'gene:' . $value;
} elsif ($feature->isa('Bio::EnsEMBL::Exon')) {
$value = 'transcript:' . $value;
} elsif ($so_term eq 'CDS') {
$value = 'transcript:' . $value;
}
}
$row .= $key."=".uri_escape($value,'\t\n\r;=%&,');
delete $summary{$key};
$row .= ';' if scalar(@ordered_keys) > 0 || scalar(keys %summary) > 0;
......@@ -195,7 +226,7 @@ sub print_feature {
$data_written = 1;
}
else {
if ($summary{$attribute}) {
if (defined $summary{$attribute}) {
$row .= $attribute."=".uri_escape($summary{$attribute},'\t\n\r;=%&,');
$data_written = 1;
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment