From b84e25a270a77338c020643568c395c237697255 Mon Sep 17 00:00:00 2001 From: Andrew Yates <ayates@ebi.ac.uk> Date: Wed, 29 May 2013 14:45:22 +0000 Subject: [PATCH] [ENSCORESW-490]. Point raised by Dan. Default GFF3 for source is . not ensembl. Using . and supporting basic features --- modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm | 3 +- modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm | 4 +- modules/t/gffSerialiser.t | 41 +++++++++++++++++-- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm b/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm index 428b7fe62d..2a7fdd13d0 100644 --- a/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm +++ b/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm @@ -114,9 +114,10 @@ my %transcript_so_mapping = ( ); my %feature_so_mapping = ( + 'Bio::EnsEMBL::Feature' => 'SO:0000001', # region 'Bio::EnsEMBL::Gene' => 'SO:0000704', # gene 'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript - 'Bio::EnsEMBL::Exon' => 'SO:0000147', + 'Bio::EnsEMBL::Exon' => 'SO:0000147', # exon 'Bio::EnsEMBL::Slice' => 'SO:0000001', # region 'Bio::EnsEMBL::SimpleFeature' => 'SO:0001411', # biological_region 'Bio::EnsEMBL::MiscFeature' => 'SO:0001411', # biological_region diff --git a/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm b/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm index 3274d2ba0b..d80846d297 100644 --- a/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm +++ b/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm @@ -52,7 +52,7 @@ my %strand_conversion = ( '1' => '+', '0' => '?', '-1' => '-'); Constructor Arg [1] : Ontology Adaptor Arg [2] : Optional File handle - Arg [3] : Default source of the features. Defaults to ensembl + Arg [3] : Default source of the features. Defaults to . Returntype : Bio::EnsEMBL::Utils::IO::GFFSerializer @@ -77,7 +77,7 @@ sub new { $self->{'stdout'} = 1; } if(!defined $self->{default_source}) { - $self->{default_source} = 'ensembl'; + $self->{default_source} = '.'; } return $self; } diff --git a/modules/t/gffSerialiser.t b/modules/t/gffSerialiser.t index 4fa0c8e557..f8a27ec23d 100644 --- a/modules/t/gffSerialiser.t +++ b/modules/t/gffSerialiser.t @@ -8,9 +8,10 @@ sub new { return bless({}, ref($class) || $class); } +# default lookup and always returns region sub name { my ($self) = @_; - return 'feature'; + return 'region'; } package Test::SO; @@ -34,7 +35,10 @@ use warnings; use Test::More; use Bio::EnsEMBL::Test::MultiTestDB; use Bio::EnsEMBL::Utils::IO::GFFSerializer; +use Bio::EnsEMBL::Feature; +use Bio::EnsEMBL::Slice; use IO::String; +use Test::Differences; my $db = Bio::EnsEMBL::Test::MultiTestDB->new(); my $dba = $db->get_DBAdaptor('core'); @@ -53,7 +57,7 @@ my $ga = $dba->get_GeneAdaptor(); OUT #Have to do this outside of the HERETO thanks to tabs $expected .= join("\t", - qw/20 ensembl feature 30274334 30300924 . + ./, + qw/20 ensembl region 30274334 30300924 . + ./, 'ID=ENSG00000131044;biotype=protein_coding;external_name=C20orf125;logic_name=ensembl' ); $expected .= "\n"; @@ -61,6 +65,35 @@ OUT assert_gff3($gene, $expected, 'Gene with no source serialises to GFF3 as expected. Source is ensembl'); } +{ + my $cs = $dba->get_CoordSystemAdaptor()->fetch_by_name('chromosome'); + my $feature = Bio::EnsEMBL::Feature->new( + -SLICE => Bio::EnsEMBL::Slice->new( + -COORD_SYSTEM => $cs, + -SEQ => ('A'x10), + -SEQ_REGION_NAME => 'wibble', + -START => 1, + -END => 10 + ), + -START => 1, + -END => 10, + -STRAND => 1, + ); + my $expected = <<'OUT'; +##gff-version 3 +##sequence-region wibble 1 10 +OUT + #Have to do this outside of the HERETO thanks to tabs + $expected .= join("\t", + qw/wibble . region 1 10 . + ./, + '' + ); + $expected .= "\n"; + + assert_gff3($feature, $expected, 'Default feature should seralise without attributes but leave a trailing \t'); +} + + { my $gene = $ga->fetch_by_stable_id($id); $gene->source('wibble'); @@ -70,7 +103,7 @@ OUT OUT #Have to do this outside of the HERETO thanks to tabs $expected .= join("\t", - qw/20 wibble feature 30274334 30300924 . + ./, + qw/20 wibble region 30274334 30300924 . + ./, 'ID=ENSG00000131044;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];external_name=C20orf125;logic_name=ensembl' ); $expected .= "\n"; @@ -85,7 +118,7 @@ sub assert_gff3 { my $ser = Bio::EnsEMBL::Utils::IO::GFFSerializer->new($ota, $fh); $ser->print_main_header([$feature->feature_Slice()]); $ser->print_feature($feature); - is(${$fh->string_ref()}, $expected, $msg); + eq_or_diff(${$fh->string_ref()}, $expected, $msg); } done_testing(); \ No newline at end of file -- GitLab