From b84e25a270a77338c020643568c395c237697255 Mon Sep 17 00:00:00 2001
From: Andrew Yates <ayates@ebi.ac.uk>
Date: Wed, 29 May 2013 14:45:22 +0000
Subject: [PATCH] [ENSCORESW-490]. Point raised by Dan. Default GFF3 for source
 is . not ensembl. Using . and supporting basic features

---
 modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm    |  3 +-
 modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm |  4 +-
 modules/t/gffSerialiser.t                     | 41 +++++++++++++++++--
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm b/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm
index 428b7fe62d..2a7fdd13d0 100644
--- a/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm
+++ b/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm
@@ -114,9 +114,10 @@ my %transcript_so_mapping = (
     );
 
 my %feature_so_mapping = (
+    'Bio::EnsEMBL::Feature' => 'SO:0000001', # region
     'Bio::EnsEMBL::Gene' => 'SO:0000704', # gene
     'Bio::EnsEMBL::Transcript' => 'SO:0000673', # transcript
-    'Bio::EnsEMBL::Exon' => 'SO:0000147',
+    'Bio::EnsEMBL::Exon' => 'SO:0000147', # exon
     'Bio::EnsEMBL::Slice' => 'SO:0000001', # region
     'Bio::EnsEMBL::SimpleFeature' => 'SO:0001411', # biological_region
     'Bio::EnsEMBL::MiscFeature' => 'SO:0001411', # biological_region
diff --git a/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm b/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm
index 3274d2ba0b..d80846d297 100644
--- a/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm
+++ b/modules/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm
@@ -52,7 +52,7 @@ my %strand_conversion = ( '1' => '+', '0' => '?', '-1' => '-');
     Constructor
     Arg [1]    : Ontology Adaptor
     Arg [2]    : Optional File handle
-    Arg [3]    : Default source of the features. Defaults to ensembl
+    Arg [3]    : Default source of the features. Defaults to '.' (GFF3's undefined-value marker)
     
     Returntype : Bio::EnsEMBL::Utils::IO::GFFSerializer
 
@@ -77,7 +77,7 @@ sub new {
         $self->{'stdout'} = 1;
     }
     if(!defined $self->{default_source}) {
-        $self->{default_source} = 'ensembl';
+        $self->{default_source} = '.';
     }
     return $self;
 }
diff --git a/modules/t/gffSerialiser.t b/modules/t/gffSerialiser.t
index 4fa0c8e557..f8a27ec23d 100644
--- a/modules/t/gffSerialiser.t
+++ b/modules/t/gffSerialiser.t
@@ -8,9 +8,10 @@ sub new {
   return bless({}, ref($class) || $class);
 }
 
+# Default lookup result: name() always returns 'region'.
 sub name {
   my ($self) = @_;
-  return 'feature';
+  return 'region';
 }
 
 package Test::SO;
@@ -34,7 +35,10 @@ use warnings;
 use Test::More;
 use Bio::EnsEMBL::Test::MultiTestDB;
 use Bio::EnsEMBL::Utils::IO::GFFSerializer;
+use Bio::EnsEMBL::Feature;
+use Bio::EnsEMBL::Slice;
 use IO::String;
+use Test::Differences;
 
 my $db = Bio::EnsEMBL::Test::MultiTestDB->new();
 my $dba = $db->get_DBAdaptor('core');
@@ -53,7 +57,7 @@ my $ga = $dba->get_GeneAdaptor();
 OUT
   #Have to do this outside of the HERETO thanks to tabs
   $expected .= join("\t", 
-    qw/20  ensembl feature 30274334  30300924  . + ./,
+    qw/20  ensembl region 30274334  30300924  . + ./,
     'ID=ENSG00000131044;biotype=protein_coding;external_name=C20orf125;logic_name=ensembl' 
   );
   $expected .= "\n";
@@ -61,6 +65,35 @@ OUT
   assert_gff3($gene, $expected, 'Gene with no source serialises to GFF3 as expected. Source is ensembl');
 }
 
+{ # A bare Bio::EnsEMBL::Feature sets no source, so column 2 is expected to be the serializer default '.'
+  my $cs = $dba->get_CoordSystemAdaptor()->fetch_by_name('chromosome');
+  my $feature = Bio::EnsEMBL::Feature->new(
+    -SLICE => Bio::EnsEMBL::Slice->new(
+      -COORD_SYSTEM => $cs,
+      -SEQ => ('A'x10),
+      -SEQ_REGION_NAME => 'wibble',
+      -START => 1,
+      -END => 10
+    ),
+    -START => 1,
+    -END => 10,
+    -STRAND => 1,
+  );
+  my $expected = <<'OUT';
+##gff-version 3
+##sequence-region   wibble 1 10
+OUT
+  # The feature line is built outside the heredoc because its columns must be joined with literal tabs
+  $expected .= join("\t", 
+    qw/wibble  . region 1  10  . + ./,
+    '' # empty final (attributes) field: join() still emits the tab before it, hence the trailing \t
+  );
+  $expected .= "\n";
+
+  assert_gff3($feature, $expected, 'Default feature should seralise without attributes but leave a trailing \t');
+}
+
+
 {
   my $gene = $ga->fetch_by_stable_id($id);
   $gene->source('wibble');
@@ -70,7 +103,7 @@ OUT
 OUT
   #Have to do this outside of the HERETO thanks to tabs
   $expected .= join("\t", 
-    qw/20  wibble feature 30274334  30300924  . + ./,
+    qw/20  wibble region 30274334  30300924  . + ./,
     'ID=ENSG00000131044;biotype=protein_coding;description=DJ310O13.1.2 (NOVEL PROTEIN SIMILAR DROSOPHILA PROTEIN CG7474%2C ISOFORM 2 ) (FRAGMENT). [Source:SPTREMBL%3BAcc:Q9BR18];external_name=C20orf125;logic_name=ensembl' 
   );
   $expected .= "\n";
@@ -85,7 +118,7 @@ sub assert_gff3 {
   my $ser = Bio::EnsEMBL::Utils::IO::GFFSerializer->new($ota, $fh);
   $ser->print_main_header([$feature->feature_Slice()]);
   $ser->print_feature($feature);
-  is(${$fh->string_ref()}, $expected, $msg);
+  eq_or_diff(${$fh->string_ref()}, $expected, $msg);
 }
 
 done_testing();
\ No newline at end of file
-- 
GitLab