Merge pull request #168 from Ensembl/ENSCORESW-1971

Enscoresw 1971 transl_table qualifier added to genbank and embl flat file dumps

Merge pull request #168 from Ensembl/ENSCORESW-1971
Enscoresw 1971 transl_table qualifier added to genbank and embl flat file dumps
55a6baea · Kieron Taylor · GitHub · 45ea16c4 · 6a9b3375 · 55a6baea
Commit 55a6baea authored 8 years ago by Kieron Taylor Committed by GitHub 8 years ago
--- a/modules/Bio/EnsEMBL/Utils/SeqDumper.pm
+++ b/modules/Bio/EnsEMBL/Utils/SeqDumper.pm
@@ -827,7 +827,12 @@ sub _dump_feature_table {
            $self->features2location($transcript->get_all_translateable_Exons);
          $self->write(@ff,'CDS', $value);
          my $codon_start = $self->transcript_to_codon_start($transcript);
-          $self->write(@ff,''   , qq{/codon_start="${codon_start}"}) if $codon_start > 1; 
+          $self->write(@ff,''   , qq{/codon_start="${codon_start}"}) if $codon_start > 1;
+
+          my $codon_table_id = $self->_get_codon_table_id($slice);
+          if($codon_table_id > 1){
+            $self->write(@ff,''   , '/transl_table='.$codon_table_id);
+          }
          $self->write(@ff,''   , '/gene="'.$gene->stable_id_version().'"'); 
          $self->write(@ff,''   , '/protein_id="'.$translation->stable_id_version().'"');
          $self->write(@ff,''   ,'/note="transcript_id='.$transcript->stable_id_version().'"');
@@ -1004,6 +1009,29 @@ sub transcript_to_codon_start {
 }


+=head2 _get_codon_table_id
+
+  Arg [1]    : Bio::EnsEMBL::Slice slice
+  Example    : none
+  Description: Helper method to get the codon_table seq region attribute of a slice.
+               codon_table identifies the genetic code table to use when it is not the universal genetic code table (1).
+               If the slice has no codon_table attribute, 1 is returned and no qualifier is shown in flat files.
+               If the value is greater than 1, it is shown as a transl_table qualifier on the CDS feature.
+  Returntype : int
+  Caller     : internal
+
+=cut
+
+sub _get_codon_table_id{ # returns the slice's first codon_table attribute value, or 1 when it has none
+  my ($self, $slice) = @_;
+  my $codon_table_id = 1; # fallback used when no codon_table attribute is found
+  my $codon_table_attributes = $slice->get_all_Attributes("codon_table"); # dereferenced below as an array ref
+  if (@{$codon_table_attributes}) { # true only when at least one attribute came back
+    $codon_table_id = $codon_table_attributes->[0]->value; # only the first attribute is consulted
+  }
+  return $codon_table_id;
+}
+
 =head2 dump_fasta

  Arg [1]    : Bio::EnsEMBL::Slice

--- a/modules/t/seqDumper.t
+++ b/modules/t/seqDumper.t
@@ -105,7 +105,20 @@ my $index_count_fh = sub {
    is(scalar(@{$lines}), 1, 'Expect only 1 EMBL SQ line describing a sequence');
    is($lines->[0], 'SQ   Sequence     100001 BP;      24986 A;      24316 C;      24224 G;      26475 T;          0 other;', 'Formatting of SQ as expected');
  }
+
+  # check if transl_table is included
+  $sd = Bio::EnsEMBL::Utils::SeqDumper->new();
+  $sd->{feature_types}->{$_} = 0 for keys %{$sd->{feature_types}};
+  $sd->{feature_types}->{'gene'} = 1;
  
+  {
+    my $mt_slice = $slice_adaptor->fetch_by_region('chromosome', 'MT_NC_001807', 10060, 10405);
+    my $fh = IO::String->new();
+    $sd->dump_embl($mt_slice, $fh);
+    my $lines = $index_fh->($fh, 'FT ');
+    like( $lines->[9], qr/FT\s+\/transl_table=2/,  "Expected transl_table line at FT  CDS");
+  }
+
  {
    my $fh = IO::String->new();
    $sd->dump_genbank($slice, $fh);

--- a/modules/t/test-genome-DBs/homo_sapiens/core/exon_transcript.txt
+++ b/modules/t/test-genome-DBs/homo_sapiens/core/exon_transcript.txt
@@ -168,6 +168,6 @@
 162030	21739	1
 162031	21739	4
 162032	21739	3
-162033	21740	0
+162033	21740	1
 162034	21741	1
 162035	21741	2
\ No newline at end of file