transcript.t 38.3 KB
Newer Older
Magali Ruffier's avatar
Magali Ruffier committed
1
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Tiago Grego's avatar
Tiago Grego committed
2
# Copyright [2016-2019] EMBL-European Bioinformatics Institute
3 4 5 6 7 8 9 10 11 12 13 14 15
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Arne Stabenau's avatar
Arne Stabenau committed
16 17 18
use strict;
use warnings;

19
use Test::More;
20
use Test::Warnings qw(warning);
Arne Stabenau's avatar
Arne Stabenau committed
21

22 23
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::Test::TestUtils;
Arne Stabenau's avatar
Arne Stabenau committed
24
use Bio::EnsEMBL::Transcript;
25 26
use Bio::EnsEMBL::Exon;
use Bio::EnsEMBL::Translation;
Arne Stabenau's avatar
Arne Stabenau committed
27
use Bio::EnsEMBL::Slice;
Ian Longden's avatar
Ian Longden committed
28
use Bio::EnsEMBL::Intron;
Arne Stabenau's avatar
Arne Stabenau committed
29

30
my $multi = Bio::EnsEMBL::Test::MultiTestDB->new();
Arne Stabenau's avatar
Arne Stabenau committed
31 32 33 34

ok( $multi );

my $db = $multi->get_DBAdaptor('core' );
Magali Ruffier's avatar
Magali Ruffier committed
35
my $ontology = Bio::EnsEMBL::Test::MultiTestDB->new('ontology');
36 37
my $odb;
warning { $odb = $ontology->get_DBAdaptor("ontology"); };
Arne Stabenau's avatar
Arne Stabenau committed
38 39 40

my $sa = $db->get_SliceAdaptor();

41
my $slice = $sa->fetch_by_region('chromosome', "20", 30_249_935, 31_254_640 );
Arne Stabenau's avatar
Arne Stabenau committed
42 43 44 45 46 47

my $genes = $slice->get_all_Genes();

my $translates = 1;
my $utr_trans;

48
note( "Checking if all Transcripts translate and transform" );
Arne Stabenau's avatar
Arne Stabenau committed
49 50 51 52 53

for my $gene ( @$genes ) {
  for my $trans ( @{$gene->get_all_Transcripts()} ) {
    if( $trans->translate()->seq() =~ /\*./ ) {
      $translates = 0;
54
      note( $trans->stable_id()." does not translate." );
Arne Stabenau's avatar
Arne Stabenau committed
55 56
      last;
    }
57 58
    if( $trans->coding_region_start() != $trans->start() &&
	$trans->coding_region_end() != $trans->end() ) {
Arne Stabenau's avatar
Arne Stabenau committed
59 60 61 62
      $utr_trans = $trans->stable_id();
    }
  }

63 64
  $gene = $gene->transform( "contig" );
  next if( ! $gene );	
Arne Stabenau's avatar
Arne Stabenau committed
65 66 67
  for my $trans ( @{$gene->get_all_Transcripts()} ) {
    if( $trans->translate()->seq() =~ /\*./ ) {
      $translates = 0;
68
      note( $trans->stable_id()." does not translate." );
Arne Stabenau's avatar
Arne Stabenau committed
69 70 71 72 73
      last;
    }
  }
}

74
note( "utr Transcript is $utr_trans" );
Arne Stabenau's avatar
Arne Stabenau committed
75 76 77

ok( $translates );

78 79 80 81 82 83 84 85 86 87
#
# Try pulling off genes from an NTContig and making sure they still translate.
# This is a fairly good test of the chained coordinate mapping since the
# transcripts are stored in chromosomal coordinates and there is no direct
# mapping path to the supercontig coordinate system.
#
my $supercontig = $sa->fetch_by_region('supercontig', "NT_028392");

my $transcripts = $supercontig->get_all_Transcripts();

88
note( "Checking if all transcripts on NTContig NT_028392 translate and transform" );
89 90 91 92 93

$translates = 1;
for my $trans ( @$transcripts ) {
  if( $trans->translate()->seq() =~ /\*./ ) {
    $translates = 0;
94
    note( $trans->stable_id()." does not translate." );
95 96
    last;
  }
97
  note($trans->stable_id() .  ":" . $trans->translate()->seq() . "\n");
98 99 100 101
}

ok($translates);

Arne Stabenau's avatar
Arne Stabenau committed
102 103 104

my $ta = $db->get_TranscriptAdaptor();

105
note ("Transcript->list_dbIDs");
106 107 108
my $ids = $ta->list_dbIDs();
ok (@{$ids});

109
note ("Transcript->list_stable_ids");
110
my $stable_ids = $ta->list_stable_ids();
111 112
ok (@{$stable_ids});

Arne Stabenau's avatar
Arne Stabenau committed
113
my $tr = $ta->fetch_by_display_label( "DNMT3B" );
114
is($tr->dbID, 21737, 'Fetched correct dbID');
Arne Stabenau's avatar
Arne Stabenau committed
115 116 117


$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
Arne Stabenau's avatar
Arne Stabenau committed
118

119 120
$tr = $tr->transform('contig');

Arne Stabenau's avatar
Arne Stabenau committed
121 122
ok( $tr );

123
note ( "External transcript name: " . $tr->external_name );
124
is ($tr->external_name, "MAPRE1", 'Fetched correct external name');
125

126
note ( "External transcript dbname: " . $tr->external_db );
127
is ( $tr->external_db, 'HUGO' , 'Fetched correct external db');
128

129
note ( "Display_xref_id: " . $tr->display_xref->dbID() );
130
is ( $tr->display_xref->dbID(), 97759, 'Fetched correct display xref id' );
131
ok( test_getter_setter( $tr, "display_xref", 42 ));
132

Arne Stabenau's avatar
Arne Stabenau committed
133
ok( test_getter_setter( $tr, "dbID", 100000 ));
134
ok( test_getter_setter( $tr, "biotype", "NOVEL" ));
135 136 137 138
ok( test_getter_setter( $tr, "created_date", time() ));
ok( test_getter_setter( $tr, "modified_date", time() ));


139
my @date_time = gmtime( $tr->created_date());
140 141
ok( $date_time[3] == 6 && $date_time[4] == 11 && $date_time[5] == 104 );

142
@date_time = gmtime( $tr->modified_date());
143 144 145
ok( $date_time[3] == 6 && $date_time[4] == 11 && $date_time[5] == 104 );


Arne Stabenau's avatar
Arne Stabenau committed
146 147
ok( $tr->translation->isa( "Bio::EnsEMBL::Translation" ));

148
is( $tr->start(), 79874, 'Start is correct' );
Arne Stabenau's avatar
Arne Stabenau committed
149

150
is( $tr->end(), 110306, 'End is correct' );
Arne Stabenau's avatar
Arne Stabenau committed
151

152
is ( substr( $tr->spliced_seq(), 0, 10 ), "ACGAGACGAA", 'Start of spliced seq is correct' ); 
153 154 155
is ( substr( $tr->spliced_seq(1), 0, 10 ), "acgagacgaa", 'Spliced seq with utr lower casing is correct');
is ( length($tr->spliced_seq()), length($tr->spliced_seq(1)), "Spliced seq with or without utr lower casing has the same length");
is ( $tr->spliced_seq(), uc($tr->spliced_seq(1)), "Spliced seq is identical to upper case utr masked spliced seq");
156 157
is ( substr($tr->spliced_seq(1), 61, 6), 'aagATG', 'Start mask boundary on forward stand transcript is correct' );
is ( substr($tr->spliced_seq(1), 865, 6), 'TATtaa', 'End mask boundary on forward stand transcript is correct' );
Arne Stabenau's avatar
Arne Stabenau committed
158

159
is ( substr( $tr->translateable_seq(),0,10 ), "ATGGCAGTGA", 'Start of translateable sequence is correct' );
Arne Stabenau's avatar
Arne Stabenau committed
160

161
is( $tr->coding_region_start(), 85834, 'Correct coding region start' );
Arne Stabenau's avatar
Arne Stabenau committed
162

163
is( $tr->coding_region_end(), 108631, 'Correct coding region end' );
Arne Stabenau's avatar
Arne Stabenau committed
164

Tiago Grego's avatar
Tiago Grego committed
165
is( $tr->feature_so_acc, 'SO:0000673', 'Transcript feature SO acc is correct (transcript)' );
166
is( $tr->feature_so_term, 'transcript', 'Transcript feature SO term is correct (transcript)' );
Tiago Grego's avatar
Tiago Grego committed
167

Arne Stabenau's avatar
Arne Stabenau committed
168
my @pepcoords = $tr->pep2genomic( 10, 20 );
169
is( $pepcoords[0]->start(), 85861, 'Correct translation start' );
Arne Stabenau's avatar
Arne Stabenau committed
170

171 172 173 174 175 176
my $t_start = $tr->start;
my $t_end   = $tr->end;
my $t_strand = $tr->get_all_Exons->[0]->strand;
my $pep_len = ($tr->cdna_coding_end - $tr->cdna_coding_start + 1) / 3;

my $coord_num = 0;
177 178
my $coding_region_start = $tr->coding_region_start;
my $coding_region_end   = $tr->coding_region_end;
179 180
#expect coordinate for each exon with coding sequence 
foreach my $e (@{$tr->get_all_Exons}) {
181
  if($e->end > $coding_region_start && $e->start < $coding_region_end) {
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
    $coord_num++;
  }
}

my @coords = ();
my @gaps = ();
my $coord_len = 0;
foreach my $c ($tr->genomic2pep($t_start, $t_end, $t_strand)) {
  if($c->isa('Bio::EnsEMBL::Mapper::Gap')) {
    push @gaps, $c;
  } else {
    push @coords, $c;
  }
}

197
is(scalar(@coords), $coord_num, 'Number of coords is correct');
198
my ($last_coord) = reverse(@coords); 
199
is($pep_len, $last_coord->end, 'Peptide length matched end coordinate');
200

201 202
note( "start Exon: ".$tr->start_Exon->stable_id() );
note( "end Exon: ".$tr->end_Exon->stable_id() );
Arne Stabenau's avatar
Arne Stabenau committed
203

204
is($tr->cdna_coding_start, 65, 'Correct cdna coding start');
205 206
ok(test_getter_setter($tr, 'cdna_coding_start', 99));

207
is( substr( $tr->five_prime_utr()->seq(), -5, 5), "CGAAG", 'Five prime utr seq is correct' ); 
Arne Stabenau's avatar
Arne Stabenau committed
208

209
is($tr->cdna_coding_end, 868, 'Correct cdna coding end');
210 211
ok(test_getter_setter($tr, 'cdna_coding_end', 102));

212
is( substr( $tr->three_prime_utr()->seq(), -5, 5  ), "TTCAA", 'Three prime utr seq is correct');
Arne Stabenau's avatar
Arne Stabenau committed
213

214
is( scalar( @{$tr->get_all_Exons()} ), 7, 'Transcript has 7 exons' );
215 216 217

$tr->flush_Exons();

218
is( scalar( @{$tr->get_all_Exons()} ), 0, 'No exons left after flushing' );
219

220 221 222 223 224 225
# Fetch a fresh tr, check incomplete codon behavior
$tr = $ta->fetch_by_stable_id( "ENST00000300425" );

# By default the incomplete codon should be dropped
is( $tr->translate()->seq() =~ /P$/, 1, "Incomplete codon is not translated");
is( $tr->translate(1)->seq() =~ /PL$/, 1, "Incomplete codon is padded then translated");
226

Alistair Rust's avatar
Alistair Rust committed
227 228 229
# get a fresh tr to check the update method
$tr = $ta->fetch_by_stable_id( "ENST00000217347" );

230
$multi->save('core', 'transcript', 'meta_coord');
231

Alistair Rust's avatar
Alistair Rust committed
232 233 234 235
# the first update should have no effect
$ta->update($tr);

my $up_tr = $ta->fetch_by_stable_id( "ENST00000217347" );
236
is( $up_tr->display_xref->dbID(), 97759, 'Fetched the correct dbID' );
Alistair Rust's avatar
Alistair Rust committed
237

238
my $dbentryAdaptor = $db->get_DBEntryAdaptor();
Alistair Rust's avatar
Alistair Rust committed
239

240
$tr->display_xref($dbentryAdaptor->fetch_by_dbID( 614 ));
Alistair Rust's avatar
Alistair Rust committed
241 242 243
$ta->update($tr);

$up_tr = $ta->fetch_by_stable_id( "ENST00000217347" );
244
is ( $up_tr->display_xref->dbID(), 614, 'Fetched the correct display xref id');
Alistair Rust's avatar
Alistair Rust committed
245

246
$multi->restore('core', 'transcript', 'meta_coord');
247

248 249 250 251 252 253 254 255 256 257 258 259
#
# Test spliced_seq on a reverse strand transcript
#

$tr = $ta->fetch_by_stable_id( "ENST00000246229" );

is ( substr( $tr->spliced_seq(), 0, 10 ), "ATGGCCCGAC", 'Start of spliced seq is correct, rev strand' );
is ( substr( $tr->spliced_seq(1), 0, 10 ), "atggcccgac", 'Spliced seq with utr lower casing is correct, rev strand');
is ( length($tr->spliced_seq()), length($tr->spliced_seq(1)), "Spliced seq with or without utr lower casing has the same length, rev strand");
is ( $tr->spliced_seq(), uc($tr->spliced_seq(1)), "Spliced seq is identical to upper case utr masked spliced seq, rev strand");
is ( substr($tr->spliced_seq(1), 199, 6), 'gccATG', 'Start mask boundary on forward stand transcript is correct, rev strand' );
is ( substr($tr->spliced_seq(1), 1687, 6), 'CAGtag', 'End mask boundary on forward stand transcript is correct, rev strand' );
260 261 262 263


my $interpro = $ta->get_Interpro_by_transid("ENST00000252021");
foreach my $i (@$interpro) {
264
  note($i);
265 266 267 268 269 270 271 272
}
###currently no interpro info in the test db
ok(@$interpro == 1);

#
# test fetch_all_by_external_name
#

Ian Longden's avatar
Ian Longden committed
273
($tr) = @{$ta->fetch_all_by_external_name('PLAGL2')};
274
is($tr->stable_id, 'ENST00000246229', 'Fetched correct transcript by external name');
275

276 277 278 279 280
#
# test fetch_by_rnaproduct_id
#

$tr = $ta->fetch_by_rnaproduct_id(1);
281
is($tr->stable_id, 'ENST00000278995', 'Fetched correct transcript by rnaproduct id');
282

283 284 285 286 287 288
#
# test fetch_by_translation_id
#

$tr = $ta->fetch_by_translation_id(21734);

289
is($tr->stable_id, 'ENST00000201961', 'Fetched correct transcript by translation id');
290

291
is($tr->display_id(), $tr->stable_id(), 'Transcript stable id and display id are identical');
292

293 294 295
#
# test TranscriptAdaptor::fetch_all_by_biotype
#
296
note("Test fetch_all_by_biotype");
297
my @transcripts = @{$ta->fetch_all_by_biotype('protein_coding')};
298
is(@transcripts, 27, 'Fetching all protein coding transcript');
299
my $transcriptCount = $ta->count_all_by_biotype('protein_coding');
300
is($transcriptCount, 27, 'Counting all protein coding');
301
@transcripts = @{$ta->fetch_all_by_biotype(['protein_coding','pseudogene'])};
302
is(@transcripts, 27, 'Got 27 transcript');
303
$transcriptCount = $ta->count_all_by_biotype(['protein_coding', 'pseudogene']);
304
is($transcriptCount, 27, 'Count by biotype is correct');
305

306 307 308 309 310 311 312 313
#
# test TranscriptAdaptor::fetch_all_by_Slice
#
note("Test fetch_all_by_Slice");
@transcripts = @{$ta->fetch_all_by_Slice($slice)};
$transcriptCount = $ta->count_all_by_Slice($slice);
is(@transcripts, $transcriptCount, "Counted as many transcripts as were fetched from slice");

314 315 316 317
#
# test TranscriptAdaptor::fetch_all_by_source
#
note("Test fetch_all_by_source");
318
@transcripts = @{$ta->fetch_all_by_source('ensembl')};
319
note "Got ".scalar(@transcripts)." ensembl transcripts\n";
320
is(24, scalar(@transcripts));
321
$transcriptCount = $ta->count_all_by_source('ensembl');
322
is(24, $transcriptCount);
323 324
@transcripts = @{$ta->fetch_all_by_source(['havana','vega'])};
note "Got ".scalar(@transcripts)." (havana, vega) transcripts\n";
325
is(3, scalar(@transcripts));
326
$transcriptCount = $ta->count_all_by_source(['havana', 'vega']);
327
is(3, $transcriptCount);
328

Magali Ruffier's avatar
Magali Ruffier committed
329 330 331 332 333
#
# test TranscriptAdaptor::fetch_all
#
note("Test fetch_all");
@transcripts = @{ $ta->fetch_all() };
334
is(27, scalar(@transcripts), "Got 27 transcripts");
Magali Ruffier's avatar
Magali Ruffier committed
335 336 337 338 339

#
# test TranscriptAdaptor::fetch_all_by_GOTerm
#
note("Test fetch_all_by_GOTerm");
340 341
my $go_adaptor;
warning { $go_adaptor = $odb->get_OntologyTermAdaptor(); };
Magali Ruffier's avatar
Magali Ruffier committed
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369
my $go_term = $go_adaptor->fetch_by_accession('GO:0070363');
@transcripts = @{ $ta->fetch_all_by_GOTerm($go_term) };
is(scalar(@transcripts), 0, "Found 0 genes with fetch_all_by_GOTerm");


#
# test TranscriptAdaptor::fetch_all_by_exon_supporting_evidence
#
note("Test fetch_all_by_exon_supporting_evidence");
@transcripts = @{ $ta->fetch_all_by_exon_supporting_evidence('BRCA2', 'dna_align_feature') };
is(scalar(@transcripts), 0, "No transcripts with BRCA2 daf");
@transcripts = @{ $ta->fetch_all_by_exon_supporting_evidence('AK015740.1', 'dna_align_feature') };
is(scalar(@transcripts), 1, "1 transcript with AK015740.1 daf");
@transcripts = @{ $ta->fetch_all_by_exon_supporting_evidence('Q9NUG5', 'protein_align_feature') };
is(scalar(@transcripts), 1, "1 transcript with Q9NUG5 paf");

#
# test TranscriptAdaptor::fetch_all_by_transcript_supporting_evidence
#
note("Test fetch_all_by_transcript_supporting_evidence");
@transcripts = @{ $ta->fetch_all_by_transcript_supporting_evidence('BRCA2', 'dna_align_feature') };
is(scalar(@transcripts), 0, "No transcripts with BRCA2 daf");
@transcripts = @{ $ta->fetch_all_by_transcript_supporting_evidence('AK015740.1', 'dna_align_feature') };
is(scalar(@transcripts), 0, "0 transcripts with AK015740.1 daf");
@transcripts = @{ $ta->fetch_all_by_transcript_supporting_evidence('Q9NUG5', 'protein_align_feature') };
is(scalar(@transcripts), 0, "No transcripts with Q9NUG5 paf");


Ian Longden's avatar
Ian Longden committed
370 371 372 373 374 375 376 377 378 379 380 381 382
#
# Test get_all_Introns by joining Exons and introns
# and comparing it to the original
#

foreach my $stable_id (qw(ENST00000201961 ENST00000217347)){ #test both strands
#foreach my $stable_id (qw(ENST00000217347)){ #test both strands

  my $transcript_adaptor = $db->get_TranscriptAdaptor();
  my $transcript = 
    $transcript_adaptor->fetch_by_stable_id($stable_id);


383 384
  my @exons = (@{$transcript->get_all_Exons()});
  my @introns = (@{$transcript->get_all_Introns()});
Ian Longden's avatar
Ian Longden committed
385

386 387 388
  my @cds = (@{$transcript->get_all_CDS()});
  my @cds_introns = (@{$transcript->get_all_CDS_Introns()});

Ian Longden's avatar
Ian Longden committed
389 390 391 392 393
  my $orig_seq = $transcript->slice->subseq(
					    $transcript->start(),
					    $transcript->end(), 
					    $transcript->strand());

394 395 396 397 398
  my $cds_orig_seq = $transcript->slice->subseq(
                                            $transcript->coding_region_start(),
                                            $transcript->coding_region_end(),
                                            $transcript->strand());

Ian Longden's avatar
Ian Longden committed
399 400 401 402 403 404
  my $idl=0;
  my $new_seq = $exons[0]->seq()->seq();
  foreach my $intron (@introns){
    $new_seq .= $intron->seq;
    $new_seq .= $exons[$idl+1]->seq->seq();
    $idl++;
405

Ian Longden's avatar
Ian Longden committed
406
  }
407

408
  is($orig_seq, $new_seq, 'Correct new origin seq');
Ian Longden's avatar
Ian Longden committed
409

410 411 412 413 414 415 416 417 418 419 420
  my $cds_idl=0;
  my $new_cds_seq = $cds[0]->seq();
  foreach my $cds_intron (@cds_introns){
    $new_cds_seq .= $cds_intron->seq;
    $new_cds_seq .= $cds[$cds_idl+1]->seq();
    $cds_idl++;

  }

  is($cds_orig_seq, $new_cds_seq, 'Correct new cds origin seq');

Ian Longden's avatar
Ian Longden committed
421 422 423
}


424 425 426 427 428 429 430 431 432 433 434 435 436
#
# regression test:  five_prime_utr and three_prime_utr were failing
# for transcripts that had no UTR. undef should have been returned instead
#
$tr = $ta->fetch_by_stable_id('ENST00000246203');

my $three_prime = $tr->three_prime_utr();

ok(!defined($three_prime));

my $five_prime = $tr->five_prime_utr();

ok(!defined($five_prime));
Arne Stabenau's avatar
Arne Stabenau committed
437

438 439 440 441 442 443 444 445 446 447 448


#
# test removal of transcript
#
my $tl_count = count_rows($db, "translation");
my $ex_tr_count = count_rows($db, "exon_transcript");
my $tr_count = count_rows($db, "transcript");

my $ex_tr_minus = @{$tr->get_all_Exons()};

449 450


Monika Komorowska's avatar
Monika Komorowska committed
451 452 453
$multi->save("core", "transcript", "translation",
             "protein_feature", "exon",
             "exon_transcript", "object_xref",
454
             "supporting_feature", "dna_align_feature","protein_align_feature",
455
             'xref', "ontology_xref", "identity_xref", 'meta_coord');
456 457 458 459 460 461

$ta->remove($tr);

ok(!defined($tr->dbID()));
ok(!defined($tr->adaptor()));

462 463 464
is( count_rows( $db, "transcript"), ($tr_count - 1), 'Row count matches transcript count');
is( count_rows( $db, "translation"), ($tl_count - 1), 'Row count matches translation count');
is( count_rows( $db, "exon_transcript"), ($ex_tr_count - $ex_tr_minus), 'Row count matches exon count');
465

466 467 468 469 470 471
#
# test removal of transcript with supporting changes at the gene level
#

$tr = $ta->fetch_by_stable_id('ENST00000278995');
my $gene = $tr->get_Gene;
472
print $gene->stable_id."\n\n";
473 474 475 476 477 478 479 480 481 482 483 484 485 486
note(join "\n",map { $_->stable_id } @{$gene->get_all_Transcripts});

$ta->remove($tr,1);

ok(! grep { $_->stable_id eq 'ENST00000278995'} @{$gene->get_all_Transcripts});

# note(join "\n",map { $_->stable_id } @{$gene->get_all_Transcripts});
# old coords 30274334  30300924, the old gene above is still pointing to old coordinates, but remove cannot find it to change it.
my $new_copy_gene = $ta->fetch_by_stable_id('ENST00000310998')->get_Gene;
cmp_ok($new_copy_gene->start, '==', 30274334, 'Shortened Gene starts as before');
cmp_ok($new_copy_gene->end, '==', 30298904, 'Shortened Gene ends earlier');

$tr = $ta->fetch_by_stable_id('ENST00000278995');
ok(! defined $tr);
487 488 489 490 491 492 493 494 495 496
#
# test _rna_edit for transcripts
#

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );

#
# 5 prime UTR editing
#

497 498
$tr->edits_enabled(1);

499
my $seq1 = $tr->spliced_seq();
500 501
my $tlseq1 = $tr->translateable_seq();

502 503 504
my $cdna_cds_start1 = $tr->cdna_coding_start();
my $cdna_cds_end1   = $tr->cdna_coding_end();

505
my $attrib = Bio::EnsEMBL::Attribute->new
506 507 508
  (-code => '_rna_edit',
   -value => "1 6 GATTACA",
   -name => "RNA editing");
509 510 511

$tr->add_Attributes( $attrib );

512
my $seq2 = $tr->spliced_seq(1);
513 514 515 516

ok( $seq1 ne $seq2 );
ok( $seq2 =~ /^GATTACA/ );

517 518 519
my $cdna_cds_start2 = $tr->cdna_coding_start();
my $cdna_cds_end2   = $tr->cdna_coding_end();

520 521
is($cdna_cds_start1, $cdna_cds_start2 - 1, 'Both cdna starts match');
is($cdna_cds_end1, $cdna_cds_end2   - 1, 'Both cdna ends match');
522

523 524 525 526 527 528

# insert just at the start of the translation
# makes it longer. (For non phase zero start exons)
# cdna_coding_start for this transcript is 65, 64 is just before that

$attrib = Bio::EnsEMBL::Attribute->new
529 530 531
  ( -code => '_rna_edit',
    -value => "65 64 NNN",
    -name => "RNA editing");
532 533 534 535 536 537 538 539 540

$tr->add_Attributes( $attrib );

my $tlseq2 = $tr->translateable_seq();

ok( $tlseq1 ne $tlseq2 );
ok( $tlseq2 =~ /^NNNATG/ );
ok( $tlseq1 eq substr( $tlseq2,3 ));

541 542
is($cdna_cds_start2, $tr->cdna_coding_start(), 'Cds start matches cdna coding start');
is($cdna_cds_end2  , $tr->cdna_coding_end() - 3, 'Coding end -3 matches cds end');
543 544 545 546

# test that the edits can be disabled
$tr->edits_enabled(0);

547 548
is($tr->cdna_coding_start(), $cdna_cds_start1, 'Cds start matches cdna coding start');
is($tr->cdna_coding_end()  , $cdna_cds_end1, 'Cds end matches cdna coding end');
549

550 551
is($tr->spliced_seq(), $seq1, 'Spliced sequence is correct');
is($tr->translateable_seq(), $tlseq1, 'Translateable sequence is correct');
552

553 554 555 556 557 558 559
#
# try save and retrieve by lazy load
#

$multi->hide( "core", "transcript_attrib" );
my $attribAdaptor = $db->get_AttributeAdaptor();

560
$attribAdaptor->store_on_Transcript($tr->dbID, $tr->get_all_Attributes);
561 562

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
563
$tr->edits_enabled(1);
564

Glenn Proctor's avatar
Glenn Proctor committed
565 566 567
#print " $tr->translateable_seq() : " .  $tr->translateable_seq() . "\n";
#print "$tr->spliced_seq() : " . $tr->spliced_seq() . "\n";

568
is( $tr->translateable_seq(), $tlseq2, 'Translateable sequence is correct' );
569
ok( $tr->spliced_seq() =~ /^GATTACA/ );
570

571
$multi->restore();
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586

#
# test that the transcript mapper handles edits
#
$tr = $ta->fetch_by_translation_id(21734);
$slice = $tr->feature_Slice();
$tr = $tr->transfer($slice);
test_trans_mapper_edits($tr);

# test again with reverse strand
$tr = $ta->fetch_by_translation_id(21734);
$slice->invert();
$tr = $tr->transfer($slice);
test_trans_mapper_edits($tr);

587 588 589 590 591 592 593


# test that mitochondrial transcripts can be corrected translated with
# an alternate codon table

$tr = $ta->fetch_by_stable_id('ENST00000355555');

594
note($tr->translate->seq());
595

596
is($tr->translate->seq(), 'MNFALILMINTLLALLLMIITFWLPQLNGYMEKSTPYECGFDPMSPARVPFSMKFFLVAITFLLFDLEIALLLPLPWALQTTNLPLMVMSSLLLIIILALSLAYEWLQKGLDWAE', 'Translates correctly');
597 598 599



600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
# check that attributes are stored when transcript is stored

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
my $g = $db->get_GeneAdaptor->fetch_by_transcript_id($tr->dbID());


$tr->translation()->adaptor(undef);
$tr->translation()->dbID(undef);

# unstore the transcript so it can be stored again

foreach my $ex (@{$tr->get_all_Exons()}) {
  $ex->dbID(undef);
  $ex->adaptor(undef);
}

$tr->dbID(undef);
$tr->adaptor(undef);

Arne Stabenau's avatar
Arne Stabenau committed
619 620 621 622
{
  # testing transform with gaps in introns
  my $tr = $ta->fetch_by_dbID( 21739 );
  my $mapped_tr = $tr->transform( "alt_chrom" );
623
  is( $tr->spliced_seq(), $mapped_tr->spliced_seq(), 'Transcript seq matches mapped seq' );
Arne Stabenau's avatar
Arne Stabenau committed
624
}
625 626

$multi->hide('core', 'transcript', 'transcript_attrib', 'translation',
627
             'exon_transcript', 'exon', 'meta_coord');
628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645


my $attrib1 = Bio::EnsEMBL::Attribute->new
  ( -code => '_rna_edit',
    -value => "65 64 NNN",
    -name => "RNA editing");

$tr->add_Attributes($attrib1);

my $attrib2 = Bio::EnsEMBL::Attribute->new
  ( -code => '_rna_edit',
    -value => "66 65 NNN",
    -name => "RNA editing");

$tr->add_Attributes($attrib2);

$ta->store($tr, $g->dbID());

646
is(count_rows($db, 'transcript_attrib'), 2, '2 rows for transcript_attrib');
647

648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692

#
# specifically test genomic2cdna given coords either side of an insertion
#

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );

my $tmp_coding_start = $tr->coding_region_start;

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_rna_edit',
    -value => "68 67 NNN",
    -name => "RNA editing"
  )
);

$tr->edits_enabled(1);

is(substr($tr->translateable_seq, 0, 6), 'ATGNNN', 'insertion mapper test - edit applied');

is_deeply(
  [$tr->get_TranscriptMapper->genomic2cdna($tmp_coding_start, $tmp_coding_start + 4, 1)],
  [
    bless( {
      'coord_system' => undef,
      'strand' => 1,
      'id' => 'cdna',
      'rank' => 0,
      'end' => 67,
      'start' => 65
    }, 'Bio::EnsEMBL::Mapper::Coordinate' ),
    bless( {
      'coord_system' => undef,
      'strand' => 1,
      'id' => 'cdna',
      'rank' => 0,
      'end' => 72,
      'start' => 71
    }, 'Bio::EnsEMBL::Mapper::Coordinate' )
  ],
  'insertion mapper test - coords either side of inserted block'
);


693
$multi->restore('core');
694

695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760
#
# tests for translation start/end within a seq_edit
#
$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
$tr->edits_enabled(1);

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_rna_edit',
    -value => "1 0 CGTCGATGTTG",
  )
);
is(substr($tr->translateable_seq, 0, 6), 'ATGGCA', 'translation start in a seq_edit - seq before');
is(length($tr->translateable_seq),       804,      'translation start in a seq_edit - length before');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_transl_start',
    -value => "6",
  )
);
is(substr($tr->translateable_seq, 0, 6), 'ATGTTG', 'translation start in a seq_edit - seq after');
is(length($tr->translateable_seq),       874,      'translation start in a seq_edit - length after');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_rna_edit',
    -value => "869 868 CGTCGTGATTG",
  )
);
is(substr(reverse($tr->translateable_seq), 0, 11), reverse('CGTCGTGATTG'), 'translation end in a seq_edit - seq before');
is(length($tr->translateable_seq),                 885,                    'translation end in a seq_edit - length before');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_transl_end',
    -value => "884",
  )
);
is(substr(reverse($tr->translateable_seq), 0, 11), reverse('GAGTATCGTCG'), 'translation end in a seq_edit - seq after');
is(length($tr->translateable_seq),                 879,                    'translation end in a seq_edit - length after');

$multi->restore('core');

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
$tr->edits_enabled(1);

is(length($tr->translateable_seq), 804, 'explicit translation end with no seq_edit - length before');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_transl_end',
    -value => "873",
  )
);
is(length($tr->translateable_seq), 804, 'explicit translation end with no seq_edit - length after');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_rna_edit',
    -value => "869 868 CGTCGTGATTG",
  )
);
is(length($tr->translateable_seq), 809, 'explicit translation end with seq_edit - length after');

$multi->restore('core');
761

762
#
763
# tests for multiple versions of transcripts in a database
764
#
765

766
$tr = $ta->fetch_by_stable_id('ENST00000355555');
767
is( $tr->dbID, 21740, 'Fetched transcript by stable id' );
Arne Stabenau's avatar
Arne Stabenau committed
768

769
@transcripts = @{ $ta->fetch_all_versions_by_stable_id('ENST00000355555') };
770
is( scalar(@transcripts), 1, 'Fetched all transcripts by stable_id' );
771 772

$tr = $ta->fetch_by_translation_stable_id('ENSP00000355555');
773
is( $tr->dbID, 21740, 'Fetched transcript by translation stable id' );
774

Matthew Laird's avatar
Matthew Laird committed
775
$tr->stable_id_version('ENSP00000171455.4');
Matthew Laird's avatar
Matthew Laird committed
776
is($tr->stable_id, 'ENSP00000171455', 'Stable id set with stable_id_version');
Matthew Laird's avatar
Matthew Laird committed
777 778 779 780 781 782 783 784
is($tr->version, 4, 'Version set with stable_id_version');
is($tr->stable_id_version, 'ENSP00000171455.4', 'Stable id and version from stable_id_version');

$tr->stable_id_version('ENSP00000171456');
is($tr->stable_id, 'ENSP00000171456', 'Stable id set with stable_id_version');
is($tr->version, undef, 'Version undef from stable_id_version');
is($tr->stable_id_version, 'ENSP00000171456', 'Stable id and no version from stable_id_version');

Matthew Laird's avatar
Matthew Laird committed
785 786 787 788 789 790 791 792 793 794 795 796
$tr = $ta->fetch_by_translation_stable_id('ENSP00000355555.1');
is( $tr->dbID, 21740, 'Fetched transcript by translation stable id with version' );

$tr = $ta->fetch_by_translation_stable_id('ENSP00000355555.1a');
ok( ! defined($tr), 'Fetched transcript by translation stable id with bad version' );

$tr = $ta->fetch_by_translation_stable_id_version('ENSP00000355555', 1);
is( $tr->dbID, 21740, 'Fetched transcript by translation stable id (version) with version' );

$tr = $ta->fetch_by_translation_stable_id_version('ENSP00000355555', '1a');
ok( ! defined($tr), 'Fetched transcript by translation stable id (version) with bad version' );

797
@transcripts = @{ $ta->fetch_all_by_exon_stable_id('ENSE00001109603') };
798 799
is( scalar(@transcripts), 1);
is( $transcripts[0]->dbID, 21740, 'Fetched transcript by exon stable id' );
800 801 802

$g = $db->get_GeneAdaptor->fetch_by_stable_id('ENSG00000355555');
@transcripts = @{ $ta->fetch_all_by_Gene($g) };
803 804
is( scalar(@transcripts), 1);
is( $transcripts[0]->dbID, 21740, 'Fetched transcript by gene' );
805 806 807

my $sl = $sa->fetch_by_region('chromosome', 'MT_NC_001807');
@transcripts = @{ $sl->get_all_Transcripts };
808
is( scalar(@transcripts), 1, 'Fetched all transcripts for region' );
809

Ian Longden's avatar
Ian Longden committed
810
@transcripts = @{ $ta->fetch_all_by_external_name('MAE1_HUMAN') };
811 812
is( scalar(@transcripts), 1);
is( $transcripts[0]->dbID, 21738, 'Fetched transcript by external name' );
813

Ian Longden's avatar
Ian Longden committed
814
$tr = $ta->fetch_by_display_label('MAPRE1');
815
is( $tr->dbID, 21738, 'Fetched transcript by display label' );
816

817 818 819 820 821 822
# store/update

$tr = $ta->fetch_by_stable_id('ENST00000355555');
$g = $db->get_GeneAdaptor->fetch_by_transcript_id($tr->dbID);
$tr->get_all_Exons;

823 824 825
my $tl = $tr->translation;

$multi->hide( "core", "gene", "transcript", "translation", "meta_coord" );
826 827 828 829 830 831 832 833 834 835 836

$tr->version(3);
$tr->dbID(undef);
$tr->adaptor(undef);
$ta->store($tr, $g->dbID);

$tr->version(4);
$tr->is_current(0);
$tr->dbID(undef);
$tr->adaptor(undef);
$ta->store($tr, $g->dbID);
837 838 839 840 841 842 843 844 845

$tr->version(undef);
$tr->is_current(0);
$tr->dbID(undef);
$tr->adaptor(undef);
$tl->version(undef);
$tr->translation($tl);
$ta->store($tr, $g->dbID);

846
$tr = $ta->fetch_by_stable_id('ENST00000355555');
847
is($tr->is_current, 1, 'Transcript is current');   # 148
848 849 850

@transcripts = @{ $ta->fetch_all_versions_by_stable_id('ENST00000355555') };
foreach my $t (@transcripts) {
851 852 853
  if (defined $t->version && $t->version == 4) {
    is($t->is_current, 0, 'Transcript is not current');  # 149
  }
854 855 856 857 858
}

$tr->is_current(0);
$ta->update($tr);
my $t1 = $ta->fetch_by_stable_id('ENST00000355555');
859
ok(!$t1);   # 150
860 861 862 863

$tr->is_current(1);
$ta->update($tr);
$tr = $ta->fetch_by_stable_id('ENST00000355555');
864
is($tr->is_current, 1, 'Transcript is now current');   # 151
865

866 867 868 869 870 871 872 873 874
my $null_versions = 0;
foreach my $t (@transcripts) {
  if (! defined $t->version) {
    ok(! defined $t->translation->version);
    $null_versions++;
  }
}
is ( $null_versions, 1, "Null/undef version stored and retrieved");

875 876
$multi->restore;

877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907
# UTR Tests
{
  my $utr_testing = sub {
    my ($tid, $five_start, $five_end, $three_start, $three_end) = @_;
    my $t;
    if(ref($tid)) {
      $t = $tid;
    }
    else {
      $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
    }
    my $five_utr = $t->five_prime_utr_Feature();
    my $three_utr = $t->three_prime_utr_Feature();
    is($five_utr->start(), $five_start, 'Checking five prime UTR start');
    is($five_utr->end(), $five_end, 'Checking five prime UTR end');
    is($three_utr->start(), $three_start, 'Checking three prime UTR start');
    is($three_utr->end(), $three_end, 'Checking three prime UTR end');
  };
  my $three_prime_seq_test = sub {
    my ($tid) = @_;
    my $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
    my $true_seq = $t->three_prime_utr()->seq();
    my $seq = $t->three_prime_utr_Feature()->feature_Slice()->seq();
    is($seq, $true_seq, 'Asserting 3 prime seq as expected; we have a transcript ending in the last Exon');
  };
  note 'Testing +ve strand 5 prime Exon UTR readthrough';
  $utr_testing->(21729, 30653524, 30685637, 30707177, 30709209);
  
  note 'Testing -ve strand 5 prime Exon UTR readthrough';
  $utr_testing->(21726, 30578039, 30583588, 30568364, 30572314);
  $three_prime_seq_test->(21726);
908 909 910

  my $tid = 21729;
  my $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
911
  my $five_utrs = $t->get_all_five_prime_UTRs();
912 913 914 915
  is(scalar(@$five_utrs), 2, "There are 2 five prime utr features");
  is($five_utrs->[0]->start(), $t->seq_region_start(), "Correct five prime UTR start");
  is($five_utrs->[1]->end(), 30685637, "Correct five prime UTR end");

916
  my $three_utrs = $t->get_all_three_prime_UTRs();
917 918 919 920 921 922
  is(scalar(@$three_utrs), 1, "There is one three prime utr feature");
  is($three_utrs->[0]->start(), 30707177, "Correct three prime UTR start");
  is($three_utrs->[0]->end(), $t->seq_region_end(), "Correct three prime UTR end");

  $tid = 21726;
  $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
923
  $five_utrs = $t->get_all_five_prime_UTRs();
924 925 926 927
  is(scalar(@$five_utrs), 2, "There are 2 five prime utr features on reverse strand");
  is($five_utrs->[1]->start(), 30578039, "Correct five prime UTR start on reverse strand");
  is($five_utrs->[0]->end(), $t->seq_region_end(), "Correct five prime UTR end on reverse strand");

928
  $three_utrs = $t->get_all_three_prime_UTRs();
929 930 931
  is(scalar(@$three_utrs), 1, "There is one three prime utr feature on reverse strand");
  is($three_utrs->[0]->start(), $t->seq_region_start(), "Correct three prime UTR start on reverse strand");
  is($three_utrs->[0]->end(), 30572314, "Correct three prime UTR end on reverse strand");
932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976
  
  # we have to build some of our own as it's easier to see the coordinates & do the maths
  my $transcript_builder = sub {
    my ($local_slice, $exon_array, $seq_start, $seq_end, $exon_start_idx, $exon_end_idx) = @_;
    my @exons = map {
      Bio::EnsEMBL::Exon->new(-START => $_->[0], -END => $_->[1], -SLICE => $local_slice, -STRAND => $_->[2]);
    } @{$exon_array};
    my $t = Bio::EnsEMBL::Transcript->new(-SLICE => $local_slice, -EXONS => \@exons);
    $t->translation(Bio::EnsEMBL::Translation->new(
      -SEQ_START => $seq_start,
      -SEQ_END => $seq_end,
      -START_EXON => $exons[$exon_start_idx],
      -END_EXON => $exons[$exon_end_idx],
    ));
    return $t;
  };
  
  #Local slice of the whole of Chr20
  my $local_slice = $sa->fetch_by_region('chromosome', "20");
  
  note 'Testing +ve strand 3 prime Exon UTR readthrough';
  my $read_through_pos_three_prime = $transcript_builder->($local_slice, [[1,9,1],[30,39,1],[90,99,1]], 2, 3, 0, 1);
  $utr_testing->($read_through_pos_three_prime, 1, 1, 33, 99);

  note 'Testing -ve strand prime Exon UTR readthrough';
  my $read_through_neg_three_prime = $transcript_builder->($local_slice, [[90,99,-1], [30,39,-1], [1,9,-1]], 2, 3, 0, 1);
  $utr_testing->($read_through_neg_three_prime, 99, 99, 1, 36);

  note 'Testing no UTRs; we expect no features or sequence to be returned';
  my $no_pos_utrs = $transcript_builder->($local_slice, [[1,9,1]], 1, 9, 0, 0);
  ok(! defined $no_pos_utrs->five_prime_utr_Feature(), 'No 5 prime UTR means no feature');
  ok(! $no_pos_utrs->five_prime_utr(), 'No 5 prime UTR means no seq');
  ok(! defined $no_pos_utrs->three_prime_utr_Feature(), 'No 3 prime UTR means no feature');
  ok(! defined $no_pos_utrs->three_prime_utr(), 'No 3 prime UTR means no seq');
  
  note 'Testing no UTRs; we expect no features or sequence to be returned';
  my $no_pos_neg_utrs = $transcript_builder->($local_slice, [[1,9,-1]], 1, 9, 0, 0);
  ok(! defined $no_pos_neg_utrs->five_prime_utr_Feature(), 'No 5 prime UTR means no feature');
  ok(! $no_pos_neg_utrs->five_prime_utr(), 'No 5 prime UTR means no seq');
  ok(! defined $no_pos_neg_utrs->three_prime_utr_Feature(), 'No 3 prime UTR means no feature');
  ok(! defined $no_pos_neg_utrs->three_prime_utr(), 'No 3 prime UTR means no seq');
  
  
}

977 978 979 980 981 982 983 984 985 986 987 988 989
# CDS tests

{

  my $tid = 21726;
  my $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
  my $cds = $t->get_all_CDS();
  is(scalar(@$cds), scalar(@{$t->get_all_translateable_Exons()}), "There are 2 coding structures");
  is($cds->[1]->start, $t->coding_region_start, "Correct coding start");
  is($cds->[0]->end, $t->coding_region_end, "Correct coding end");

}

990 991 992
SKIP: {
  skip 'No registry support for SQLite yet', 1 if $db->dbc->driver() eq 'SQLite';

993 994 995 996 997 998
  #test the get_species_and_object_type method from the Registry
  my $registry = 'Bio::EnsEMBL::Registry';
  my ( $species, $object_type, $db_type ) = $registry->get_species_and_object_type('ENST00000355555');
  ok( $species eq 'homo_sapiens' && $object_type eq 'Transcript');
}

999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013