transcript.t 38.4 KB
Newer Older
Magali Ruffier's avatar
Magali Ruffier committed
1
# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Tiago Grego's avatar
Tiago Grego committed
2
# Copyright [2016-2019] EMBL-European Bioinformatics Institute
3 4 5 6 7 8 9 10 11 12 13 14 15
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Arne Stabenau's avatar
Arne Stabenau committed
16 17 18
use strict;
use warnings;

19
use Test::More;
20
use Test::Warnings qw(warning);
Arne Stabenau's avatar
Arne Stabenau committed
21

22 23
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::Test::TestUtils;
Arne Stabenau's avatar
Arne Stabenau committed
24
use Bio::EnsEMBL::Transcript;
25 26
use Bio::EnsEMBL::Exon;
use Bio::EnsEMBL::Translation;
Arne Stabenau's avatar
Arne Stabenau committed
27
use Bio::EnsEMBL::Slice;
Ian Longden's avatar
Ian Longden committed
28
use Bio::EnsEMBL::Intron;
Arne Stabenau's avatar
Arne Stabenau committed
29

30
my $multi = Bio::EnsEMBL::Test::MultiTestDB->new();
Arne Stabenau's avatar
Arne Stabenau committed
31 32 33 34

ok( $multi );

my $db = $multi->get_DBAdaptor('core' );
Magali Ruffier's avatar
Magali Ruffier committed
35
my $ontology = Bio::EnsEMBL::Test::MultiTestDB->new('ontology');
36 37
my $odb;
warning { $odb = $ontology->get_DBAdaptor("ontology"); };
Arne Stabenau's avatar
Arne Stabenau committed
38 39 40

my $sa = $db->get_SliceAdaptor();

41
my $slice = $sa->fetch_by_region('chromosome', "20", 30_249_935, 31_254_640 );
Arne Stabenau's avatar
Arne Stabenau committed
42 43 44 45 46 47

my $genes = $slice->get_all_Genes();

my $translates = 1;
my $utr_trans;

48
note( "Checking if all Transcripts translate and transform" );
Arne Stabenau's avatar
Arne Stabenau committed
49 50 51 52 53

for my $gene ( @$genes ) {
  for my $trans ( @{$gene->get_all_Transcripts()} ) {
    if( $trans->translate()->seq() =~ /\*./ ) {
      $translates = 0;
54
      note( $trans->stable_id()." does not translate." );
Arne Stabenau's avatar
Arne Stabenau committed
55 56
      last;
    }
57 58
    if( $trans->coding_region_start() != $trans->start() &&
	$trans->coding_region_end() != $trans->end() ) {
Arne Stabenau's avatar
Arne Stabenau committed
59 60 61 62
      $utr_trans = $trans->stable_id();
    }
  }

63 64
  $gene = $gene->transform( "contig" );
  next if( ! $gene );	
Arne Stabenau's avatar
Arne Stabenau committed
65 66 67
  for my $trans ( @{$gene->get_all_Transcripts()} ) {
    if( $trans->translate()->seq() =~ /\*./ ) {
      $translates = 0;
68
      note( $trans->stable_id()." does not translate." );
Arne Stabenau's avatar
Arne Stabenau committed
69 70 71 72 73
      last;
    }
  }
}

74
note( "utr Transcript is $utr_trans" );
Arne Stabenau's avatar
Arne Stabenau committed
75 76 77

ok( $translates );

78 79 80 81 82 83 84 85 86 87
#
# Try pulling off genes from an NTContig and making sure they still translate.
# This is a fairly good test of the chained coordinate mapping since the
# transcripts are stored in chromosomal coordinates and there is no direct
# mapping path to the supercontig coordinate system.
#
my $supercontig = $sa->fetch_by_region('supercontig', "NT_028392");

my $transcripts = $supercontig->get_all_Transcripts();

88
note( "Checking if all transcripts on NTContig NT_028392 translate and transform" );
89 90 91 92 93

$translates = 1;
for my $trans ( @$transcripts ) {
  if( $trans->translate()->seq() =~ /\*./ ) {
    $translates = 0;
94
    note( $trans->stable_id()." does not translate." );
95 96
    last;
  }
97
  note($trans->stable_id() .  ":" . $trans->translate()->seq() . "\n");
98 99 100 101
}

ok($translates);

Arne Stabenau's avatar
Arne Stabenau committed
102 103 104

my $ta = $db->get_TranscriptAdaptor();

105
note ("Transcript->list_dbIDs");
106 107 108
my $ids = $ta->list_dbIDs();
ok (@{$ids});

109
note ("Transcript->list_stable_ids");
110
my $stable_ids = $ta->list_stable_ids();
111 112
ok (@{$stable_ids});

Arne Stabenau's avatar
Arne Stabenau committed
113
my $tr = $ta->fetch_by_display_label( "DNMT3B" );
114
is($tr->dbID, 21737, 'Fetched correct dbID');
Arne Stabenau's avatar
Arne Stabenau committed
115 116 117


$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
Arne Stabenau's avatar
Arne Stabenau committed
118

119 120
$tr = $tr->transform('contig');

Arne Stabenau's avatar
Arne Stabenau committed
121 122
ok( $tr );

123
note ( "External transcript name: " . $tr->external_name );
124
is ($tr->external_name, "MAPRE1", 'Fetched correct external name');
125

126
note ( "External transcript dbname: " . $tr->external_db );
127
is ( $tr->external_db, 'HUGO' , 'Fetched correct external db');
128

129
note ( "Display_xref_id: " . $tr->display_xref->dbID() );
130
is ( $tr->display_xref->dbID(), 97759, 'Fetched correct display xref id' );
131
ok( test_getter_setter( $tr, "display_xref", 42 ));
132

Arne Stabenau's avatar
Arne Stabenau committed
133
ok( test_getter_setter( $tr, "dbID", 100000 ));
134
ok( test_getter_setter( $tr, "biotype", "NOVEL" ));
135 136 137 138
ok( test_getter_setter( $tr, "created_date", time() ));
ok( test_getter_setter( $tr, "modified_date", time() ));


139
my @date_time = gmtime( $tr->created_date());
140 141
ok( $date_time[3] == 6 && $date_time[4] == 11 && $date_time[5] == 104 );

142
@date_time = gmtime( $tr->modified_date());
143 144 145
ok( $date_time[3] == 6 && $date_time[4] == 11 && $date_time[5] == 104 );


Arne Stabenau's avatar
Arne Stabenau committed
146 147
ok( $tr->translation->isa( "Bio::EnsEMBL::Translation" ));

148
is( $tr->start(), 79874, 'Start is correct' );
Arne Stabenau's avatar
Arne Stabenau committed
149

150
is( $tr->end(), 110306, 'End is correct' );
Arne Stabenau's avatar
Arne Stabenau committed
151

152
is ( substr( $tr->spliced_seq(), 0, 10 ), "ACGAGACGAA", 'Start of spliced seq is correct' ); 
153 154 155
is ( substr( $tr->spliced_seq(1), 0, 10 ), "acgagacgaa", 'Spliced seq with utr lower casing is correct');
is ( length($tr->spliced_seq()), length($tr->spliced_seq(1)), "Spliced seq with or without utr lower casing has the same length");
is ( $tr->spliced_seq(), uc($tr->spliced_seq(1)), "Spliced seq is identical to upper case utr masked spliced seq");
156 157
is ( substr($tr->spliced_seq(1), 61, 6), 'aagATG', 'Start mask boundary on forward stand transcript is correct' );
is ( substr($tr->spliced_seq(1), 865, 6), 'TATtaa', 'End mask boundary on forward stand transcript is correct' );
Arne Stabenau's avatar
Arne Stabenau committed
158

159
is ( substr( $tr->translateable_seq(),0,10 ), "ATGGCAGTGA", 'Start of translateable sequence is correct' );
Arne Stabenau's avatar
Arne Stabenau committed
160

161
is( $tr->coding_region_start(), 85834, 'Correct coding region start' );
Arne Stabenau's avatar
Arne Stabenau committed
162

163
is( $tr->coding_region_end(), 108631, 'Correct coding region end' );
Arne Stabenau's avatar
Arne Stabenau committed
164

Tiago Grego's avatar
Tiago Grego committed
165
is( $tr->feature_so_acc, 'SO:0000673', 'Transcript feature SO acc is correct (transcript)' );
166
is( $tr->feature_so_term, 'transcript', 'Transcript feature SO term is correct (transcript)' );
Tiago Grego's avatar
Tiago Grego committed
167

Arne Stabenau's avatar
Arne Stabenau committed
168
my @pepcoords = $tr->pep2genomic( 10, 20 );
169
is( $pepcoords[0]->start(), 85861, 'Correct translation start' );
Arne Stabenau's avatar
Arne Stabenau committed
170

171 172 173 174 175 176
my $t_start = $tr->start;
my $t_end   = $tr->end;
my $t_strand = $tr->get_all_Exons->[0]->strand;
my $pep_len = ($tr->cdna_coding_end - $tr->cdna_coding_start + 1) / 3;

my $coord_num = 0;
177 178
my $coding_region_start = $tr->coding_region_start;
my $coding_region_end   = $tr->coding_region_end;
179 180
#expect coordinate for each exon with coding sequence 
foreach my $e (@{$tr->get_all_Exons}) {
181
  if($e->end > $coding_region_start && $e->start < $coding_region_end) {
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
    $coord_num++;
  }
}

my @coords = ();
my @gaps = ();
my $coord_len = 0;
foreach my $c ($tr->genomic2pep($t_start, $t_end, $t_strand)) {
  if($c->isa('Bio::EnsEMBL::Mapper::Gap')) {
    push @gaps, $c;
  } else {
    push @coords, $c;
  }
}

197
is(scalar(@coords), $coord_num, 'Number of coords is correct');
198
my ($last_coord) = reverse(@coords); 
199
is($pep_len, $last_coord->end, 'Peptide length matched end coordinate');
200

201 202
note( "start Exon: ".$tr->start_Exon->stable_id() );
note( "end Exon: ".$tr->end_Exon->stable_id() );
Arne Stabenau's avatar
Arne Stabenau committed
203

204
is($tr->cdna_coding_start, 65, 'Correct cdna coding start');
205 206
ok(test_getter_setter($tr, 'cdna_coding_start', 99));

207
is( substr( $tr->five_prime_utr()->seq(), -5, 5), "CGAAG", 'Five prime utr seq is correct' ); 
Arne Stabenau's avatar
Arne Stabenau committed
208

209
is($tr->cdna_coding_end, 868, 'Correct cdna coding end');
210 211
ok(test_getter_setter($tr, 'cdna_coding_end', 102));

212
is( substr( $tr->three_prime_utr()->seq(), -5, 5  ), "TTCAA", 'Three prime utr seq is correct');
Arne Stabenau's avatar
Arne Stabenau committed
213

214
is( scalar( @{$tr->get_all_Exons()} ), 7, 'Transcript has 7 exons' );
215 216 217

$tr->flush_Exons();

218
is( scalar( @{$tr->get_all_Exons()} ), 0, 'No exons left after flushing' );
219

220 221
is ( $tr->appris, 'principal1' , 'APPRIS tag fetched correctly');

222 223 224 225 226 227
# Fetch a fresh tr, check incomplete codon behavior
$tr = $ta->fetch_by_stable_id( "ENST00000300425" );

# By default the incomplete codon should be dropped
is( $tr->translate()->seq() =~ /P$/, 1, "Incomplete codon is not translated");
is( $tr->translate(1)->seq() =~ /PL$/, 1, "Incomplete codon is padded then translated");
228

Alistair Rust's avatar
Alistair Rust committed
229 230 231
# get a fresh tr to check the update method
$tr = $ta->fetch_by_stable_id( "ENST00000217347" );

232
$multi->save('core', 'transcript', 'meta_coord');
233

Alistair Rust's avatar
Alistair Rust committed
234 235 236 237
# the first update should have no effect
$ta->update($tr);

my $up_tr = $ta->fetch_by_stable_id( "ENST00000217347" );
238
is( $up_tr->display_xref->dbID(), 97759, 'Fetched the correct dbID' );
Alistair Rust's avatar
Alistair Rust committed
239

240
my $dbentryAdaptor = $db->get_DBEntryAdaptor();
Alistair Rust's avatar
Alistair Rust committed
241

242
$tr->display_xref($dbentryAdaptor->fetch_by_dbID( 614 ));
Alistair Rust's avatar
Alistair Rust committed
243 244 245
$ta->update($tr);

$up_tr = $ta->fetch_by_stable_id( "ENST00000217347" );
246
is ( $up_tr->display_xref->dbID(), 614, 'Fetched the correct display xref id');
Alistair Rust's avatar
Alistair Rust committed
247

248
$multi->restore('core', 'transcript', 'meta_coord');
249

250 251 252 253 254 255 256 257 258 259 260 261
#
# Test spliced_seq on a reverse strand transcript
#

$tr = $ta->fetch_by_stable_id( "ENST00000246229" );

is ( substr( $tr->spliced_seq(), 0, 10 ), "ATGGCCCGAC", 'Start of spliced seq is correct, rev strand' );
is ( substr( $tr->spliced_seq(1), 0, 10 ), "atggcccgac", 'Spliced seq with utr lower casing is correct, rev strand');
is ( length($tr->spliced_seq()), length($tr->spliced_seq(1)), "Spliced seq with or without utr lower casing has the same length, rev strand");
is ( $tr->spliced_seq(), uc($tr->spliced_seq(1)), "Spliced seq is identical to upper case utr masked spliced seq, rev strand");
is ( substr($tr->spliced_seq(1), 199, 6), 'gccATG', 'Start mask boundary on forward stand transcript is correct, rev strand' );
is ( substr($tr->spliced_seq(1), 1687, 6), 'CAGtag', 'End mask boundary on forward stand transcript is correct, rev strand' );
262 263 264 265


my $interpro = $ta->get_Interpro_by_transid("ENST00000252021");
foreach my $i (@$interpro) {
266
  note($i);
267 268 269 270 271 272 273 274
}
###currently no interpro info in the test db
ok(@$interpro == 1);

#
# test fetch_all_by_external_name
#

Ian Longden's avatar
Ian Longden committed
275
($tr) = @{$ta->fetch_all_by_external_name('PLAGL2')};
276
is($tr->stable_id, 'ENST00000246229', 'Fetched correct transcript by external name');
277

278 279 280 281 282
#
# test fetch_by_rnaproduct_id
#

$tr = $ta->fetch_by_rnaproduct_id(1);
283
is($tr->stable_id, 'ENST00000278995', 'Fetched correct transcript by rnaproduct id');
284

285 286 287 288 289 290
#
# test fetch_by_translation_id
#

$tr = $ta->fetch_by_translation_id(21734);

291
is($tr->stable_id, 'ENST00000201961', 'Fetched correct transcript by translation id');
292

293
is($tr->display_id(), $tr->stable_id(), 'Transcript stable id and display id are identical');
294

295 296 297
#
# test TranscriptAdaptor::fetch_all_by_biotype
#
298
note("Test fetch_all_by_biotype");
299
my @transcripts = @{$ta->fetch_all_by_biotype('protein_coding')};
300
is(@transcripts, 27, 'Fetching all protein coding transcript');
301
my $transcriptCount = $ta->count_all_by_biotype('protein_coding');
302
is($transcriptCount, 27, 'Counting all protein coding');
303
@transcripts = @{$ta->fetch_all_by_biotype(['protein_coding','pseudogene'])};
304
is(@transcripts, 27, 'Got 27 transcript');
305
$transcriptCount = $ta->count_all_by_biotype(['protein_coding', 'pseudogene']);
306
is($transcriptCount, 27, 'Count by biotype is correct');
307

308 309 310 311 312 313 314 315
#
# test TranscriptAdaptor::fetch_all_by_Slice
#
note("Test fetch_all_by_Slice");
@transcripts = @{$ta->fetch_all_by_Slice($slice)};
$transcriptCount = $ta->count_all_by_Slice($slice);
is(@transcripts, $transcriptCount, "Counted as many transcripts as were fetched from slice");

316 317 318 319
#
# test TranscriptAdaptor::fetch_all_by_source
#
note("Test fetch_all_by_source");
320
@transcripts = @{$ta->fetch_all_by_source('ensembl')};
321
note "Got ".scalar(@transcripts)." ensembl transcripts\n";
322
is(24, scalar(@transcripts));
323
$transcriptCount = $ta->count_all_by_source('ensembl');
324
is(24, $transcriptCount);
325 326
@transcripts = @{$ta->fetch_all_by_source(['havana','vega'])};
note "Got ".scalar(@transcripts)." (havana, vega) transcripts\n";
327
is(3, scalar(@transcripts));
328
$transcriptCount = $ta->count_all_by_source(['havana', 'vega']);
329
is(3, $transcriptCount);
330

Magali Ruffier's avatar
Magali Ruffier committed
331 332 333 334 335
#
# test TranscriptAdaptor::fetch_all
#
note("Test fetch_all");
@transcripts = @{ $ta->fetch_all() };
336
is(27, scalar(@transcripts), "Got 27 transcripts");
Magali Ruffier's avatar
Magali Ruffier committed
337 338 339 340 341

#
# test TranscriptAdaptor::fetch_all_by_GOTerm
#
note("Test fetch_all_by_GOTerm");
342 343
my $go_adaptor;
warning { $go_adaptor = $odb->get_OntologyTermAdaptor(); };
Magali Ruffier's avatar
Magali Ruffier committed
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
my $go_term = $go_adaptor->fetch_by_accession('GO:0070363');
@transcripts = @{ $ta->fetch_all_by_GOTerm($go_term) };
is(scalar(@transcripts), 0, "Found 0 genes with fetch_all_by_GOTerm");


#
# test TranscriptAdaptor::fetch_all_by_exon_supporting_evidence
#
note("Test fetch_all_by_exon_supporting_evidence");
@transcripts = @{ $ta->fetch_all_by_exon_supporting_evidence('BRCA2', 'dna_align_feature') };
is(scalar(@transcripts), 0, "No transcripts with BRCA2 daf");
@transcripts = @{ $ta->fetch_all_by_exon_supporting_evidence('AK015740.1', 'dna_align_feature') };
is(scalar(@transcripts), 1, "1 transcript with AK015740.1 daf");
@transcripts = @{ $ta->fetch_all_by_exon_supporting_evidence('Q9NUG5', 'protein_align_feature') };
is(scalar(@transcripts), 1, "1 transcript with Q9NUG5 paf");

#
# test TranscriptAdaptor::fetch_all_by_transcript_supporting_evidence
#
note("Test fetch_all_by_transcript_supporting_evidence");
@transcripts = @{ $ta->fetch_all_by_transcript_supporting_evidence('BRCA2', 'dna_align_feature') };
is(scalar(@transcripts), 0, "No transcripts with BRCA2 daf");
@transcripts = @{ $ta->fetch_all_by_transcript_supporting_evidence('AK015740.1', 'dna_align_feature') };
is(scalar(@transcripts), 0, "0 transcripts with AK015740.1 daf");
@transcripts = @{ $ta->fetch_all_by_transcript_supporting_evidence('Q9NUG5', 'protein_align_feature') };
is(scalar(@transcripts), 0, "No transcripts with Q9NUG5 paf");


Ian Longden's avatar
Ian Longden committed
372 373 374 375 376 377 378 379 380 381 382 383 384
#
# Test get_all_Introns by joining Exons and introns
# and comparing it to the original
#

foreach my $stable_id (qw(ENST00000201961 ENST00000217347)){ #test both strands
#foreach my $stable_id (qw(ENST00000217347)){ #test both strands

  my $transcript_adaptor = $db->get_TranscriptAdaptor();
  my $transcript = 
    $transcript_adaptor->fetch_by_stable_id($stable_id);


385 386
  my @exons = (@{$transcript->get_all_Exons()});
  my @introns = (@{$transcript->get_all_Introns()});
Ian Longden's avatar
Ian Longden committed
387

388 389 390
  my @cds = (@{$transcript->get_all_CDS()});
  my @cds_introns = (@{$transcript->get_all_CDS_Introns()});

Ian Longden's avatar
Ian Longden committed
391 392 393 394 395
  my $orig_seq = $transcript->slice->subseq(
					    $transcript->start(),
					    $transcript->end(), 
					    $transcript->strand());

396 397 398 399 400
  my $cds_orig_seq = $transcript->slice->subseq(
                                            $transcript->coding_region_start(),
                                            $transcript->coding_region_end(),
                                            $transcript->strand());

Ian Longden's avatar
Ian Longden committed
401 402 403 404 405 406
  my $idl=0;
  my $new_seq = $exons[0]->seq()->seq();
  foreach my $intron (@introns){
    $new_seq .= $intron->seq;
    $new_seq .= $exons[$idl+1]->seq->seq();
    $idl++;
407

Ian Longden's avatar
Ian Longden committed
408
  }
409

410
  is($orig_seq, $new_seq, 'Correct new origin seq');
Ian Longden's avatar
Ian Longden committed
411

412 413 414 415 416 417 418 419 420 421 422
  my $cds_idl=0;
  my $new_cds_seq = $cds[0]->seq();
  foreach my $cds_intron (@cds_introns){
    $new_cds_seq .= $cds_intron->seq;
    $new_cds_seq .= $cds[$cds_idl+1]->seq();
    $cds_idl++;

  }

  is($cds_orig_seq, $new_cds_seq, 'Correct new cds origin seq');

Ian Longden's avatar
Ian Longden committed
423 424 425
}


426 427 428 429 430 431 432 433 434 435 436 437 438
#
# regression test:  five_prime_utr and three_prime_utr were failing
# for transcripts that had no UTR. undef should have been returned instead
#
$tr = $ta->fetch_by_stable_id('ENST00000246203');

my $three_prime = $tr->three_prime_utr();

ok(!defined($three_prime));

my $five_prime = $tr->five_prime_utr();

ok(!defined($five_prime));
Arne Stabenau's avatar
Arne Stabenau committed
439

440 441 442 443 444 445 446 447 448 449 450


#
# test removal of transcript
#
my $tl_count = count_rows($db, "translation");
my $ex_tr_count = count_rows($db, "exon_transcript");
my $tr_count = count_rows($db, "transcript");

my $ex_tr_minus = @{$tr->get_all_Exons()};

451 452


Monika Komorowska's avatar
Monika Komorowska committed
453 454 455
$multi->save("core", "transcript", "translation",
             "protein_feature", "exon",
             "exon_transcript", "object_xref",
456
             "supporting_feature", "dna_align_feature","protein_align_feature",
457
             'xref', "ontology_xref", "identity_xref", 'meta_coord');
458 459 460 461 462 463

$ta->remove($tr);

ok(!defined($tr->dbID()));
ok(!defined($tr->adaptor()));

464 465 466
is( count_rows( $db, "transcript"), ($tr_count - 1), 'Row count matches transcript count');
is( count_rows( $db, "translation"), ($tl_count - 1), 'Row count matches translation count');
is( count_rows( $db, "exon_transcript"), ($ex_tr_count - $ex_tr_minus), 'Row count matches exon count');
467

468 469 470 471 472 473
#
# test removal of transcript with supporting changes at the gene level
#

$tr = $ta->fetch_by_stable_id('ENST00000278995');
my $gene = $tr->get_Gene;
474
print $gene->stable_id."\n\n";
475 476 477 478 479 480 481 482 483 484 485 486 487 488
note(join "\n",map { $_->stable_id } @{$gene->get_all_Transcripts});

$ta->remove($tr,1);

ok(! grep { $_->stable_id eq 'ENST00000278995'} @{$gene->get_all_Transcripts});

# note(join "\n",map { $_->stable_id } @{$gene->get_all_Transcripts});
# old coords 30274334  30300924, the old gene above is still pointing to old coordinates, but remove cannot find it to change it.
my $new_copy_gene = $ta->fetch_by_stable_id('ENST00000310998')->get_Gene;
cmp_ok($new_copy_gene->start, '==', 30274334, 'Shortened Gene starts as before');
cmp_ok($new_copy_gene->end, '==', 30298904, 'Shortened Gene ends earlier');

$tr = $ta->fetch_by_stable_id('ENST00000278995');
ok(! defined $tr);
489 490 491 492 493 494 495 496 497 498
#
# test _rna_edit for transcripts
#

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );

#
# 5 prime UTR editing
#

499 500
$tr->edits_enabled(1);

501
my $seq1 = $tr->spliced_seq();
502 503
my $tlseq1 = $tr->translateable_seq();

504 505 506
my $cdna_cds_start1 = $tr->cdna_coding_start();
my $cdna_cds_end1   = $tr->cdna_coding_end();

507
my $attrib = Bio::EnsEMBL::Attribute->new
508 509 510
  (-code => '_rna_edit',
   -value => "1 6 GATTACA",
   -name => "RNA editing");
511 512 513

$tr->add_Attributes( $attrib );

514
my $seq2 = $tr->spliced_seq(1);
515 516 517 518

ok( $seq1 ne $seq2 );
ok( $seq2 =~ /^GATTACA/ );

519 520 521
my $cdna_cds_start2 = $tr->cdna_coding_start();
my $cdna_cds_end2   = $tr->cdna_coding_end();

522 523
is($cdna_cds_start1, $cdna_cds_start2 - 1, 'Both cdna starts match');
is($cdna_cds_end1, $cdna_cds_end2   - 1, 'Both cdna ends match');
524

525 526 527 528 529 530

# insert just at the start of the translation
# makes it longer. (For non phase zero start exons)
# cdna_coding_start for this transcript is 65, 64 is just before that

$attrib = Bio::EnsEMBL::Attribute->new
531 532 533
  ( -code => '_rna_edit',
    -value => "65 64 NNN",
    -name => "RNA editing");
534 535 536 537 538 539 540 541 542

$tr->add_Attributes( $attrib );

my $tlseq2 = $tr->translateable_seq();

ok( $tlseq1 ne $tlseq2 );
ok( $tlseq2 =~ /^NNNATG/ );
ok( $tlseq1 eq substr( $tlseq2,3 ));

543 544
is($cdna_cds_start2, $tr->cdna_coding_start(), 'Cds start matches cdna coding start');
is($cdna_cds_end2  , $tr->cdna_coding_end() - 3, 'Coding end -3 matches cds end');
545 546 547 548

# test that the edits can be disabled
$tr->edits_enabled(0);

549 550
is($tr->cdna_coding_start(), $cdna_cds_start1, 'Cds start matches cdna coding start');
is($tr->cdna_coding_end()  , $cdna_cds_end1, 'Cds end matches cdna coding end');
551

552 553
is($tr->spliced_seq(), $seq1, 'Spliced sequence is correct');
is($tr->translateable_seq(), $tlseq1, 'Translateable sequence is correct');
554

555 556 557 558 559 560 561
#
# try save and retrieve by lazy load
#

$multi->hide( "core", "transcript_attrib" );
my $attribAdaptor = $db->get_AttributeAdaptor();

562
$attribAdaptor->store_on_Transcript($tr->dbID, $tr->get_all_Attributes);
563 564

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
565
$tr->edits_enabled(1);
566

Glenn Proctor's avatar
Glenn Proctor committed
567 568 569
#print " $tr->translateable_seq() : " .  $tr->translateable_seq() . "\n";
#print "$tr->spliced_seq() : " . $tr->spliced_seq() . "\n";

570
is( $tr->translateable_seq(), $tlseq2, 'Translateable sequence is correct' );
571
ok( $tr->spliced_seq() =~ /^GATTACA/ );
572

573
$multi->restore();
574 575 576 577 578 579 580 581 582 583 584 585 586 587 588

#
# test that the transcript mapper handles edits
#
$tr = $ta->fetch_by_translation_id(21734);
$slice = $tr->feature_Slice();
$tr = $tr->transfer($slice);
test_trans_mapper_edits($tr);

# test again with reverse strand
$tr = $ta->fetch_by_translation_id(21734);
$slice->invert();
$tr = $tr->transfer($slice);
test_trans_mapper_edits($tr);

589 590 591 592 593 594 595


# test that mitochondrial transcripts can be corrected translated with
# an alternate codon table

$tr = $ta->fetch_by_stable_id('ENST00000355555');

596
note($tr->translate->seq());
597

598
is($tr->translate->seq(), 'MNFALILMINTLLALLLMIITFWLPQLNGYMEKSTPYECGFDPMSPARVPFSMKFFLVAITFLLFDLEIALLLPLPWALQTTNLPLMVMSSLLLIIILALSLAYEWLQKGLDWAE', 'Translates correctly');
599 600 601



602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
# check that attributes are stored when transcript is stored

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
my $g = $db->get_GeneAdaptor->fetch_by_transcript_id($tr->dbID());


$tr->translation()->adaptor(undef);
$tr->translation()->dbID(undef);

# unstore the transcript so it can be stored again

foreach my $ex (@{$tr->get_all_Exons()}) {
  $ex->dbID(undef);
  $ex->adaptor(undef);
}

$tr->dbID(undef);
$tr->adaptor(undef);

Arne Stabenau's avatar
Arne Stabenau committed
621 622 623 624
{
  # testing transform with gaps in introns
  my $tr = $ta->fetch_by_dbID( 21739 );
  my $mapped_tr = $tr->transform( "alt_chrom" );
625
  is( $tr->spliced_seq(), $mapped_tr->spliced_seq(), 'Transcript seq matches mapped seq' );
Arne Stabenau's avatar
Arne Stabenau committed
626
}
627 628

$multi->hide('core', 'transcript', 'transcript_attrib', 'translation',
629
             'exon_transcript', 'exon', 'meta_coord');
630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647


my $attrib1 = Bio::EnsEMBL::Attribute->new
  ( -code => '_rna_edit',
    -value => "65 64 NNN",
    -name => "RNA editing");

$tr->add_Attributes($attrib1);

my $attrib2 = Bio::EnsEMBL::Attribute->new
  ( -code => '_rna_edit',
    -value => "66 65 NNN",
    -name => "RNA editing");

$tr->add_Attributes($attrib2);

$ta->store($tr, $g->dbID());

648
is(count_rows($db, 'transcript_attrib'), 2, '2 rows for transcript_attrib');
649

650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694

#
# specifically test genomic2cdna given coords either side of an insertion
#

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );

my $tmp_coding_start = $tr->coding_region_start;

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_rna_edit',
    -value => "68 67 NNN",
    -name => "RNA editing"
  )
);

$tr->edits_enabled(1);

is(substr($tr->translateable_seq, 0, 6), 'ATGNNN', 'insertion mapper test - edit applied');

is_deeply(
  [$tr->get_TranscriptMapper->genomic2cdna($tmp_coding_start, $tmp_coding_start + 4, 1)],
  [
    bless( {
      'coord_system' => undef,
      'strand' => 1,
      'id' => 'cdna',
      'rank' => 0,
      'end' => 67,
      'start' => 65
    }, 'Bio::EnsEMBL::Mapper::Coordinate' ),
    bless( {
      'coord_system' => undef,
      'strand' => 1,
      'id' => 'cdna',
      'rank' => 0,
      'end' => 72,
      'start' => 71
    }, 'Bio::EnsEMBL::Mapper::Coordinate' )
  ],
  'insertion mapper test - coords either side of inserted block'
);


695
$multi->restore('core');
696

697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
#
# tests for translation start/end within a seq_edit
#
$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
$tr->edits_enabled(1);

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_rna_edit',
    -value => "1 0 CGTCGATGTTG",
  )
);
is(substr($tr->translateable_seq, 0, 6), 'ATGGCA', 'translation start in a seq_edit - seq before');
is(length($tr->translateable_seq),       804,      'translation start in a seq_edit - length before');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_transl_start',
    -value => "6",
  )
);
is(substr($tr->translateable_seq, 0, 6), 'ATGTTG', 'translation start in a seq_edit - seq after');
is(length($tr->translateable_seq),       874,      'translation start in a seq_edit - length after');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_rna_edit',
    -value => "869 868 CGTCGTGATTG",
  )
);
is(substr(reverse($tr->translateable_seq), 0, 11), reverse('CGTCGTGATTG'), 'translation end in a seq_edit - seq before');
is(length($tr->translateable_seq),                 885,                    'translation end in a seq_edit - length before');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_transl_end',
    -value => "884",
  )
);
is(substr(reverse($tr->translateable_seq), 0, 11), reverse('GAGTATCGTCG'), 'translation end in a seq_edit - seq after');
is(length($tr->translateable_seq),                 879,                    'translation end in a seq_edit - length after');

$multi->restore('core');

$tr = $ta->fetch_by_stable_id( "ENST00000217347" );
$tr->edits_enabled(1);

is(length($tr->translateable_seq), 804, 'explicit translation end with no seq_edit - length before');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_transl_end',
    -value => "873",
  )
);
is(length($tr->translateable_seq), 804, 'explicit translation end with no seq_edit - length after');

$tr->add_Attributes(
  Bio::EnsEMBL::Attribute->new(
    -code => '_rna_edit',
    -value => "869 868 CGTCGTGATTG",
  )
);
is(length($tr->translateable_seq), 809, 'explicit translation end with seq_edit - length after');

$multi->restore('core');
763

764
#
765
# tests for multiple versions of transcripts in a database
766
#
767

768
$tr = $ta->fetch_by_stable_id('ENST00000355555');
769
is( $tr->dbID, 21740, 'Fetched transcript by stable id' );
Arne Stabenau's avatar
Arne Stabenau committed
770

771
@transcripts = @{ $ta->fetch_all_versions_by_stable_id('ENST00000355555') };
772
is( scalar(@transcripts), 1, 'Fetched all transcripts by stable_id' );
773 774

$tr = $ta->fetch_by_translation_stable_id('ENSP00000355555');
775
is( $tr->dbID, 21740, 'Fetched transcript by translation stable id' );
776

Matthew Laird's avatar
Matthew Laird committed
777
$tr->stable_id_version('ENSP00000171455.4');
Matthew Laird's avatar
Matthew Laird committed
778
is($tr->stable_id, 'ENSP00000171455', 'Stable id set with stable_id_version');
Matthew Laird's avatar
Matthew Laird committed
779 780 781 782 783 784 785 786
is($tr->version, 4, 'Version set with stable_id_version');
is($tr->stable_id_version, 'ENSP00000171455.4', 'Stable id and version from stable_id_version');

$tr->stable_id_version('ENSP00000171456');
is($tr->stable_id, 'ENSP00000171456', 'Stable id set with stable_id_version');
is($tr->version, undef, 'Version undef from stable_id_version');
is($tr->stable_id_version, 'ENSP00000171456', 'Stable id and no version from stable_id_version');

Matthew Laird's avatar
Matthew Laird committed
787 788 789 790 791 792 793 794 795 796 797 798
$tr = $ta->fetch_by_translation_stable_id('ENSP00000355555.1');
is( $tr->dbID, 21740, 'Fetched transcript by translation stable id with version' );

$tr = $ta->fetch_by_translation_stable_id('ENSP00000355555.1a');
ok( ! defined($tr), 'Fetched transcript by translation stable id with bad version' );

$tr = $ta->fetch_by_translation_stable_id_version('ENSP00000355555', 1);
is( $tr->dbID, 21740, 'Fetched transcript by translation stable id (version) with version' );

$tr = $ta->fetch_by_translation_stable_id_version('ENSP00000355555', '1a');
ok( ! defined($tr), 'Fetched transcript by translation stable id (version) with bad version' );

799
@transcripts = @{ $ta->fetch_all_by_exon_stable_id('ENSE00001109603') };
800 801
is( scalar(@transcripts), 1);
is( $transcripts[0]->dbID, 21740, 'Fetched transcript by exon stable id' );
802 803 804

$g = $db->get_GeneAdaptor->fetch_by_stable_id('ENSG00000355555');
@transcripts = @{ $ta->fetch_all_by_Gene($g) };
805 806
is( scalar(@transcripts), 1);
is( $transcripts[0]->dbID, 21740, 'Fetched transcript by gene' );
807 808 809

my $sl = $sa->fetch_by_region('chromosome', 'MT_NC_001807');
@transcripts = @{ $sl->get_all_Transcripts };
810
is( scalar(@transcripts), 1, 'Fetched all transcripts for region' );
811

Ian Longden's avatar
Ian Longden committed
812
@transcripts = @{ $ta->fetch_all_by_external_name('MAE1_HUMAN') };
813 814
is( scalar(@transcripts), 1);
is( $transcripts[0]->dbID, 21738, 'Fetched transcript by external name' );
815

Ian Longden's avatar
Ian Longden committed
816
$tr = $ta->fetch_by_display_label('MAPRE1');
817
is( $tr->dbID, 21738, 'Fetched transcript by display label' );
818

819 820 821 822 823 824
# store/update

$tr = $ta->fetch_by_stable_id('ENST00000355555');
$g = $db->get_GeneAdaptor->fetch_by_transcript_id($tr->dbID);
$tr->get_all_Exons;

825 826 827
my $tl = $tr->translation;

$multi->hide( "core", "gene", "transcript", "translation", "meta_coord" );
828 829 830 831 832 833 834 835 836 837 838

$tr->version(3);
$tr->dbID(undef);
$tr->adaptor(undef);
$ta->store($tr, $g->dbID);

$tr->version(4);
$tr->is_current(0);
$tr->dbID(undef);
$tr->adaptor(undef);
$ta->store($tr, $g->dbID);
839 840 841 842 843 844 845 846 847

$tr->version(undef);
$tr->is_current(0);
$tr->dbID(undef);
$tr->adaptor(undef);
$tl->version(undef);
$tr->translation($tl);
$ta->store($tr, $g->dbID);

848
$tr = $ta->fetch_by_stable_id('ENST00000355555');
849
is($tr->is_current, 1, 'Transcript is current');   # 148
850 851 852

@transcripts = @{ $ta->fetch_all_versions_by_stable_id('ENST00000355555') };
foreach my $t (@transcripts) {
853 854 855
  if (defined $t->version && $t->version == 4) {
    is($t->is_current, 0, 'Transcript is not current');  # 149
  }
856 857 858 859 860
}

$tr->is_current(0);
$ta->update($tr);
my $t1 = $ta->fetch_by_stable_id('ENST00000355555');
861
ok(!$t1);   # 150
862 863 864 865

$tr->is_current(1);
$ta->update($tr);
$tr = $ta->fetch_by_stable_id('ENST00000355555');
866
is($tr->is_current, 1, 'Transcript is now current');   # 151
867

868 869 870 871 872 873 874 875 876
my $null_versions = 0;
foreach my $t (@transcripts) {
  if (! defined $t->version) {
    ok(! defined $t->translation->version);
    $null_versions++;
  }
}
is ( $null_versions, 1, "Null/undef version stored and retrieved");

877 878
$multi->restore;

879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909
# UTR Tests
{
  my $utr_testing = sub {
    my ($tid, $five_start, $five_end, $three_start, $three_end) = @_;
    my $t;
    if(ref($tid)) {
      $t = $tid;
    }
    else {
      $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
    }
    my $five_utr = $t->five_prime_utr_Feature();
    my $three_utr = $t->three_prime_utr_Feature();
    is($five_utr->start(), $five_start, 'Checking five prime UTR start');
    is($five_utr->end(), $five_end, 'Checking five prime UTR end');
    is($three_utr->start(), $three_start, 'Checking three prime UTR start');
    is($three_utr->end(), $three_end, 'Checking three prime UTR end');
  };
  my $three_prime_seq_test = sub {
    my ($tid) = @_;
    my $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
    my $true_seq = $t->three_prime_utr()->seq();
    my $seq = $t->three_prime_utr_Feature()->feature_Slice()->seq();
    is($seq, $true_seq, 'Asserting 3 prime seq as expected; we have a transcript ending in the last Exon');
  };
  note 'Testing +ve strand 5 prime Exon UTR readthrough';
  $utr_testing->(21729, 30653524, 30685637, 30707177, 30709209);
  
  note 'Testing -ve strand 5 prime Exon UTR readthrough';
  $utr_testing->(21726, 30578039, 30583588, 30568364, 30572314);
  $three_prime_seq_test->(21726);
910 911 912

  my $tid = 21729;
  my $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
913
  my $five_utrs = $t->get_all_five_prime_UTRs();
914 915 916 917
  is(scalar(@$five_utrs), 2, "There are 2 five prime utr features");
  is($five_utrs->[0]->start(), $t->seq_region_start(), "Correct five prime UTR start");
  is($five_utrs->[1]->end(), 30685637, "Correct five prime UTR end");

918
  my $three_utrs = $t->get_all_three_prime_UTRs();
919 920 921 922 923 924
  is(scalar(@$three_utrs), 1, "There is one three prime utr feature");
  is($three_utrs->[0]->start(), 30707177, "Correct three prime UTR start");
  is($three_utrs->[0]->end(), $t->seq_region_end(), "Correct three prime UTR end");

  $tid = 21726;
  $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
925
  $five_utrs = $t->get_all_five_prime_UTRs();
926 927 928 929
  is(scalar(@$five_utrs), 2, "There are 2 five prime utr features on reverse strand");
  is($five_utrs->[1]->start(), 30578039, "Correct five prime UTR start on reverse strand");
  is($five_utrs->[0]->end(), $t->seq_region_end(), "Correct five prime UTR end on reverse strand");

930
  $three_utrs = $t->get_all_three_prime_UTRs();
931 932 933
  is(scalar(@$three_utrs), 1, "There is one three prime utr feature on reverse strand");
  is($three_utrs->[0]->start(), $t->seq_region_start(), "Correct three prime UTR start on reverse strand");
  is($three_utrs->[0]->end(), 30572314, "Correct three prime UTR end on reverse strand");
934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978
  
  # we have to build some of our own as it's easier to see the coordinates & do the maths
  my $transcript_builder = sub {
    my ($local_slice, $exon_array, $seq_start, $seq_end, $exon_start_idx, $exon_end_idx) = @_;
    my @exons = map {
      Bio::EnsEMBL::Exon->new(-START => $_->[0], -END => $_->[1], -SLICE => $local_slice, -STRAND => $_->[2]);
    } @{$exon_array};
    my $t = Bio::EnsEMBL::Transcript->new(-SLICE => $local_slice, -EXONS => \@exons);
    $t->translation(Bio::EnsEMBL::Translation->new(
      -SEQ_START => $seq_start,
      -SEQ_END => $seq_end,
      -START_EXON => $exons[$exon_start_idx],
      -END_EXON => $exons[$exon_end_idx],
    ));
    return $t;
  };
  
  #Local slice of the whole of Chr20
  my $local_slice = $sa->fetch_by_region('chromosome', "20");
  
  note 'Testing +ve strand 3 prime Exon UTR readthrough';
  my $read_through_pos_three_prime = $transcript_builder->($local_slice, [[1,9,1],[30,39,1],[90,99,1]], 2, 3, 0, 1);
  $utr_testing->($read_through_pos_three_prime, 1, 1, 33, 99);

  note 'Testing -ve strand prime Exon UTR readthrough';
  my $read_through_neg_three_prime = $transcript_builder->($local_slice, [[90,99,-1], [30,39,-1], [1,9,-1]], 2, 3, 0, 1);
  $utr_testing->($read_through_neg_three_prime, 99, 99, 1, 36);

  note 'Testing no UTRs; we expect no features or sequence to be returned';
  my $no_pos_utrs = $transcript_builder->($local_slice, [[1,9,1]], 1, 9, 0, 0);
  ok(! defined $no_pos_utrs->five_prime_utr_Feature(), 'No 5 prime UTR means no feature');
  ok(! $no_pos_utrs->five_prime_utr(), 'No 5 prime UTR means no seq');
  ok(! defined $no_pos_utrs->three_prime_utr_Feature(), 'No 3 prime UTR means no feature');
  ok(! defined $no_pos_utrs->three_prime_utr(), 'No 3 prime UTR means no seq');
  
  note 'Testing no UTRs; we expect no features or sequence to be returned';
  my $no_pos_neg_utrs = $transcript_builder->($local_slice, [[1,9,-1]], 1, 9, 0, 0);
  ok(! defined $no_pos_neg_utrs->five_prime_utr_Feature(), 'No 5 prime UTR means no feature');
  ok(! $no_pos_neg_utrs->five_prime_utr(), 'No 5 prime UTR means no seq');
  ok(! defined $no_pos_neg_utrs->three_prime_utr_Feature(), 'No 3 prime UTR means no feature');
  ok(! defined $no_pos_neg_utrs->three_prime_utr(), 'No 3 prime UTR means no seq');
  
  
}

979 980 981 982 983 984 985 986 987 988 989 990 991
# CDS tests

{

  my $tid = 21726;
  my $t = $db->get_TranscriptAdaptor()->fetch_by_dbID($tid);
  my $cds = $t->get_all_CDS();
  is(scalar(@$cds), scalar(@{$t->get_all_translateable_Exons()}), "There are 2 coding structures");
  is($cds->[1]->start, $t->coding_region_start, "Correct coding start");
  is($cds->[0]->end, $t->coding_region_end, "Correct coding end");

}

992 993 994
SKIP: {
  skip 'No registry support for SQLite yet', 1 if $db->dbc->driver() eq 'SQLite';

995 996 997 998 999 1000
  #test the get_species_and_object_type method from the Registry
  my $registry = 'Bio::EnsEMBL::Registry';
  my ( $species, $object_type, $db_type ) = $registry->get_species_and_object_type('ENST00000355555');
  ok( $species eq 'homo_sapiens' && $object_type eq 'Transcript');
}

1001 1002 1003 1004 1005 1006