Newer
Older
# Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Graham McVicker
committed
use strict;
use warnings;
Ewan Birney
committed
use Test::More;
Ewan Birney
committed
use Bio::EnsEMBL::Test::TestUtils;
use Bio::EnsEMBL::Test::MultiTestDB;
Graham McVicker
committed
use Bio::EnsEMBL::Slice;
Andy Yates
committed
use Test::Exception;
Ewan Birney
committed
our $verbose = 0;
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
Ewan Birney
committed
Graham McVicker
committed
my $CHR = '20';
Graham McVicker
committed
my $END = 31_200_000;
my $STRAND = 1;
my $SEQ_REGION_LENGTH = 50e6;
Ewan Birney
committed
my $multi_db = Bio::EnsEMBL::Test::MultiTestDB->new;
Graham McVicker
committed
my $db = $multi_db->get_DBAdaptor('core');
Ewan Birney
committed
Graham McVicker
committed
#
# TEST - Slice creation from adaptor
Graham McVicker
committed
#
my $slice_adaptor = $db->get_SliceAdaptor;
my $csa = $db->get_CoordSystemAdaptor();
Ewan Birney
committed
my $slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END);
ok($slice->seq_region_name eq $CHR);
ok($slice->start == $START);
ok($slice->end == $END);
ok($slice->seq_region_length == 62842997);
ok($slice->adaptor == $slice_adaptor);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $coord_system = $csa->fetch_by_name('chromosome');
Ewan Birney
committed
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
my $test_seq = 'ATGCATGCATGCATGCATGCATGC';
my $test_slice = new Bio::EnsEMBL::Slice
(-seq_region_name => 'misc',
-seq_region_length => 24,
-start => 1,
-end => 24,
-strand => 1,
-coord_system => $coord_system,
-seq => $test_seq,
);
ok($test_slice->length == 24);
my $hash = $test_slice->get_base_count;
my $a = $hash->{'a'};
my $c = $hash->{'c'};
my $t = $hash->{'t'};
my $g = $hash->{'g'};
my $n = $hash->{'n'};
my $gc_content = $hash->{'%gc'};
ok($a == 6
&& $c == 6
&& $t == 6
&& $g == 6
&& $n == 0
&& $gc_content == 50
&& $a+$c+$t+$g+$n == $test_slice->length);
#
# test that subseq works correctly with attached sequence
#
my $subseq = $test_slice->subseq(2, 6);
debug("subseq = $subseq");
ok($subseq eq 'TGCAT');
$subseq = $test_slice->subseq(2,6,-1);
ok($subseq eq 'ATGCA');
debug("subseq = $subseq");
# test that subslice works correctly with attached sequence
my $sub_slice = $test_slice->sub_Slice(2, 6);
ok($sub_slice->seq eq 'TGCAT');
# test that invert works correctly with attached sequence
ok($sub_slice->invert()->seq() eq 'ATGCA');
# test that slice can be created without db, seq or coord system
{
my $warnings = q{};
my $new_stderr = IO::String->new(\$warnings);
my $oldfh = select(STDERR);
local *STDERR = $new_stderr;
$test_slice = Bio::EnsEMBL::Slice->new('-seq_region_name' => 'test',
'-start' => 1,
'-end' => 3);
my $check = qr/MSG: Slice without coordinate system/;
like($warnings, $check, 'Checking we are still warning about lack of coordinate system');
}
ok($test_slice);
ok($test_slice->seq() eq 'NNN');
debug("\$test_slice->name = " . $test_slice->name());
ok($test_slice->name() eq '::test:1:3:1');
$slice = new Bio::EnsEMBL::Slice
(-seq_region_name => $CHR,
-seq_region_length => $SEQ_REGION_LENGTH,
-start => $START,
-end => $END,
-strand => $STRAND,
-coord_system => $coord_system);
Ewan Birney
committed
ok($slice->seq_region_name eq $CHR);
ok($slice->start == $START);
ok($slice->end == $END);
Graham McVicker
committed
ok($slice->strand == $STRAND);
ok($slice->seq_region_length == $SEQ_REGION_LENGTH);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
$slice->adaptor($slice_adaptor);
ok($slice->adaptor == $slice_adaptor);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
#verify that chr_name start and end are contained in the name
my $name = $slice->name;
ok($name eq "chromosome:NCBI33:$CHR:$START:$END:$STRAND");
#
# Test Slice::length
#
ok($slice->length == ($END-$START + 1));
Ewan Birney
committed
# Test exception name
is($slice->assembly_exception_type(), 'REF', 'Type of region is REF');
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
Ewan Birney
committed
my $clone = $slice_adaptor->fetch_by_region('clone','AL121583.25');
my @attrib = @{$clone->get_all_Attributes('htg_phase')};
ok(@attrib == 1 && $attrib[0]->value() == 4);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $len = $clone->length();
$clone = $clone->expand(100,100);
ok(($clone->start == -99) && ($clone->end() == $len+100));
$clone = $clone->expand(-100,-100);
ok(($clone->start == 1) && ($clone->end() == $len));
$clone = $clone->expand(0,1000);
ok(($clone->start == 1) && ($clone->end() == $len + 1000));
$clone = $clone->expand(-1000, 0);
ok(($clone->start == 1001) && ($clone->end() == $len + 1000));
Ewan Birney
committed
#
# Test constrain_to_seq_region
#
my $tidy_clone = $clone->expand(1000000,10000000);
$tidy_clone = $tidy_clone->constrain_to_seq_region;
ok($tidy_clone->start == 1 && $tidy_clone->end == 84710, 'Huge expand call truncates nicely');
$tidy_clone = $clone->expand(0,-1000);
$tidy_clone = $tidy_clone->constrain_to_seq_region;
note($tidy_clone->start."-".$tidy_clone->end());
ok(($tidy_clone->start == 1001) && ($tidy_clone->end() == 84710), 'constrain_to_seq_region does no harm');
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $inverted_slice = $slice->invert;
ok($slice != $inverted_slice); #slice is not same object as inverted slice
#inverted slice on opposite strand
ok($slice->strand == ($inverted_slice->strand * -1));
#slice still on same strand
ok($slice->strand == $STRAND);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $seq = uc $slice->seq;
my $invert_seq = uc $slice->invert->seq;
Ewan Birney
committed
Graham McVicker
committed
ok(length($seq) == $slice->length); #sequence is correct length
$seq = reverse $seq; #reverse complement seq
$seq =~ tr/ACTG/TGAC/;
Graham McVicker
committed
ok($seq eq $invert_seq); #revcom same as seq on inverted slice
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $SPAN = 10;
my $sub_seq = uc $slice->subseq(-$SPAN,$SPAN);
my $invert_sub_seq = uc $slice->invert->subseq( $slice->length - $SPAN + 1,
$slice->length + $SPAN + 1);
Graham McVicker
committed
ok(length $sub_seq == (2*$SPAN) + 1 );
$sub_seq = reverse $sub_seq;
$sub_seq =~ tr/ACTG/TGAC/;
ok($sub_seq eq $invert_sub_seq);
#
# Test Slice::get_all_PredictionTranscripts
#
my $pts = $slice->get_all_PredictionTranscripts;
#
# Test Slice::get_seq_region_id
#
ok($slice->get_seq_region_id());
# Test Slice::get_all_DnaAlignFeatures
#
my $count = 0;
my $dafs = $slice->get_all_DnaAlignFeatures;
is(scalar(@$dafs), 27081, 'Checking count of returned DnaAlignFeatures');
# Test Slice::get_all_ProteinAlignFeatures
#
my $pafs = $slice->get_all_ProteinAlignFeatures;
is(scalar(@$pafs),7205, 'Checking count of returned ProteinAlignFeatures');
# Test Slice::get_all_SimilarityFeatures
#
ok($count == scalar @{$slice->get_all_SimilarityFeatures});
#
# Test Slice::get_all_SimpleFeatures
#
ok(scalar @{$slice->get_all_SimpleFeatures});
#
# Test Slice::get_all_RepeatFeatures
#
ok(scalar @{$slice->get_all_RepeatFeatures});
#
# Test Slice::get_all_Genes
#
ok(scalar @{$slice->get_all_Genes});
#
# Test Slice::get_all_Genes_by_type
ok(scalar @{$slice->get_all_Genes_by_type('protein_coding')});
#
# Test Slice::get_all_Transcripts
#
ok(scalar @{$slice->get_all_Transcripts});
# Test Slice::get_all_KaryotypeBands
#
ok(scalar @{$slice->get_all_KaryotypeBands});
Ewan Birney
committed
# Test Slice::get_RepeatMaskedSeq
#
$seq = $slice->seq;
ok(length($slice->get_repeatmasked_seq->seq) == length($seq));
my $softmasked_seq = $slice->get_repeatmasked_seq(['RepeatMask'], 1)->seq;
ok($softmasked_seq ne $seq);
ok(uc($softmasked_seq) eq $seq);
$softmasked_seq = $seq = undef;
# Test Slice::get_all_MiscFeatures
ok(scalar @{$slice->get_all_MiscFeatures()});
my @segments = @{$slice->project( 'seqlevel' )};
ok(scalar @segments );
eval {
my @sub_slices = map { $_->to_Slice() } @segments;
my @starts = map { $_->from_start() } @segments;
my @ends = map { $_->from_end() } @segments;
};
if( $@ ) {
debug( "to_Slice call failed on segment of projection" );
ok(0);
} else {
ok(1)
}
Ewan Birney
committed
#my $super_slices = $slice->get_all_supercontig_Slices();
Ewan Birney
committed
##
## get_all_supercontig_Slices()
##
#debug( "Supercontig starts at ".$super_slices->[0]->chr_start() );
#ok( $super_slices->[0]->chr_start() == 29591966 );
#debug( "Supercontig name ".$super_slices->[0]->name() );
Ewan Birney
committed
#ok( $super_slices->[0]->name() eq "NT_028392" );
$hash = $slice->get_base_count;
$a = $hash->{'a'};
$c = $hash->{'c'};
$t = $hash->{'t'};
$g = $hash->{'g'};
$n = $hash->{'n'};
$gc_content = $hash->{'%gc'};
debug( "Base count: a=$a c=$c t=$t g=$g n=$n \%gc=$gc_content");
ok($a == 234371
&& $c == 224761
&& $t == 243734
&& $g == 227135
&& $n == 0
&& $gc_content == 48.59
&& $a+$c+$t+$g+$n == $slice->length);
$slice = $slice_adaptor->fetch_by_region('chromosome', '20', 10, 30);
my $sr_slice = $slice->seq_region_Slice();
ok($sr_slice->start() == 1 &&
$sr_slice->end() == $slice->seq_region_length() &&
$sr_slice->strand() == 1);
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
# synonym tests
debug("START syn test");
my $multi = $multi_db;
$multi->save("core", "seq_region_synonym");
debug("get slice");
$slice = $slice_adaptor->fetch_by_region('chromosome', 20, 1, 10);
my @alt_names = @{$slice->get_all_synonyms()};
foreach my $syn (@alt_names){
debug("syn\t".$syn->name."\n");
}
debug("altnames ".scalar(@alt_names)."\n");
ok(@alt_names == 2);
$slice->add_synonym("20ish");
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 3);
#slcie aleady stored so need to store syns
my $syn_adap = $db->get_SeqRegionSynonymAdaptor;
foreach my $syn (@alt_names){
$syn_adap->store($syn);
}
$slice = $slice_adaptor->fetch_by_region('chromosome', 20, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 3);
$multi->restore();
$multi->save("core", 'seq_region_synonym');
$slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 0);
$slice->add_synonym("1ish");
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 1);
foreach my $syn (@alt_names){
$syn_adap->store($syn);
}
$slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 1);
$multi->restore();
Andy Yates
committed
# Testing synonym searching
{
my $chr_20 = $slice_adaptor->fetch_by_region('chromosome', 20);
my ($syn) = @{$chr_20->get_all_synonyms('RFAM')};
is($syn->name(), 'anoth_20', 'We have the right synonym');
dies_ok { $chr_20->get_all_synonyms('RFAM', 'wibble') } 'Bad external DB version means dying code';
dies_ok { $chr_20->get_all_synonyms('RFAMing', 'wibble') } 'Bad external DB name means dying code';
($syn) = @{$chr_20->get_all_synonyms('RFAM', 1)};
is($syn->name(), 'anoth_20', 'We have the right synonym');
}
#Test assembly exception type on HAP
my $hap_slice = $slice_adaptor->fetch_by_region(undef, '20_HAP1');
is($hap_slice->assembly_exception_type(), 'HAP', 'Ensuring haplotype regions are HAP');
my $chr_one_slice = $slice_adaptor->fetch_by_region('chromosome', '1', 1, 10);
is($chr_one_slice->assembly_exception_type(), 'REF', 'Ensuring reference regions are REF');
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
#Test slice iterator
{
my $large_slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 21);
my $map = sub { $_->length() };
my $si = sub {
my ($chunk) = @_;
return $large_slice->sub_Slice_Iterator($chunk)->map($map)->to_arrayref();
};
is_deeply($si->(100), [21], 'Subslice larger than actual slice gives just 1 slice back');
is_deeply($si->(10), [10,10,1], 'Subslice smaller than actual slice gives 3 slices back');
is_deeply($si->(20), [20,1], 'Subslice just smaller than actual slice gives 2 slices back');
is_deeply($si->(21), [21], 'Subslice equal to slice size gives 1 slice back');
my $slice_count = $large_slice->sub_Slice_Iterator(1)->reduce(sub { $_[0]+1 }, 0);
is($slice_count, 21, 'Giving a subslice size of 1 means 21 slices');
{
my $fake_slice = Bio::EnsEMBL::Slice->new(-SEQ => 'AAACCCTTTGGGA',
-START => 1, -END => 13, -SEQ_REGION_NAME => 'fake',
-COORD_SYSTEM => $coord_system);
my $subseqs = $fake_slice->sub_Slice_Iterator(3)->map(sub { $_->seq() })->to_arrayref();
my $expected = ['AAA','CCC','TTT','GGG','A'];
is_deeply($subseqs, $expected, 'Calling seq on subslices returns only the sequence for the bounds');
}
{
my $one_bp_slice = Bio::EnsEMBL::Slice->new(-SEQ => 'A',
-START => 1, -END => 1, -SEQ_REGION_NAME => 'fake',
-COORD_SYSTEM => $coord_system);
my $subseqs = $one_bp_slice->sub_Slice_Iterator(1)->map(sub { $_->seq() })->to_arrayref();
my $expected = ['A'];
is_deeply($subseqs, $expected, 'Calling seq on subslices for 1bp slice returns only an A');
}
}
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
# Test alternative region mappings from fetch_by_seq_region_id()
{
my $current_slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END);
my $alternative_seq_region_id = 1;
#Get the alternative seq region id. Expect nothing
ok(! defined $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id), 'Asking for a non-existent ID results in no slice returned');
#Save the tables and add the mapping values in
my @tables = ('seq_region_mapping', 'mapping_set');
$multi_db->save('core', @tables);
my $ms_sql = 'insert into mapping_set (mapping_set_id, internal_schema_build, external_schema_build) values (?,?,?)';
$db->dbc->sql_helper->execute_update(-SQL => $ms_sql, -PARAMS => [1, $db->_get_schema_build(), 'oldbuild']);
my $srm_sql = 'insert into seq_region_mapping (mapping_set_id, internal_seq_region_id, external_seq_region_id) values (?,?,?)';
$db->dbc->sql_helper->execute_update(-SQL => $srm_sql, -PARAMS => [1, $current_slice->get_seq_region_id(), $alternative_seq_region_id]);
#Force a refresh in CSA
delete $db->get_CoordSystemAdaptor->{$_} for qw/_internal_seq_region_mapping _external_seq_region_mapping/;
$db->get_CoordSystemAdaptor->_cache_seq_region_mapping();
my $alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id);
ok(!defined $alternative_slice, 'Cannot retrieve the alternative slice without asking to look at alternatives');
$alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id, undef, undef, undef, 1); #don't care about start,end,strand
ok(defined $alternative_slice, 'Got a slice after asking for it');
cmp_ok($current_slice->get_seq_region_id(), '==', $alternative_slice->get_seq_region_id(), 'Seq region IDs should be equivalent even though query seq_region_id was different');
#Restore & force a refresh
$multi_db->restore('core', @tables);
delete $db->get_CoordSystemAdaptor->{$_} for qw/_internal_seq_region_mapping _external_seq_region_mapping/;
$db->get_CoordSystemAdaptor->_cache_seq_region_mapping();
# Just checking we are back to normal
$alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id);
ok(!defined $alternative_slice, 'Cannot retrieve the alternative slice post restore');
}
done_testing();