Newer
Older
Graham McVicker
committed
use strict;
use warnings;
Ewan Birney
committed
use Test::More;
Ewan Birney
committed
use Bio::EnsEMBL::Test::TestUtils;
use Bio::EnsEMBL::Test::MultiTestDB;
Graham McVicker
committed
use Bio::EnsEMBL::Slice;
Andy Yates
committed
use Test::Exception;
Ewan Birney
committed
our $verbose = 0;
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
Ewan Birney
committed
Graham McVicker
committed
my $CHR = '20';
Graham McVicker
committed
my $END = 31_200_000;
my $STRAND = 1;
my $SEQ_REGION_LENGTH = 50e6;
Ewan Birney
committed
my $multi_db = Bio::EnsEMBL::Test::MultiTestDB->new;
Graham McVicker
committed
my $db = $multi_db->get_DBAdaptor('core');
Ewan Birney
committed
Graham McVicker
committed
#
# TEST - Slice creation from adaptor
Graham McVicker
committed
#
my $slice_adaptor = $db->get_SliceAdaptor;
my $csa = $db->get_CoordSystemAdaptor();
Ewan Birney
committed
my $slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END);
ok($slice->seq_region_name eq $CHR);
ok($slice->start == $START);
ok($slice->end == $END);
ok($slice->seq_region_length == 62842997);
ok($slice->adaptor == $slice_adaptor);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $coord_system = $csa->fetch_by_name('chromosome');
Ewan Birney
committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
my $test_seq = 'ATGCATGCATGCATGCATGCATGC';
my $test_slice = new Bio::EnsEMBL::Slice
(-seq_region_name => 'misc',
-seq_region_length => 24,
-start => 1,
-end => 24,
-strand => 1,
-coord_system => $coord_system,
-seq => $test_seq,
);
ok($test_slice->length == 24);
my $hash = $test_slice->get_base_count;
my $a = $hash->{'a'};
my $c = $hash->{'c'};
my $t = $hash->{'t'};
my $g = $hash->{'g'};
my $n = $hash->{'n'};
my $gc_content = $hash->{'%gc'};
ok($a == 6
&& $c == 6
&& $t == 6
&& $g == 6
&& $n == 0
&& $gc_content == 50
&& $a+$c+$t+$g+$n == $test_slice->length);
#
# test that subseq works correctly with attached sequence
#
my $subseq = $test_slice->subseq(2, 6);
debug("subseq = $subseq");
ok($subseq eq 'TGCAT');
$subseq = $test_slice->subseq(2,6,-1);
ok($subseq eq 'ATGCA');
debug("subseq = $subseq");
# test that subslice works correctly with attached sequence
my $sub_slice = $test_slice->sub_Slice(2, 6);
ok($sub_slice->seq eq 'TGCAT');
# test that invert works correctly with attached sequence
ok($sub_slice->invert()->seq() eq 'ATGCA');
# test that slice can be created without db, seq or coord system
{
my $warnings = q{};
my $new_stderr = IO::String->new(\$warnings);
my $oldfh = select(STDERR);
local *STDERR = $new_stderr;
$test_slice = Bio::EnsEMBL::Slice->new('-seq_region_name' => 'test',
'-start' => 1,
'-end' => 3);
my $check = qr/MSG: Slice without coordinate system/;
like($warnings, $check, 'Checking we are still warning about lack of coordinate system');
}
ok($test_slice);
ok($test_slice->seq() eq 'NNN');
debug("\$test_slice->name = " . $test_slice->name());
ok($test_slice->name() eq '::test:1:3:1');
$slice = new Bio::EnsEMBL::Slice
(-seq_region_name => $CHR,
-seq_region_length => $SEQ_REGION_LENGTH,
-start => $START,
-end => $END,
-strand => $STRAND,
-coord_system => $coord_system);
Ewan Birney
committed
ok($slice->seq_region_name eq $CHR);
ok($slice->start == $START);
ok($slice->end == $END);
Graham McVicker
committed
ok($slice->strand == $STRAND);
ok($slice->seq_region_length == $SEQ_REGION_LENGTH);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
$slice->adaptor($slice_adaptor);
ok($slice->adaptor == $slice_adaptor);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
#verify that chr_name start and end are contained in the name
my $name = $slice->name;
ok($name eq "chromosome:NCBI33:$CHR:$START:$END:$STRAND");
#
# Test Slice::length
#
ok($slice->length == ($END-$START + 1));
Ewan Birney
committed
# Test exception name
is($slice->assembly_exception_type(), 'REF', 'Type of region is REF');
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
Ewan Birney
committed
my $clone = $slice_adaptor->fetch_by_region('clone','AL121583.25');
my @attrib = @{$clone->get_all_Attributes('htg_phase')};
ok(@attrib == 1 && $attrib[0]->value() == 4);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $len = $clone->length();
$clone = $clone->expand(100,100);
ok(($clone->start == -99) && ($clone->end() == $len+100));
$clone = $clone->expand(-100,-100);
ok(($clone->start == 1) && ($clone->end() == $len));
$clone = $clone->expand(0,1000);
ok(($clone->start == 1) && ($clone->end() == $len + 1000));
$clone = $clone->expand(-1000, 0);
ok(($clone->start == 1001) && ($clone->end() == $len + 1000));
Ewan Birney
committed
#
# Test constrain_to_seq_region
#
my $tidy_clone = $clone->expand(1000000,10000000);
$tidy_clone = $tidy_clone->constrain_to_seq_region;
ok($tidy_clone->start == 1 && $tidy_clone->end == 84710, 'Huge expand call truncates nicely');
$tidy_clone = $clone->expand(0,-1000);
$tidy_clone = $tidy_clone->constrain_to_seq_region;
note($tidy_clone->start."-".$tidy_clone->end());
ok(($tidy_clone->start == 1001) && ($tidy_clone->end() == 84710), 'constrain_to_seq_region does no harm');
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $inverted_slice = $slice->invert;
ok($slice != $inverted_slice); #slice is not same object as inverted slice
#inverted slice on opposite strand
ok($slice->strand == ($inverted_slice->strand * -1));
#slice still on same strand
ok($slice->strand == $STRAND);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $seq = uc $slice->seq;
my $invert_seq = uc $slice->invert->seq;
Ewan Birney
committed
Graham McVicker
committed
ok(length($seq) == $slice->length); #sequence is correct length
$seq = reverse $seq; #reverse complement seq
$seq =~ tr/ACTG/TGAC/;
Graham McVicker
committed
ok($seq eq $invert_seq); #revcom same as seq on inverted slice
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $SPAN = 10;
my $sub_seq = uc $slice->subseq(-$SPAN,$SPAN);
my $invert_sub_seq = uc $slice->invert->subseq( $slice->length - $SPAN + 1,
$slice->length + $SPAN + 1);
Graham McVicker
committed
ok(length $sub_seq == (2*$SPAN) + 1 );
$sub_seq = reverse $sub_seq;
$sub_seq =~ tr/ACTG/TGAC/;
ok($sub_seq eq $invert_sub_seq);
#
# Test Slice::get_all_PredictionTranscripts
#
my $pts = $slice->get_all_PredictionTranscripts;
#
# Test Slice::get_seq_region_id
#
ok($slice->get_seq_region_id());
# Test Slice::get_all_DnaAlignFeatures
#
my $count = 0;
my $dafs = $slice->get_all_DnaAlignFeatures;
is(scalar(@$dafs), 27081, 'Checking count of returned DnaAlignFeatures');
# Test Slice::get_all_ProteinAlignFeatures
#
my $pafs = $slice->get_all_ProteinAlignFeatures;
is(scalar(@$pafs),7205, 'Checking count of returned ProteinAlignFeatures');
# Test Slice::get_all_SimilarityFeatures
#
ok($count == scalar @{$slice->get_all_SimilarityFeatures});
#
# Test Slice::get_all_SimpleFeatures
#
ok(scalar @{$slice->get_all_SimpleFeatures});
#
# Test Slice::get_all_RepeatFeatures
#
ok(scalar @{$slice->get_all_RepeatFeatures});
#
# Test Slice::get_all_Genes
#
ok(scalar @{$slice->get_all_Genes});
#
# Test Slice::get_all_Genes_by_type
ok(scalar @{$slice->get_all_Genes_by_type('protein_coding')});
#
# Test Slice::get_all_Transcripts
#
ok(scalar @{$slice->get_all_Transcripts});
# Test Slice::get_all_KaryotypeBands
#
ok(scalar @{$slice->get_all_KaryotypeBands});
Ewan Birney
committed
# Test Slice::get_RepeatMaskedSeq
#
$seq = $slice->seq;
ok(length($slice->get_repeatmasked_seq->seq) == length($seq));
my $softmasked_seq = $slice->get_repeatmasked_seq(['RepeatMask'], 1)->seq;
ok($softmasked_seq ne $seq);
ok(uc($softmasked_seq) eq $seq);
$softmasked_seq = $seq = undef;
# Test Slice::get_all_MiscFeatures
ok(scalar @{$slice->get_all_MiscFeatures()});
my @segments = @{$slice->project( 'seqlevel' )};
ok(scalar @segments );
eval {
my @sub_slices = map { $_->to_Slice() } @segments;
my @starts = map { $_->from_start() } @segments;
my @ends = map { $_->from_end() } @segments;
};
if( $@ ) {
debug( "to_Slice call failed on segment of projection" );
ok(0);
} else {
ok(1)
}
Ewan Birney
committed
#my $super_slices = $slice->get_all_supercontig_Slices();
Ewan Birney
committed
##
## get_all_supercontig_Slices()
##
#debug( "Supercontig starts at ".$super_slices->[0]->chr_start() );
#ok( $super_slices->[0]->chr_start() == 29591966 );
#debug( "Supercontig name ".$super_slices->[0]->name() );
Ewan Birney
committed
#ok( $super_slices->[0]->name() eq "NT_028392" );
$hash = $slice->get_base_count;
$a = $hash->{'a'};
$c = $hash->{'c'};
$t = $hash->{'t'};
$g = $hash->{'g'};
$n = $hash->{'n'};
$gc_content = $hash->{'%gc'};
debug( "Base count: a=$a c=$c t=$t g=$g n=$n \%gc=$gc_content");
ok($a == 234371
&& $c == 224761
&& $t == 243734
&& $g == 227135
&& $n == 0
&& $gc_content == 48.59
&& $a+$c+$t+$g+$n == $slice->length);
$slice = $slice_adaptor->fetch_by_region('chromosome', '20', 10, 30);
my $sr_slice = $slice->seq_region_Slice();
ok($sr_slice->start() == 1 &&
$sr_slice->end() == $slice->seq_region_length() &&
$sr_slice->strand() == 1);
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
# synonym tests
debug("START syn test");
my $multi = $multi_db;
$multi->save("core", "seq_region_synonym");
debug("get slice");
$slice = $slice_adaptor->fetch_by_region('chromosome', 20, 1, 10);
my @alt_names = @{$slice->get_all_synonyms()};
foreach my $syn (@alt_names){
debug("syn\t".$syn->name."\n");
}
debug("altnames ".scalar(@alt_names)."\n");
ok(@alt_names == 2);
$slice->add_synonym("20ish");
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 3);
#slcie aleady stored so need to store syns
my $syn_adap = $db->get_SeqRegionSynonymAdaptor;
foreach my $syn (@alt_names){
$syn_adap->store($syn);
}
$slice = $slice_adaptor->fetch_by_region('chromosome', 20, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 3);
$multi->restore();
$multi->save("core", 'seq_region_synonym');
$slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 0);
$slice->add_synonym("1ish");
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 1);
foreach my $syn (@alt_names){
$syn_adap->store($syn);
}
$slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 1);
$multi->restore();
Andy Yates
committed
# Testing synonym searching
{
my $chr_20 = $slice_adaptor->fetch_by_region('chromosome', 20);
my ($syn) = @{$chr_20->get_all_synonyms('RFAM')};
is($syn->name(), 'anoth_20', 'We have the right synonym');
dies_ok { $chr_20->get_all_synonyms('RFAM', 'wibble') } 'Bad external DB version means dying code';
dies_ok { $chr_20->get_all_synonyms('RFAMing', 'wibble') } 'Bad external DB name means dying code';
($syn) = @{$chr_20->get_all_synonyms('RFAM', 1)};
is($syn->name(), 'anoth_20', 'We have the right synonym');
}
#Test assembly exception type on HAP
my $hap_slice = $slice_adaptor->fetch_by_region(undef, '20_HAP1');
is($hap_slice->assembly_exception_type(), 'HAP', 'Ensuring haplotype regions are HAP');
my $chr_one_slice = $slice_adaptor->fetch_by_region('chromosome', '1', 1, 10);
is($chr_one_slice->assembly_exception_type(), 'REF', 'Ensuring reference regions are REF');
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
#Test slice iterator
{
my $large_slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 21);
my $map = sub { $_->length() };
my $si = sub {
my ($chunk) = @_;
return $large_slice->sub_Slice_Iterator($chunk)->map($map)->to_arrayref();
};
is_deeply($si->(100), [21], 'Subslice larger than actual slice gives just 1 slice back');
is_deeply($si->(10), [10,10,1], 'Subslice smaller than actual slice gives 3 slices back');
is_deeply($si->(20), [20,1], 'Subslice just smaller than actual slice gives 2 slices back');
is_deeply($si->(21), [21], 'Subslice equal to slice size gives 1 slice back');
my $slice_count = $large_slice->sub_Slice_Iterator(1)->reduce(sub { $_[0]+1 }, 0);
is($slice_count, 21, 'Giving a subslice size of 1 means 21 slices');
{
my $fake_slice = Bio::EnsEMBL::Slice->new(-SEQ => 'AAACCCTTTGGGA',
-START => 1, -END => 13, -SEQ_REGION_NAME => 'fake',
-COORD_SYSTEM => $coord_system);
my $subseqs = $fake_slice->sub_Slice_Iterator(3)->map(sub { $_->seq() })->to_arrayref();
my $expected = ['AAA','CCC','TTT','GGG','A'];
is_deeply($subseqs, $expected, 'Calling seq on subslices returns only the sequence for the bounds');
}
{
my $one_bp_slice = Bio::EnsEMBL::Slice->new(-SEQ => 'A',
-START => 1, -END => 1, -SEQ_REGION_NAME => 'fake',
-COORD_SYSTEM => $coord_system);
my $subseqs = $one_bp_slice->sub_Slice_Iterator(1)->map(sub { $_->seq() })->to_arrayref();
my $expected = ['A'];
is_deeply($subseqs, $expected, 'Calling seq on subslices for 1bp slice returns only an A');
}
}
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
# Test alternative region mappings from fetch_by_seq_region_id()
{
my $current_slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END);
my $alternative_seq_region_id = 1;
#Get the alternative seq region id. Expect nothing
ok(! defined $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id), 'Asking for a non-existent ID results in no slice returned');
#Save the tables and add the mapping values in
my @tables = ('seq_region_mapping', 'mapping_set');
$multi_db->save('core', @tables);
my $ms_sql = 'insert into mapping_set (mapping_set_id, internal_schema_build, external_schema_build) values (?,?,?)';
$db->dbc->sql_helper->execute_update(-SQL => $ms_sql, -PARAMS => [1, $db->_get_schema_build(), 'oldbuild']);
my $srm_sql = 'insert into seq_region_mapping (mapping_set_id, internal_seq_region_id, external_seq_region_id) values (?,?,?)';
$db->dbc->sql_helper->execute_update(-SQL => $srm_sql, -PARAMS => [1, $current_slice->get_seq_region_id(), $alternative_seq_region_id]);
#Force a refresh in CSA
delete $db->get_CoordSystemAdaptor->{$_} for qw/_internal_seq_region_mapping _external_seq_region_mapping/;
$db->get_CoordSystemAdaptor->_cache_seq_region_mapping();
my $alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id);
ok(!defined $alternative_slice, 'Cannot retrieve the alternative slice without asking to look at alternatives');
$alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id, undef, undef, undef, 1); #don't care about start,end,strand
ok(defined $alternative_slice, 'Got a slice after asking for it');
cmp_ok($current_slice->get_seq_region_id(), '==', $alternative_slice->get_seq_region_id(), 'Seq region IDs should be equivalent even though query seq_region_id was different');
#Restore & force a refresh
$multi_db->restore('core', @tables);
delete $db->get_CoordSystemAdaptor->{$_} for qw/_internal_seq_region_mapping _external_seq_region_mapping/;
$db->get_CoordSystemAdaptor->_cache_seq_region_mapping();
# Just checking we are back to normal
$alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id);
ok(!defined $alternative_slice, 'Cannot retrieve the alternative slice post restore');
}
done_testing();