Newer
Older
Graham McVicker
committed
use strict;
use warnings;
Ewan Birney
committed
use Test::More;
Ewan Birney
committed
use Bio::EnsEMBL::Test::TestUtils;
use Bio::EnsEMBL::Test::MultiTestDB;
Graham McVicker
committed
use Bio::EnsEMBL::Slice;
Andy Yates
committed
use Test::Exception;
Ewan Birney
committed
our $verbose = 0;
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
Ewan Birney
committed
Graham McVicker
committed
my $CHR = '20';
Graham McVicker
committed
my $END = 31_200_000;
my $STRAND = 1;
my $SEQ_REGION_LENGTH = 50e6;
Ewan Birney
committed
my $multi_db = Bio::EnsEMBL::Test::MultiTestDB->new;
Graham McVicker
committed
my $db = $multi_db->get_DBAdaptor('core');
Ewan Birney
committed
Graham McVicker
committed
#
# TEST - Slice creation from adaptor
Graham McVicker
committed
#
my $slice_adaptor = $db->get_SliceAdaptor;
my $csa = $db->get_CoordSystemAdaptor();
Ewan Birney
committed
my $slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END);
ok($slice->seq_region_name eq $CHR);
ok($slice->start == $START);
ok($slice->end == $END);
ok($slice->seq_region_length == 62842997);
ok($slice->adaptor == $slice_adaptor);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $coord_system = $csa->fetch_by_name('chromosome');
Ewan Birney
committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
my $test_seq = 'ATGCATGCATGCATGCATGCATGC';
my $test_slice = new Bio::EnsEMBL::Slice
(-seq_region_name => 'misc',
-seq_region_length => 24,
-start => 1,
-end => 24,
-strand => 1,
-coord_system => $coord_system,
-seq => $test_seq,
);
ok($test_slice->length == 24);
my $hash = $test_slice->get_base_count;
my $a = $hash->{'a'};
my $c = $hash->{'c'};
my $t = $hash->{'t'};
my $g = $hash->{'g'};
my $n = $hash->{'n'};
my $gc_content = $hash->{'%gc'};
ok($a == 6
&& $c == 6
&& $t == 6
&& $g == 6
&& $n == 0
&& $gc_content == 50
&& $a+$c+$t+$g+$n == $test_slice->length);
#
# test that subseq works correctly with attached sequence
#
my $subseq = $test_slice->subseq(2, 6);
debug("subseq = $subseq");
ok($subseq eq 'TGCAT');
$subseq = $test_slice->subseq(2,6,-1);
ok($subseq eq 'ATGCA');
debug("subseq = $subseq");
# test that subslice works correctly with attached sequence
my $sub_slice = $test_slice->sub_Slice(2, 6);
ok($sub_slice->seq eq 'TGCAT');
# test that invert works correctly with attached sequence
ok($sub_slice->invert()->seq() eq 'ATGCA');
# test that slice can be created without db, seq or coord system
{
my $warnings = q{};
my $new_stderr = IO::String->new(\$warnings);
my $oldfh = select(STDERR);
local *STDERR = $new_stderr;
$test_slice = Bio::EnsEMBL::Slice->new('-seq_region_name' => 'test',
'-start' => 1,
'-end' => 3);
my $check = qr/MSG: Slice without coordinate system/;
like($warnings, $check, 'Checking we are still warning about lack of coordinate system');
}
ok($test_slice);
ok($test_slice->seq() eq 'NNN');
debug("\$test_slice->name = " . $test_slice->name());
ok($test_slice->name() eq '::test:1:3:1');
$slice = new Bio::EnsEMBL::Slice
(-seq_region_name => $CHR,
-seq_region_length => $SEQ_REGION_LENGTH,
-start => $START,
-end => $END,
-strand => $STRAND,
-coord_system => $coord_system);
Ewan Birney
committed
ok($slice->seq_region_name eq $CHR);
ok($slice->start == $START);
ok($slice->end == $END);
Graham McVicker
committed
ok($slice->strand == $STRAND);
ok($slice->seq_region_length == $SEQ_REGION_LENGTH);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
$slice->adaptor($slice_adaptor);
ok($slice->adaptor == $slice_adaptor);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
#verify that chr_name start and end are contained in the name
my $name = $slice->name;
ok($name eq "chromosome:NCBI33:$CHR:$START:$END:$STRAND");
#
# Test Slice::length
#
ok($slice->length == ($END-$START + 1));
Ewan Birney
committed
# Test exception name
is($slice->assembly_exception_type(), 'REF', 'Type of region is REF');
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
Ewan Birney
committed
my $clone = $slice_adaptor->fetch_by_region('clone','AL121583.25');
my @attrib = @{$clone->get_all_Attributes('htg_phase')};
ok(@attrib == 1 && $attrib[0]->value() == 4);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $len = $clone->length();
$clone = $clone->expand(100,100);
ok(($clone->start == -99) && ($clone->end() == $len+100));
$clone = $clone->expand(-100,-100);
ok(($clone->start == 1) && ($clone->end() == $len));
$clone = $clone->expand(0,1000);
ok(($clone->start == 1) && ($clone->end() == $len + 1000));
$clone = $clone->expand(-1000, 0);
ok(($clone->start == 1001) && ($clone->end() == $len + 1000));
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $inverted_slice = $slice->invert;
ok($slice != $inverted_slice); #slice is not same object as inverted slice
#inverted slice on opposite strand
ok($slice->strand == ($inverted_slice->strand * -1));
#slice still on same strand
ok($slice->strand == $STRAND);
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $seq = uc $slice->seq;
my $invert_seq = uc $slice->invert->seq;
Ewan Birney
committed
Graham McVicker
committed
ok(length($seq) == $slice->length); #sequence is correct length
$seq = reverse $seq; #reverse complement seq
$seq =~ tr/ACTG/TGAC/;
Graham McVicker
committed
ok($seq eq $invert_seq); #revcom same as seq on inverted slice
Ewan Birney
committed
Graham McVicker
committed
#
Graham McVicker
committed
#
my $SPAN = 10;
my $sub_seq = uc $slice->subseq(-$SPAN,$SPAN);
my $invert_sub_seq = uc $slice->invert->subseq( $slice->length - $SPAN + 1,
$slice->length + $SPAN + 1);
Graham McVicker
committed
ok(length $sub_seq == (2*$SPAN) + 1 );
$sub_seq = reverse $sub_seq;
$sub_seq =~ tr/ACTG/TGAC/;
ok($sub_seq eq $invert_sub_seq);
#
# Test Slice::get_all_PredictionTranscripts
#
my $pts = $slice->get_all_PredictionTranscripts;
#
# Test Slice::get_seq_region_id
#
ok($slice->get_seq_region_id());
# Test Slice::get_all_DnaAlignFeatures
#
my $count = 0;
my $dafs = $slice->get_all_DnaAlignFeatures;
# Test Slice::get_all_ProteinAlignFeatures
#
my $pafs = $slice->get_all_ProteinAlignFeatures;
is(scalar(@$pafs),7205, 'Checking count of returned ProteinAlignFeatures');
# Test Slice::get_all_SimilarityFeatures
#
ok($count == scalar @{$slice->get_all_SimilarityFeatures});
#
# Test Slice::get_all_SimpleFeatures
#
ok(scalar @{$slice->get_all_SimpleFeatures});
#
# Test Slice::get_all_RepeatFeatures
#
ok(scalar @{$slice->get_all_RepeatFeatures});
#
# Test Slice::get_all_Genes
#
ok(scalar @{$slice->get_all_Genes});
#
# Test Slice::get_all_Genes_by_type
ok(scalar @{$slice->get_all_Genes_by_type('protein_coding')});
#
# Test Slice::get_all_Transcripts
#
ok(scalar @{$slice->get_all_Transcripts});
# Test Slice::get_all_KaryotypeBands
#
ok(scalar @{$slice->get_all_KaryotypeBands});
Ewan Birney
committed
# Test Slice::get_RepeatMaskedSeq
#
$seq = $slice->seq;
ok(length($slice->get_repeatmasked_seq->seq) == length($seq));
my $softmasked_seq = $slice->get_repeatmasked_seq(['RepeatMask'], 1)->seq;
ok($softmasked_seq ne $seq);
ok(uc($softmasked_seq) eq $seq);
$softmasked_seq = $seq = undef;
# Test Slice::get_all_MiscFeatures
ok(scalar @{$slice->get_all_MiscFeatures()});
my @segments = @{$slice->project( 'seqlevel' )};
ok(scalar @segments );
eval {
my @sub_slices = map { $_->to_Slice() } @segments;
my @starts = map { $_->from_start() } @segments;
my @ends = map { $_->from_end() } @segments;
};
if( $@ ) {
debug( "to_Slice call failed on segment of projection" );
ok(0);
} else {
ok(1)
}
Ewan Birney
committed
#my $super_slices = $slice->get_all_supercontig_Slices();
Ewan Birney
committed
##
## get_all_supercontig_Slices()
##
#debug( "Supercontig starts at ".$super_slices->[0]->chr_start() );
#ok( $super_slices->[0]->chr_start() == 29591966 );
#debug( "Supercontig name ".$super_slices->[0]->name() );
Ewan Birney
committed
#ok( $super_slices->[0]->name() eq "NT_028392" );
$hash = $slice->get_base_count;
$a = $hash->{'a'};
$c = $hash->{'c'};
$t = $hash->{'t'};
$g = $hash->{'g'};
$n = $hash->{'n'};
$gc_content = $hash->{'%gc'};
debug( "Base count: a=$a c=$c t=$t g=$g n=$n \%gc=$gc_content");
ok($a == 234371
&& $c == 224761
&& $t == 243734
&& $g == 227135
&& $n == 0
&& $gc_content == 48.59
&& $a+$c+$t+$g+$n == $slice->length);
$slice = $slice_adaptor->fetch_by_region('chromosome', '20', 10, 30);
my $sr_slice = $slice->seq_region_Slice();
ok($sr_slice->start() == 1 &&
$sr_slice->end() == $slice->seq_region_length() &&
$sr_slice->strand() == 1);
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
# synonym tests
debug("START syn test");
my $multi = $multi_db;
$multi->save("core", "seq_region_synonym");
debug("get slice");
$slice = $slice_adaptor->fetch_by_region('chromosome', 20, 1, 10);
my @alt_names = @{$slice->get_all_synonyms()};
foreach my $syn (@alt_names){
debug("syn\t".$syn->name."\n");
}
debug("altnames ".scalar(@alt_names)."\n");
ok(@alt_names == 2);
$slice->add_synonym("20ish");
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 3);
#slcie aleady stored so need to store syns
my $syn_adap = $db->get_SeqRegionSynonymAdaptor;
foreach my $syn (@alt_names){
$syn_adap->store($syn);
}
$slice = $slice_adaptor->fetch_by_region('chromosome', 20, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 3);
$multi->restore();
$multi->save("core", 'seq_region_synonym');
$slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 0);
$slice->add_synonym("1ish");
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 1);
foreach my $syn (@alt_names){
$syn_adap->store($syn);
}
$slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 10);
@alt_names = @{$slice->get_all_synonyms()};
ok(@alt_names == 1);
$multi->restore();
Andy Yates
committed
# Testing synonym searching
{
my $chr_20 = $slice_adaptor->fetch_by_region('chromosome', 20);
my ($syn) = @{$chr_20->get_all_synonyms('RFAM')};
is($syn->name(), 'anoth_20', 'We have the right synonym');
dies_ok { $chr_20->get_all_synonyms('RFAM', 'wibble') } 'Bad external DB version means dying code';
dies_ok { $chr_20->get_all_synonyms('RFAMing', 'wibble') } 'Bad external DB name means dying code';
($syn) = @{$chr_20->get_all_synonyms('RFAM', 1)};
is($syn->name(), 'anoth_20', 'We have the right synonym');
}
#Test assembly exception type on HAP
my $hap_slice = $slice_adaptor->fetch_by_region(undef, '20_HAP1');
is($hap_slice->assembly_exception_type(), 'HAP', 'Ensuring haplotype regions are HAP');
my $chr_one_slice = $slice_adaptor->fetch_by_region('chromosome', '1', 1, 10);
is($chr_one_slice->assembly_exception_type(), 'REF', 'Ensuring reference regions are REF');
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
#Test slice iterator
{
my $large_slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 21);
my $map = sub { $_->length() };
my $si = sub {
my ($chunk) = @_;
return $large_slice->sub_Slice_Iterator($chunk)->map($map)->to_arrayref();
};
is_deeply($si->(100), [21], 'Subslice larger than actual slice gives just 1 slice back');
is_deeply($si->(10), [10,10,1], 'Subslice smaller than actual slice gives 3 slices back');
is_deeply($si->(20), [20,1], 'Subslice just smaller than actual slice gives 2 slices back');
is_deeply($si->(21), [21], 'Subslice equal to slice size gives 1 slice back');
my $slice_count = $large_slice->sub_Slice_Iterator(1)->reduce(sub { $_[0]+1 }, 0);
is($slice_count, 21, 'Giving a subslice size of 1 means 21 slices');
{
my $fake_slice = Bio::EnsEMBL::Slice->new(-SEQ => 'AAACCCTTTGGGA',
-START => 1, -END => 13, -SEQ_REGION_NAME => 'fake',
-COORD_SYSTEM => $coord_system);
my $subseqs = $fake_slice->sub_Slice_Iterator(3)->map(sub { $_->seq() })->to_arrayref();
my $expected = ['AAA','CCC','TTT','GGG','A'];
is_deeply($subseqs, $expected, 'Calling seq on subslices returns only the sequence for the bounds');
}
{
my $one_bp_slice = Bio::EnsEMBL::Slice->new(-SEQ => 'A',
-START => 1, -END => 1, -SEQ_REGION_NAME => 'fake',
-COORD_SYSTEM => $coord_system);
my $subseqs = $one_bp_slice->sub_Slice_Iterator(1)->map(sub { $_->seq() })->to_arrayref();
my $expected = ['A'];
is_deeply($subseqs, $expected, 'Calling seq on subslices for 1bp slice returns only an A');
}
}
done_testing();