Skip to content
Snippets Groups Projects
slice.t 15 KiB
Newer Older
# Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#      http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

use Bio::EnsEMBL::Test::TestUtils;
Andy Yates's avatar
Andy Yates committed
use IO::String;
use Bio::EnsEMBL::Test::MultiTestDB;
use Bio::EnsEMBL::ProjectionSegment;
# TEST - Slice Compiles
Graham McVicker's avatar
Graham McVicker committed
my $START         = 30_270_000;
my $SEQ_REGION_LENGTH = 50e6;
my $multi_db = Bio::EnsEMBL::Test::MultiTestDB->new;
my $db = $multi_db->get_DBAdaptor('core');
# TEST - Slice creation from adaptor
#
my $slice_adaptor = $db->get_SliceAdaptor;
my $csa = $db->get_CoordSystemAdaptor();
my $slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END);
ok($slice->seq_region_name eq $CHR);
ok($slice->start == $START);
ok($slice->end == $END);
ok($slice->seq_region_length == 62842997);
ok($slice->adaptor == $slice_adaptor);
#TEST - Slice::new
my $coord_system = $csa->fetch_by_name('chromosome');

my $test_seq = 'ATGCATGCATGCATGCATGCATGC';
my $test_slice = new Bio::EnsEMBL::Slice
  (-seq_region_name  => 'misc',
   -seq_region_length => 24,
   -start            => 1,
   -end              => 24,
   -strand           => 1,
   -coord_system     => $coord_system,
   -seq              => $test_seq,
   );




ok($test_slice->length == 24);

my $hash = $test_slice->get_base_count;
my $a = $hash->{'a'};
my $c = $hash->{'c'};
my $t = $hash->{'t'};
my $g = $hash->{'g'};
my $n = $hash->{'n'};
my $gc_content = $hash->{'%gc'};

ok($a == 6
   && $c == 6 
   && $t == 6 
   && $g == 6 
   && $n == 0 
   && $gc_content == 50 
   && $a+$c+$t+$g+$n == $test_slice->length);


#
# test that subseq works correctly with attached sequence
#

my $subseq = $test_slice->subseq(2, 6);

debug("subseq = $subseq");
ok($subseq eq 'TGCAT');

$subseq = $test_slice->subseq(2,6,-1);
ok($subseq eq 'ATGCA');
debug("subseq = $subseq");

# test that subslice works correctly with attached sequence

my $sub_slice = $test_slice->sub_Slice(2, 6);
ok($sub_slice->seq eq 'TGCAT');

# test that invert works correctly with attached sequence
ok($sub_slice->invert()->seq() eq 'ATGCA');


# test that slice can be created without db, seq or coord system
Andy Yates's avatar
Andy Yates committed
{
  my $warnings = q{};
  my $new_stderr = IO::String->new(\$warnings);
  my $oldfh = select(STDERR);
  local *STDERR = $new_stderr;
  $test_slice = Bio::EnsEMBL::Slice->new('-seq_region_name' => 'test',
                                         '-start'           => 1,
                                         '-end'             => 3);
  my $check = qr/MSG: Slice without coordinate system/;
  like($warnings, $check, 'Checking we are still warning about lack of coordinate system');
}

ok($test_slice);
ok($test_slice->seq() eq 'NNN');

debug("\$test_slice->name = " . $test_slice->name());

ok($test_slice->name() eq '::test:1:3:1');

$slice = new Bio::EnsEMBL::Slice
  (-seq_region_name   => $CHR,
   -seq_region_length => $SEQ_REGION_LENGTH,
   -start             => $START,
   -end               => $END,
   -strand            => $STRAND,
   -coord_system      => $coord_system);
ok($slice->seq_region_name eq $CHR);
ok($slice->start == $START);
ok($slice->end == $END);
ok($slice->seq_region_length == $SEQ_REGION_LENGTH);
#Test - Slice::adaptor
#
$slice->adaptor($slice_adaptor);
ok($slice->adaptor == $slice_adaptor);
#1 Test Slice::name
#
#verify that chr_name start and end are contained in the name
my $name = $slice->name;
ok($name eq "chromosome:NCBI33:$CHR:$START:$END:$STRAND");

#
# Test Slice::length
#
ok($slice->length == ($END-$START + 1));
# Test exception name
is($slice->assembly_exception_type(), 'REF', 'Type of region is REF');
# Test get_attributes
my $clone = $slice_adaptor->fetch_by_region('clone','AL121583.25');

my @attrib = @{$clone->get_all_Attributes('htg_phase')};
ok(@attrib == 1 && $attrib[0]->value() == 4);
my $len = $clone->length();

$clone = $clone->expand(100,100);
ok(($clone->start == -99) && ($clone->end() == $len+100));

$clone = $clone->expand(-100,-100);
ok(($clone->start == 1) && ($clone->end() == $len));

$clone = $clone->expand(0,1000);
ok(($clone->start == 1) && ($clone->end() == $len + 1000));

$clone = $clone->expand(-1000, 0);
ok(($clone->start == 1001) && ($clone->end() == $len + 1000));
#
# Test constrain_to_seq_region
#
my $tidy_clone = $clone->expand(1000000,10000000);
$tidy_clone = $tidy_clone->constrain_to_seq_region;
ok($tidy_clone->start == 1 && $tidy_clone->end == 84710, 'Huge expand call truncates nicely');

$tidy_clone = $clone->expand(0,-1000);
$tidy_clone = $tidy_clone->constrain_to_seq_region;
note($tidy_clone->start."-".$tidy_clone->end());
ok(($tidy_clone->start == 1001) && ($tidy_clone->end() == 84710), 'constrain_to_seq_region does no harm');
# Test Slice::invert
#
my $inverted_slice = $slice->invert;
ok($slice != $inverted_slice); #slice is not same object as inverted slice
#inverted slice on opposite strand
ok($slice->strand == ($inverted_slice->strand * -1)); 
#slice still on same strand
ok($slice->strand == $STRAND);
# Test Slice::seq
Graham McVicker's avatar
Graham McVicker committed
my $seq = uc $slice->seq;
my $invert_seq = uc $slice->invert->seq;
ok(length($seq) == $slice->length); #sequence is correct length
Graham McVicker's avatar
Graham McVicker committed

$seq = reverse $seq;  #reverse complement seq
$seq =~ tr/ACTG/TGAC/; 

ok($seq eq $invert_seq); #revcom same as seq on inverted slice
# Test Slice::subseq
Graham McVicker's avatar
Graham McVicker committed
my $sub_seq = uc $slice->subseq(-$SPAN,$SPAN);
my $invert_sub_seq = uc $slice->invert->subseq( $slice->length - $SPAN + 1, 
						$slice->length + $SPAN + 1);

Graham McVicker's avatar
Graham McVicker committed
$sub_seq = reverse $sub_seq;
$sub_seq =~ tr/ACTG/TGAC/;

ok($sub_seq eq $invert_sub_seq);

#
# Test Slice::get_all_PredictionTranscripts
Graham McVicker's avatar
Graham McVicker committed
#
my $pts = $slice->get_all_PredictionTranscripts;
ok(@$pts == 24);
#
# Test Slice::get_seq_region_id
#
ok($slice->get_seq_region_id());
# Test Slice::get_all_DnaAlignFeatures
Graham McVicker's avatar
Graham McVicker committed
#
my $count = 0;
my $dafs = $slice->get_all_DnaAlignFeatures;
is(scalar(@$dafs), 27081, 'Checking count of returned DnaAlignFeatures');
Graham McVicker's avatar
Graham McVicker committed
$count += scalar @$dafs;

#
# Test Slice::get_all_ProteinAlignFeatures
Graham McVicker's avatar
Graham McVicker committed
#
my $pafs = $slice->get_all_ProteinAlignFeatures;
is(scalar(@$pafs),7205, 'Checking count of returned ProteinAlignFeatures');
Graham McVicker's avatar
Graham McVicker committed
$count += scalar @$pafs;

#
# Test Slice::get_all_SimilarityFeatures
Graham McVicker's avatar
Graham McVicker committed
#
ok($count == scalar @{$slice->get_all_SimilarityFeatures});

#
#  Test Slice::get_all_SimpleFeatures
Graham McVicker's avatar
Graham McVicker committed
#
ok(scalar @{$slice->get_all_SimpleFeatures});

#
#  Test Slice::get_all_RepeatFeatures
Graham McVicker's avatar
Graham McVicker committed
#
ok(scalar @{$slice->get_all_RepeatFeatures});

#
#  Test Slice::get_all_Genes
Graham McVicker's avatar
Graham McVicker committed
#
ok(scalar @{$slice->get_all_Genes});

#
#  Test Slice::get_all_Genes_by_type
Graham McVicker's avatar
Graham McVicker committed
#
ok(scalar @{$slice->get_all_Genes_by_type('protein_coding')});
#
#  Test Slice::get_all_Transcripts
#
ok(scalar @{$slice->get_all_Transcripts});

# Test Slice::get_all_KaryotypeBands
Graham McVicker's avatar
Graham McVicker committed
#
ok(scalar @{$slice->get_all_KaryotypeBands});
Graham McVicker's avatar
Graham McVicker committed
#
# Test Slice::get_RepeatMaskedSeq
Graham McVicker's avatar
Graham McVicker committed
#
$seq = $slice->seq;
ok(length($slice->get_repeatmasked_seq->seq) == length($seq));

my $softmasked_seq = $slice->get_repeatmasked_seq(['RepeatMask'], 1)->seq;

ok($softmasked_seq ne $seq);
ok(uc($softmasked_seq) eq $seq);

$softmasked_seq = $seq = undef;
# Test Slice::get_all_MiscFeatures
Graham McVicker's avatar
Graham McVicker committed
#
ok(scalar @{$slice->get_all_MiscFeatures()});
# Test Slice::project
Graham McVicker's avatar
Graham McVicker committed
#

my @segments = @{$slice->project( 'seqlevel' )};
ok(scalar @segments );

eval {
  my @sub_slices = map { $_->to_Slice() } @segments;
  my @starts = map { $_->from_start() } @segments;
  my @ends = map { $_->from_end() } @segments;
};

if( $@ ) {
  debug( "to_Slice call failed on segment of projection" );
  ok(0);
} else {
  ok(1)
}
#my $super_slices = $slice->get_all_supercontig_Slices();
##
## get_all_supercontig_Slices()
##
#debug( "Supercontig starts at ".$super_slices->[0]->chr_start() );
#ok( $super_slices->[0]->chr_start() == 29591966 );
#debug( "Supercontig name ".$super_slices->[0]->name() );
#ok( $super_slices->[0]->name() eq "NT_028392" );
# get_base_count
$hash = $slice->get_base_count;
$a = $hash->{'a'};
$c = $hash->{'c'};
$t = $hash->{'t'};
$g = $hash->{'g'};
$n = $hash->{'n'};
$gc_content = $hash->{'%gc'};

debug( "Base count: a=$a c=$c t=$t g=$g n=$n \%gc=$gc_content");
ok($a == 234371 
   && $c == 224761 
   && $t == 243734 
   && $g == 227135 
   && $n == 0 
   && $gc_content == 48.59 
   && $a+$c+$t+$g+$n == $slice->length);


$slice = $slice_adaptor->fetch_by_region('chromosome', '20', 10, 30);

my $sr_slice = $slice->seq_region_Slice();

ok($sr_slice->start() == 1 &&
   $sr_slice->end()   == $slice->seq_region_length() &&
   $sr_slice->strand() == 1);






# synonym tests
debug("START syn test");
my $multi = $multi_db;
$multi->save("core", "seq_region_synonym");

debug("get slice");
$slice = $slice_adaptor->fetch_by_region('chromosome', 20, 1, 10);

my @alt_names = @{$slice->get_all_synonyms()};
foreach my $syn (@alt_names){
  debug("syn\t".$syn->name."\n");
}
debug("altnames ".scalar(@alt_names)."\n");
ok(@alt_names == 2);


$slice->add_synonym("20ish");
@alt_names = @{$slice->get_all_synonyms()};

ok(@alt_names == 3);


#slcie aleady stored so need to store syns
my $syn_adap =  $db->get_SeqRegionSynonymAdaptor; 
foreach my $syn (@alt_names){
 $syn_adap->store($syn);	
}

$slice = $slice_adaptor->fetch_by_region('chromosome', 20, 1, 10);

@alt_names = @{$slice->get_all_synonyms()};

ok(@alt_names == 3);

$multi->restore();



$multi->save("core", 'seq_region_synonym');

$slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 10);

@alt_names = @{$slice->get_all_synonyms()};

ok(@alt_names == 0);

$slice->add_synonym("1ish");

@alt_names = @{$slice->get_all_synonyms()};

ok(@alt_names == 1);

foreach my $syn (@alt_names){
 $syn_adap->store($syn);	
}


$slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 10);

@alt_names = @{$slice->get_all_synonyms()};

ok(@alt_names == 1);

$multi->restore();
# Testing synonym searching
{
  my $chr_20 = $slice_adaptor->fetch_by_region('chromosome', 20);
  my ($syn) = @{$chr_20->get_all_synonyms('RFAM')};
  is($syn->name(), 'anoth_20', 'We have the right synonym');
  dies_ok { $chr_20->get_all_synonyms('RFAM', 'wibble') } 'Bad external DB version means dying code';
  dies_ok { $chr_20->get_all_synonyms('RFAMing', 'wibble') } 'Bad external DB name means dying code';
  ($syn) = @{$chr_20->get_all_synonyms('RFAM', 1)};
  is($syn->name(), 'anoth_20', 'We have the right synonym');
}


#Test assembly exception type on HAP
my $hap_slice = $slice_adaptor->fetch_by_region(undef, '20_HAP1');
is($hap_slice->assembly_exception_type(), 'HAP', 'Ensuring haplotype regions are HAP');
my $chr_one_slice = $slice_adaptor->fetch_by_region('chromosome', '1', 1, 10);
is($chr_one_slice->assembly_exception_type(), 'REF', 'Ensuring reference regions are REF');

#Test slice iterator
{
  my $large_slice = $slice_adaptor->fetch_by_region('chromosome', 1, 1, 21);
  my $map = sub { $_->length() };
  my $si = sub {
    my ($chunk) = @_;
    return $large_slice->sub_Slice_Iterator($chunk)->map($map)->to_arrayref();
  };
  is_deeply($si->(100), [21], 'Subslice larger than actual slice gives just 1 slice back');
  is_deeply($si->(10), [10,10,1], 'Subslice smaller than actual slice gives 3 slices back');
  is_deeply($si->(20), [20,1], 'Subslice just smaller than actual slice gives 2 slices back');
  is_deeply($si->(21), [21], 'Subslice equal to slice size gives 1 slice back');
  my $slice_count = $large_slice->sub_Slice_Iterator(1)->reduce(sub { $_[0]+1 }, 0);
  is($slice_count, 21, 'Giving a subslice size of 1 means 21 slices');
  
  {
    my $fake_slice = Bio::EnsEMBL::Slice->new(-SEQ => 'AAACCCTTTGGGA', 
      -START => 1, -END => 13, -SEQ_REGION_NAME => 'fake', 
      -COORD_SYSTEM => $coord_system);
    my $subseqs = $fake_slice->sub_Slice_Iterator(3)->map(sub { $_->seq() })->to_arrayref();
    my $expected = ['AAA','CCC','TTT','GGG','A'];
    is_deeply($subseqs, $expected, 'Calling seq on subslices returns only the sequence for the bounds');
  }
  
  {
    my $one_bp_slice = Bio::EnsEMBL::Slice->new(-SEQ => 'A', 
      -START => 1, -END => 1, -SEQ_REGION_NAME => 'fake', 
      -COORD_SYSTEM => $coord_system);
    my $subseqs = $one_bp_slice->sub_Slice_Iterator(1)->map(sub { $_->seq() })->to_arrayref();
    my $expected = ['A'];
    is_deeply($subseqs, $expected, 'Calling seq on subslices for 1bp slice returns only an A');
  }
}

# Test alternative region mappings from fetch_by_seq_region_id()
{
  my $current_slice = $slice_adaptor->fetch_by_region('chromosome', $CHR, $START, $END);
  my $alternative_seq_region_id = 1;

  #Get the alternative seq region id. Expect nothing
  ok(! defined $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id), 'Asking for a non-existent ID results in no slice returned');

  #Save the tables and add the mapping values in
  my @tables = ('seq_region_mapping', 'mapping_set');
  $multi_db->save('core', @tables);
  my $ms_sql = 'insert into mapping_set (mapping_set_id, internal_schema_build, external_schema_build) values (?,?,?)';
  $db->dbc->sql_helper->execute_update(-SQL => $ms_sql, -PARAMS => [1, $db->_get_schema_build(), 'oldbuild']);
  my $srm_sql = 'insert into seq_region_mapping (mapping_set_id, internal_seq_region_id, external_seq_region_id) values (?,?,?)';
  $db->dbc->sql_helper->execute_update(-SQL => $srm_sql, -PARAMS => [1, $current_slice->get_seq_region_id(), $alternative_seq_region_id]);


  #Force a refresh in CSA
  delete $db->get_CoordSystemAdaptor->{$_} for qw/_internal_seq_region_mapping _external_seq_region_mapping/;
  $db->get_CoordSystemAdaptor->_cache_seq_region_mapping();
  my $alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id);
  ok(!defined $alternative_slice, 'Cannot retrieve the alternative slice without asking to look at alternatives');
  $alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id, undef, undef, undef, 1); #don't care about start,end,strand
  ok(defined $alternative_slice, 'Got a slice after asking for it');
  cmp_ok($current_slice->get_seq_region_id(), '==', $alternative_slice->get_seq_region_id(), 'Seq region IDs should be equivalent even though query seq_region_id was different');
  
  #Restore & force a refresh
  $multi_db->restore('core', @tables);
  delete $db->get_CoordSystemAdaptor->{$_} for qw/_internal_seq_region_mapping _external_seq_region_mapping/;
  $db->get_CoordSystemAdaptor->_cache_seq_region_mapping();

  # Just checking we are back to normal
  $alternative_slice = $slice_adaptor->fetch_by_seq_region_id($alternative_seq_region_id);
  ok(!defined $alternative_slice, 'Cannot retrieve the alternative slice post restore');
}