Skip to content
Snippets Groups Projects
Commit e38051a4 authored by Kieron Taylor's avatar Kieron Taylor :angry:
Browse files

Basic tests for TranscriptSelector.pm

Tests null cases and basic functioning of sorting algorithm.
parent 923cc6c4
No related branches found
No related tags found
No related merge requests found
use strict;
use warnings;

use Test::More;
use Test::Exception;
use Test::MockObject;
use Test::MockObject::Extends;

use Bio::EnsEMBL::Utils::TranscriptSelector;
use Bio::EnsEMBL::Exon;
use Bio::EnsEMBL::Gene;
use Bio::EnsEMBL::Transcript;
use Bio::EnsEMBL::Translation;
use Bio::EnsEMBL::CoordSystem;
use Bio::EnsEMBL::Slice;
use Bio::EnsEMBL::Analysis;
# Selector used for the pure sorting checks. The first constructor argument is
# left undef here; the sort is fed plain arrays only.
my $transcript_selector = Bio::EnsEMBL::Utils::TranscriptSelector->new(undef, 'VERBOSE');

# Test the sorting algorithm.
# Each row encodes one candidate as follows:
# [transcript dbID, source, biotype, translation length, transcript length, stable ID]
my $sortables = [
    [qw( a 1 1 500 250 ENST7 )],
    [qw( b 2 1 500 250 ENST6 )],
    [qw( c 3 1 500 250 ENST5 )],
    [qw( d 1 1 450 250 ENST4 )],
    [qw( e 1 1 500 250 ENST3 )],
    [qw( f 3 3 0 700 ENST2 )],
    [qw( g 3 3 0 700 ENST1 )],
];
my $sorted = $transcript_selector->sort_into_canonical_order($sortables);

# Diagnostics go through note() so they are emitted as TAP comments; raw
# print() to STDOUT can run into the following "ok" line and corrupt the stream.
note('Sorted order: ' . join(',', @$sorted));

my $correct_order = [qw(e a d b c g f)];
is_deeply($sorted, $correct_order, 'Canonical sort order');

# Null case: an empty candidate list must come back as an empty result.
$sortables = [];
$sorted = $transcript_selector->sort_into_canonical_order($sortables);
note('Sorted order for empty input: ' . join(',', @$sorted));
is(scalar(@$sorted), 0, 'Null data into sort.');
# Build a stand-in slice adaptor for the CCDS database so that the
# database-dependent code can be tested. Together with the fake slice set up
# further down, this exercises check_Ens_trans_against_CCDS() without needing
# a real slice adaptor.
my $mock_sa = Test::MockObject->new();
$mock_sa->set_isa('Bio::EnsEMBL::DBSQL::SliceAdaptor');
# Every seq_region ID lookup answers 1.
$mock_sa->set_always('get_seq_region_id', 1);

my $coord_system = Bio::EnsEMBL::CoordSystem->new(
    -DBID      => 1,
    -RANK      => 1,
    -TOP_LEVEL => 0,
    -NAME      => 'landofgiants',
);

# Constructor arguments that are identical for both test slices.
my %shared_slice_args = (
    -START           => 1,
    -SEQ_REGION_NAME => '1',
    -COORD_SYSTEM    => $coord_system,
    -ADAPTOR         => $mock_sa,
);

# Forward-strand slice of 10000 'N' bases.
my $slice = Bio::EnsEMBL::Slice->new(
    %shared_slice_args,
    -END               => 10000,
    -STRAND            => 1,
    -SEQ_REGION_LENGTH => 1e4,
    -SEQ               => 'N' x 10000,
);

# Reverse-strand slice of 4000 'A' bases.
my $other_slice = Bio::EnsEMBL::Slice->new(
    %shared_slice_args,
    -END               => 4000,
    -STRAND            => -1,
    -SEQ_REGION_LENGTH => 4e3,
    -SEQ               => 'A' x 4000,
);
# Wrap the main slice so single methods can be overridden, and make it report
# an empty attribute list.
$slice = Test::MockObject::Extends->new($slice);
$slice->mock('get_all_Attributes', sub { return []; });

# One exon, added to each of the three transcripts built below.
my $exon = Bio::EnsEMBL::Exon->new(
    -DBID      => 15,
    -STABLE_ID => 'ENSE01',
    -SLICE     => $slice,
    -STRAND    => 1,
    -START     => 1000,
    -END       => 2000,
);

my $analysis = Bio::EnsEMBL::Analysis->new(
    -logic_name => 'bananas_are_nice',
    -id         => 1,
);

# Creates a current transcript on $slice with the shared analysis, wraps it in
# Test::MockObject::Extends and stubs translate() to return $translate_result.
my $build_mock_transcript = sub {
    my ($dbid, $stable_id, $biotype, $translate_result) = @_;

    my $plain_transcript = Bio::EnsEMBL::Transcript->new(
        -DBID       => $dbid,
        -STABLE_ID  => $stable_id,
        -BIOTYPE    => $biotype,
        -IS_CURRENT => 1,
        -SLICE      => $slice,
        -ANALYSIS   => $analysis,
    );
    my $wrapped = Test::MockObject::Extends->new($plain_transcript);
    $wrapped->mock('translate', sub { return $translate_result });

    return $wrapped;
};

# Transcripts 1 and 2 "translate" to the main slice, transcript 3 to the other one.
my $transcript1 = $build_mock_transcript->(1, 'ENST01', 'protein_coding',          $slice);
my $transcript2 = $build_mock_transcript->(2, 'ENST02', 'nonsense_mediated_decay', $slice);
my $transcript3 = $build_mock_transcript->(3, 'ENST03', 'flying_poofish',          $other_slice);

# Transcript 2 is the only one given a Translation object; it starts and ends
# in the shared exon.
$transcript2->add_Exon($exon);
$transcript2->translation(
    Bio::EnsEMBL::Translation->new(
        -START_EXON => $exon,
        -END_EXON   => $exon,
        -SEQ_START  => 1,
        -SEQ_END    => 1001,
    )
);

for my $untranslated ($transcript1, $transcript3) {
    $untranslated->add_Exon($exon);
}

# Gene holding all three transcripts.
my $transcripts = [ $transcript1, $transcript2, $transcript3 ];
my $gene = Bio::EnsEMBL::Gene->new(
    -SLICE       => $slice,
    -BIOTYPE     => 'protein_coding',
    -TRANSCRIPTS => $transcripts,
    -STRAND      => 1,
    -START       => 123,
    -END         => 2045,
);
# Fake slice returned by the mocked slice adaptor's fetch_by_region().
my $mock_slice = Test::MockObject->new();
$mock_slice->set_always('get_seq_region_id', 1);
$mock_slice->set_always('is_circular', 0);
# Kept as a closure over $gene: the variable is read on every call, so the
# stub returns whichever gene $gene holds at that moment, in a fresh array ref.
$mock_slice->mock('get_all_Genes', sub { return [$gene]; });

# Remaining canned answers on the slice adaptor.
$mock_sa->set_always('fetch_by_region', $mock_slice);
$mock_sa->set_always('is_reference', 1);
$mock_sa->set_always('is_circular', 0);

# Sequence adaptor stub: each call returns a reference to a new string of 20 'A's.
my $fake_seq_adaptor = Test::MockObject->new();
$fake_seq_adaptor->mock(
    'fetch_by_Slice_start_end_strand',
    sub {
        my $bases = 'A' x 20;
        return \$bases;
    }
);

# Minimal database object reachable through the slice adaptor's db() method.
my $fake_db = Test::MockObject->new();
$fake_db->set_always('get_SequenceAdaptor', $fake_seq_adaptor);
$mock_sa->set_always('db', $fake_db);

# Database adaptor handed to the selector; it only knows get_SliceAdaptor().
my $mock_dba = Test::MockObject->new();
$mock_dba->set_always('get_SliceAdaptor', $mock_sa);
# Recreate the selector, this time around the mocked database adaptor, so the
# CCDS lookups below are answered by the fake slice and gene.
$transcript_selector = Bio::EnsEMBL::Utils::TranscriptSelector->new($mock_dba, 'VERBOSE');

# Transcript 2 carries a translation and is expected to be found.
ok($transcript_selector->check_Ens_trans_against_CCDS($transcript2), 'CCDS transcript lookup with good data');

# Transcript 3 has no translation, so the lookup must come back false. Boolean
# negation mirrors the truthiness check above; comparing with "!= 1" would also
# accept any other true value and would warn on an undef result.
ok(!$transcript_selector->check_Ens_trans_against_CCDS($transcript3), 'CCDS transcript lookup with non-coding');

# Full selection over the three-transcript gene.
my $canonical_transcript = $transcript_selector->select_canonical_transcript_for_Gene($gene);
is($canonical_transcript->stable_id, 'ENST02', 'Full select canonical transcript');
# Two more protein_coding transcripts on the same slice. These are plain
# Transcript objects (no mock wrapper). Note that ENST04 has dbID 5 and
# ENST05 has dbID 4.
my $transcript4 = Bio::EnsEMBL::Transcript->new(
    -STABLE_ID  => 'ENST04',
    -DBID       => 5,
    -BIOTYPE    => 'protein_coding',
    -SLICE      => $slice,
    -ANALYSIS   => $analysis,
    -IS_CURRENT => 1,
);
my $transcript5 = Bio::EnsEMBL::Transcript->new(
    -STABLE_ID  => 'ENST05',
    -DBID       => 4,
    -BIOTYPE    => 'protein_coding',
    -SLICE      => $slice,
    -ANALYSIS   => $analysis,
    -IS_CURRENT => 1,
);
for my $extra_transcript ($transcript4, $transcript5) {
    $extra_transcript->add_Exon($exon);
}

# Second gene: transcript 2 is left out and the two new transcripts are added.
$transcripts = [ $transcript1, $transcript3, $transcript4, $transcript5 ];
$gene = Bio::EnsEMBL::Gene->new(
    -SLICE       => $slice,
    -BIOTYPE     => 'protein_coding',
    -TRANSCRIPTS => $transcripts,
    -STRAND      => 1,
    -START       => 123,
    -END         => 2045,
);
$canonical_transcript = $transcript_selector->select_canonical_transcript_for_Gene($gene);
is($canonical_transcript->stable_id, 'ENST01', 'Sorting with no CCDS option and equal lengths');

# Third gene: no transcripts at all. The selector is expected to return undef.
$gene = Bio::EnsEMBL::Gene->new(
    -STABLE_ID => 'ENSGFAKE',
    -SLICE     => $slice,
    -BIOTYPE   => 'protein_coding',
    -STRAND    => 1,
    -START     => 123,
    -END       => 2045,
);
$canonical_transcript = $transcript_selector->select_canonical_transcript_for_Gene($gene);
note($canonical_transcript);
is($canonical_transcript, undef, "Gene with no transcripts, fault tolerance");

done_testing();
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment