Skip to content
Snippets Groups Projects
Commit 2d892a32 authored by Matthew Laird's avatar Matthew Laird
Browse files

- Fix off-by-one error in spliced_seq masking for reverse strand transcripts

- Added tests to cover UTR-coding sequence boundary for forward and reverse strands
parent 6c5b0e50
No related branches found
No related tags found
No related merge requests found
......@@ -833,7 +833,6 @@ sub spliced_seq {
my $seq_string = "";
for my $ex ( @{$self->get_all_Exons()} ) {
my $seq = $ex->seq();
if(!$seq) {
warning("Could not obtain seq for exon. Transcript sequence may not " .
"be correct.");
......@@ -850,7 +849,7 @@ sub spliced_seq {
if ($ex->strand == 1) {
$exon_seq = lc (substr($exon_seq, 0, $forward_length)) . substr($exon_seq, $forward_length);
} else {
$exon_seq = substr($exon_seq, 0, $reverse_length) . lc(substr($exon_seq, $reverse_length));
$exon_seq = substr($exon_seq, 0, $reverse_length+1) . lc(substr($exon_seq, $reverse_length+1));
}
} elsif ($ex->coding_region_end($self) < $ex->end()) {
my $forward_length = $ex->coding_region_end($self) - $ex->start();
......
......@@ -152,6 +152,8 @@ is ( substr( $tr->spliced_seq(), 0, 10 ), "ACGAGACGAA", 'Start of spliced seq is
# Calling spliced_seq with a true argument is expected to return the UTR
# bases in lower case; the first ten bases of this transcript are UTR.
is ( substr( $tr->spliced_seq(1), 0, 10 ), "acgagacgaa", 'Spliced seq with utr lower casing is correct');
# Masking must only change letter case: same length and same bases as the
# unmasked sequence.
is ( length($tr->spliced_seq()), length($tr->spliced_seq(1)), "Spliced seq with or without utr lower casing has the same length");
is ( $tr->spliced_seq(), uc($tr->spliced_seq(1)), "Spliced seq is identical to upper case utr masked spliced seq");
# Check the exact lower/upper case transition at both ends of the coding
# region (three bases either side of each boundary) to catch off-by-one
# errors in the masking.
is ( substr($tr->spliced_seq(1), 61, 6), 'aagATG', 'Start mask boundary on forward strand transcript is correct' );
is ( substr($tr->spliced_seq(1), 865, 6), 'TATtaa', 'End mask boundary on forward strand transcript is correct' );
is ( substr( $tr->translateable_seq(),0,10 ), "ATGGCAGTGA", 'Start of translateable sequence is correct' );
......@@ -239,6 +241,18 @@ is ( $up_tr->display_xref->dbID(), 614, 'Fetched the correct display xref id');
$multi->restore('core', 'transcript', 'meta_coord');
#
# Test spliced_seq on a reverse strand transcript
#
# Same spliced_seq checks as for the forward strand, run against a
# transcript that the surrounding test comments identify as reverse strand.
$tr = $ta->fetch_by_stable_id( "ENST00000246229" );
is ( substr( $tr->spliced_seq(), 0, 10 ), "ATGGCCCGAC", 'Start of spliced seq is correct, rev strand' );
is ( substr( $tr->spliced_seq(1), 0, 10 ), "atggcccgac", 'Spliced seq with utr lower casing is correct, rev strand');
# Masking must only change letter case: same length and same bases as the
# unmasked sequence.
is ( length($tr->spliced_seq()), length($tr->spliced_seq(1)), "Spliced seq with or without utr lower casing has the same length, rev strand");
is ( $tr->spliced_seq(), uc($tr->spliced_seq(1)), "Spliced seq is identical to upper case utr masked spliced seq, rev strand");
# Check the exact lower/upper case transition at both ends of the coding
# region (three bases either side of each boundary) to catch off-by-one
# errors in the masking.
is ( substr($tr->spliced_seq(1), 199, 6), 'gccATG', 'Start mask boundary on reverse strand transcript is correct' );
is ( substr($tr->spliced_seq(1), 1687, 6), 'CAGtag', 'End mask boundary on reverse strand transcript is correct' );
my $interpro = $ta->get_Interpro_by_transid("ENST00000252021");
......@@ -278,7 +292,6 @@ is(@transcripts, 27, 'Got 27 transcript');
$transcriptCount = $ta->count_all_by_biotype(['protein_coding', 'pseudogene']);
is($transcriptCount, 27, 'Count by biotype is correct');
#
# test TranscriptAdaptor::fetch_all_by_Slice
#
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment