Skip to content
Snippets Groups Projects
Commit be06192b authored by Graham McVicker's avatar Graham McVicker
Browse files

transcript fetching optimisations, allows optional immediate exon loading as...

transcript fetching optimisations, allows optional immediate exon loading as opposed to lazy-loading
parent 8678747f
No related branches found
No related tags found
No related merge requests found
......@@ -60,6 +60,8 @@ use Bio::EnsEMBL::Translation;
use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning );
use Time::HiRes qw(time);
@ISA = qw( Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor );
......@@ -272,6 +274,109 @@ sub fetch_all_by_Gene {
=head2 fetch_all_by_Slice
Arg [1] : Bio::EnsEMBL::Slice $slice
The slice to fetch transcripts on.
Arg [2] : (optional) boolean $load_exons
if true, exons will be loaded immediately rather than
lazy loaded later.
Example : $transcripts = $transcript_adaptor->fetch_all_by_Slice($slice, 1);
Description: Overrides superclass method to optionally load exons
immediately rather than lazy-loading them later. This
is more efficient when there are a lot of transcripts whose
exons are going to be used.
Returntype : reference to list of transcripts
Exceptions : thrown if exon cannot be placed on transcript slice
Caller : Slice::get_all_Transcripts
=cut
# Fetches every transcript overlapping $slice via the superclass method and,
# when $load_exons is true and at least two transcripts were found, attaches
# their exons immediately using one exon_transcript query plus one exon fetch
# instead of one lazy-load query per transcript.
#
#   $slice      - slice to fetch transcripts on
#   $load_exons - (optional) true to preload exons, false/undef to lazy-load
#
# Returns a reference to the list of transcripts.
# Throws if an exon cannot be transferred onto the transcript slice.
sub fetch_all_by_Slice {
  my ($self, $slice, $load_exons) = @_;

  my $transcripts = $self->SUPER::fetch_all_by_Slice($slice);

  # if there are 0 or 1 transcripts still do lazy-loading
  if(!$load_exons || @$transcripts < 2) {
    return $transcripts;
  }

  # preload all of the exons now, instead of lazy loading later
  # faster than 1 query per transcript

  # get extent of region spanned by transcripts (slice-relative coordinates)
  my ($min_start, $max_end);
  foreach my $tr (@$transcripts) {
    if(!defined($min_start) || $tr->start() < $min_start) {
      $min_start = $tr->start();
    }
    if(!defined($max_end) || $tr->end() > $max_end) {
      $max_end = $tr->end();
    }
  }

  # shift the extent by the slice start offset
  # NOTE(review): this offset arithmetic looks like it assumes a
  # forward-strand slice -- confirm behaviour for reverse-strand slices.
  $min_start += $slice->start() - 1;
  $max_end   += $slice->start() - 1;

  # Transcripts may stick out past either end of the requested slice; in that
  # case exons have to be fetched from a wider slice and moved back afterwards.
  # The wider slice must be assigned to this outer variable: declaring a new
  # lexical inside the branch would leave it undefined for the exon fetch.
  my $ext_slice      = $slice;
  my $needs_transfer = 0;
  if($min_start < $slice->start() || $max_end > $slice->end()) {
    my $sa = $self->db()->get_SliceAdaptor();
    $ext_slice = $sa->fetch_by_region
      ($slice->coord_system->name(), $slice->seq_region_name(),
       $min_start, $max_end, $slice->strand(),
       $slice->coord_system->version());
    $needs_transfer = 1;
  }

  # associate exon identifiers with transcripts
  my %tr_hash = map {$_->dbID => $_} @$transcripts;
  my $tr_id_str = '(' . join(',', keys %tr_hash) . ')';

  my $sth = $self->prepare("SELECT transcript_id, exon_id, rank " .
                           "FROM exon_transcript " .
                           "WHERE transcript_id IN $tr_id_str");
  $sth->execute();

  my ($ex_id, $tr_id, $rank);
  $sth->bind_columns(\$tr_id, \$ex_id, \$rank);

  # exon dbID => list of [transcript, rank] pairs (an exon may be shared)
  my %ex_tr_hash;
  while($sth->fetch()) {
    push @{$ex_tr_hash{$ex_id}}, [$tr_hash{$tr_id}, $rank];
  }
  $sth->finish();

  my $ea = $self->db()->get_ExonAdaptor();
  my $exons = $ea->fetch_all_by_Slice($ext_slice);

  # move exons onto transcript slice, and add them to transcripts
  foreach my $ex (@$exons) {
    # the region may contain exons of transcripts that were not fetched;
    # they are of no interest here and need not be transferred
    my $links = $ex_tr_hash{$ex->dbID()};
    next if(!$links);

    my $placed_ex = $needs_transfer ? $ex->transfer($slice) : $ex;
    if(!$placed_ex) {
      throw("Unexpected. Exon could not be transfered onto transcript slice.");
    }

    foreach my $row (@$links) {
      my ($tr, $ex_rank) = @$row;
      $tr->add_Exon($placed_ex, $ex_rank);
    }
  }

  return $transcripts;
}
=head2 fetch_all_by_external_name
......
......@@ -1316,8 +1316,11 @@ sub get_all_Genes_by_type{
=head2 get_all_Transcripts
Arg [1] : none
Example : @transcripts = @{$slice->get_all_Transcripts};
Arg [1] : (optional) boolean $load_exons
If set to true exons will not be lazy-loaded but will instead
be loaded right away. This is faster if the exons are
actually going to be used right away.
Example : @transcripts = @{$slice->get_all_Transcripts()};
Description: Gets all transcripts which overlap this slice. If you want to
specify a particular analysis or type, then you are better off
using get_all_Genes or get_all_Genes_by_type and iterating
......@@ -1330,13 +1333,15 @@ sub get_all_Genes_by_type{
# Returns a reference to the list of transcripts overlapping this slice, as
# produced by the transcript adaptor's fetch_all_by_Slice.
#
#   $load_exons - (optional) passed through to the transcript adaptor; when
#                 true the exons are loaded right away rather than lazily.
#
# Emits a warning and returns an empty list reference when the slice has no
# adaptor attached.
sub get_all_Transcripts {
  my $self = shift;
  my $load_exons = shift;

  if(!$self->adaptor()) {
    warning('Cannot get Transcripts without attached adaptor');
    return [];
  }

  # A single return path: the flag must reach the adaptor, so there is no
  # earlier flag-less fetch ahead of this call.
  my $ta = $self->adaptor()->db()->get_TranscriptAdaptor();
  return $ta->fetch_all_by_Slice($self, $load_exons);
}
......
......@@ -791,7 +791,7 @@ sub add_Attributes {
=cut
sub add_Exon{
my ($self,$exon) = @_;
my ($self,$exon, $rank) = @_;
#yup - we are going to be picky here...
unless(defined $exon && ref $exon && $exon->isa("Bio::EnsEMBL::Exon") ) {
......@@ -800,6 +800,11 @@ sub add_Exon{
$self->{'_trans_exon_array'} ||= [];
if(defined($rank)) {
$self->{'_trans_exon_array'}->[$rank-1] = $exon;
return;
}
my $was_added = 0;
my $ea = $self->{'_trans_exon_array'};
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment