Skip to content
Snippets Groups Projects
Commit acea98ea authored by Graham McVicker's avatar Graham McVicker
Browse files

added optimisation: Transcripts and Exons can now be loaded immediately on...

added optimisation: Transcripts and Exons can now be loaded immediately on Gene fetch rather than lazy-loading later
parent d1f6562d
No related branches found
No related tags found
No related merge requests found
......@@ -268,6 +268,110 @@ sub fetch_all_by_domain {
=head2 fetch_all_by_Slice
Arg [1] : Bio::EnsEMBL::Slice $slice
The slice to fetch genes on.
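Arg [2] : (optional) string $logic_name
The name of the analysis used to generate the genes to retrieve
Arg [3] : (optional) boolean $load_transcripts
if true, transcripts will be loaded immediately rather than
lazy loaded later.
Example : @genes = @{$gene_adaptor->fetch_all_by_Slice($slice)};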
Description: Overrides superclass method to optionally load transcripts
immediately rather than lazy-loading them later. This
is more efficient when there are a lot of genes whose
transcripts are going to be used.
Returntype : reference to list of genes
Exceptions : thrown if a transcript cannot be transferred onto the gene slice
Caller : Slice::get_all_Genes
=cut
sub fetch_all_by_Slice {
    my $self             = shift;
    my $slice            = shift;
    my $logic_name       = shift;
    my $load_transcripts = shift;

    # Fetches the genes on $slice via the superclass (forwarding the optional
    # $logic_name) and, when $load_transcripts is true, attaches transcripts
    # to those genes straight away. Returns the listref of genes obtained
    # from the superclass in every case.
    my $genes = $self->SUPER::fetch_all_by_Slice($slice, $logic_name);

    # if there are 0 or 1 genes still do lazy-loading
    if (!$load_transcripts || @$genes < 2) {
        return $genes;
    }

    # preload all of the transcripts now, instead of lazy loading later
    # faster than 1 query per transcript

    # get extent of region spanned by the genes (slice-relative coordinates)
    my ($min_start, $max_end);
    foreach my $gene (@$genes) {
        my $g_start = $gene->start();
        my $g_end   = $gene->end();
        $min_start = $g_start if !defined($min_start) || $g_start < $min_start;
        $max_end   = $g_end   if !defined($max_end)   || $g_end   > $max_end;
    }

    # shift the extent by the slice start so it can be compared with, and
    # used to fetch, a region of the underlying sequence
    # NOTE(review): this offset arithmetic looks like it assumes a
    # forward-strand slice -- confirm behaviour for reverse-strand slices
    $min_start += $slice->start() - 1;
    $max_end   += $slice->start() - 1;

    # only fetch a new (larger) slice if some gene sticks out of the
    # requested one
    my $ext_slice;
    if ($min_start >= $slice->start() && $max_end <= $slice->end()) {
        $ext_slice = $slice;
    }
    else {
        my $sa = $self->db()->get_SliceAdaptor();
        $ext_slice = $sa->fetch_by_region(
            $slice->coord_system->name(), $slice->seq_region_name(),
            $min_start, $max_end, $slice->strand(),
            $slice->coord_system->version()
        );
    }

    # associate transcript identifiers with genes
    my %g_hash   = map { $_->dbID => $_ } @$genes;
    my $g_id_str = '(' . join(',', keys %g_hash) . ')';

    my $sth = $self->prepare("SELECT gene_id, transcript_id " .
                             "FROM transcript " .
                             "WHERE gene_id IN $g_id_str");
    $sth->execute();

    my ($g_id, $tr_id);
    $sth->bind_columns(\$g_id, \$tr_id);

    my %tr_g_hash;
    while ($sth->fetch()) {
        $tr_g_hash{$tr_id} = $g_hash{$g_id};
    }
    $sth->finish();

    my $ta          = $self->db()->get_TranscriptAdaptor();
    my $transcripts = $ta->fetch_all_by_Slice($ext_slice, 1);

    # move transcripts onto gene slice, and add them to genes
    my $needs_transfer = ($slice != $ext_slice);
    foreach my $tr (@$transcripts) {
        # The transcript fetch is not restricted to the genes fetched above,
        # so it can return transcripts with no entry in %tr_g_hash. Skip
        # those instead of calling add_Transcript on an undefined value.
        my $gene = $tr_g_hash{ $tr->dbID() };
        next if !defined $gene;

        my $placed = $needs_transfer ? $tr->transfer($slice) : $tr;
        if (!$placed) {
            throw("Unexpected. Transcript could not be transfered onto Gene slice.");
        }
        $gene->add_Transcript($placed);
    }

    return $genes;
}
=head2 fetch_by_transcript_id
Arg [1] : int $transid
......
......@@ -327,7 +327,7 @@ sub fetch_all_by_Slice {
$ext_slice = $slice;
} else {
my $sa = $self->db()->get_SliceAdaptor();
my $ext_slice = $sa->fetch_by_region
$ext_slice = $sa->fetch_by_region
($slice->coord_system->name(), $slice->seq_region_name(),
$min_start,$max_end, $slice->strand(), $slice->coord_system->version());
}
......
......@@ -1250,9 +1250,18 @@ sub get_all_SNPs_transcripts {
=head2 get_all_Genes
Arg [1] : (optional) string $logic_name
The name of the analysis used to generate the genes to retrieve
The name of the analysis used to generate the genes to retrieve
Arg [2] : (optional) string $dbtype
The dbtype of genes to obtain. This assumes that the db has
been added to the DBAdaptor under this name (using the
DBConnection::add_db_adaptor method).
Arg [3] : (optional) boolean $load_transcripts
If set to true, transcripts will be loaded immediately rather
than being lazy-loaded on request. This will result in a
significant speed up if the Transcripts and Exons are going to
be used (but a slow down if they are not).
Example : @genes = @{$slice->get_all_Genes};
Description: Retrieves all genes that overlap this slice.
Description: Retrieves all genes that overlap this slice.
Returntype : listref of Bio::EnsEMBL::Genes
Exceptions : none
Caller : none
......@@ -1260,7 +1269,7 @@ sub get_all_SNPs_transcripts {
=cut
sub get_all_Genes{
my ($self, $logic_name, $dbtype) = @_;
my ($self, $logic_name, $dbtype, $load_transcripts) = @_;
if(!$self->adaptor()) {
warning('Cannot get Genes without attached adaptor');
......@@ -1278,7 +1287,9 @@ sub get_all_Genes{
$db = $self->adaptor->db;
}
return $db->get_GeneAdaptor()->fetch_all_by_Slice( $self, $logic_name );
my $ga = $db->get_GeneAdaptor();
return $ga->fetch_all_by_Slice( $self, $logic_name, $load_transcripts);
}
=head2 get_all_Genes_by_type
......@@ -1286,6 +1297,11 @@ sub get_all_Genes{
Arg [1] : string $type
Arg [2] : (optional) string $logic_name
Arg [3] : (optional) boolean $load_transcripts
If set to true, transcripts will be loaded immediately rather
than being lazy-loaded on request. This will result in a
significant speed up if the Transcripts and Exons are going to
be used (but a slow down if they are not).
Example : @genes = @{$slice->get_all_Genes_by_type($type, 'ensembl')};
Description: Retrieves genes that overlap this slice of type $type.
This is primarily used by the genebuilding code when several
......@@ -1301,14 +1317,15 @@ sub get_all_Genes{
=cut
sub get_all_Genes_by_type{
    # Returns a listref of the genes overlapping this slice whose type
    # equals $type. $logic_name and $load_transcripts are optional and are
    # forwarded to get_all_Genes.
    my ($self, $type, $logic_name, $load_transcripts) = @_;

    if(!$self->adaptor()) {
        warning('Cannot get Genes without attached adaptor');
        return [];
    }

    # get_all_Genes takes ($logic_name, $dbtype, $load_transcripts); pass an
    # explicit undef for $dbtype so the flag is not mistaken for a dbtype.
    my @out = grep { $_->type eq $type }
        @{ $self->get_all_Genes($logic_name, undef, $load_transcripts) };

    return \@out;
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment