Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ensembl-gh-mirror
ensembl
Commits
c4a5cddc
Commit
c4a5cddc
authored
Apr 22, 2004
by
Graham McVicker
Browse files
tidied and split large fetch_all_by_Slice_constraint method into 2 smaller methods
parent
20789164
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
146 additions
and
163 deletions
+146
-163
modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
+146
-163
No files found.
modules/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm
View file @
c4a5cddc
...
...
@@ -40,8 +40,8 @@ use Bio::EnsEMBL::Utils::Argument qw(rearrange);
@ISA
=
qw(Bio::EnsEMBL::DBSQL::BaseAdaptor)
;
my
$SLICE_FEATURE_CACHE_SIZE
=
4
;
my
$MAX_SPLIT_QUERY_SEQ_REGIONS
=
3
;
our
$SLICE_FEATURE_CACHE_SIZE
=
4
;
our
$MAX_SPLIT_QUERY_SEQ_REGIONS
=
3
;
=head2 new
...
...
@@ -72,19 +72,17 @@ sub new {
=head2
_straight_join
#
_straight_join
Arg [1] : (optional) boolean $new_val
Example : $self->_straight_join(1);
$self->generic_fetch($constraint);
$self->_straight_join(0);
Description: Getter/Setter that turns on/off the use of a straight join
in queries.
Returntype : boolean
Exceptions : none
Caller : general
=cut
# Arg [1] : (optional) boolean $new_val
# Example : $self->_straight_join(1);
# $self->generic_fetch($constraint);
# $self->_straight_join(0);
# Description: PROTECTED Getter/Setter that turns on/off the use of
# a straight join in queries.
# Returntype : boolean
# Exceptions : none
# Caller : general
sub
_straight_join
{
my
$self
=
shift
;
...
...
@@ -255,8 +253,8 @@ sub fetch_all_by_dbID_list {
my
@tabs
=
$self
->
_tables
;
my
(
$name
,
$syn
)
=
@
{
$tabs
[
0
]};
#mysql is faster and we ensure that we do not exceed the max query size by
#splitting large queries into smaller queries of 200 ids
#
mysql is faster and we ensure that we do not exceed the max query size by
#
splitting large queries into smaller queries of 200 ids
my
$max_size
=
200
;
while
(
@$id_list
)
{
...
...
@@ -267,7 +265,6 @@ sub fetch_all_by_dbID_list {
@ids
=
splice
(
@$id_list
,
0
);
}
my
$id_str
;
if
(
@ids
>
1
)
{
$id_str
=
"
IN (
"
.
join
('
,
',
@ids
)
.
"
)
";
...
...
@@ -366,175 +363,184 @@ sub fetch_all_by_Slice_and_score {
=cut
sub
fetch_all_by_Slice_constraint
{
my
(
$self
,
$orig_slice
,
$original_constraint
,
$logic_name
)
=
@_
;
my
@result_features
;
my
(
$self
,
$slice
,
$constraint
,
$logic_name
)
=
@_
;
my
@result
;
if
(
!
ref
(
$
orig_
slice
)
||
!
$
orig_
slice
->
isa
("
Bio::EnsEMBL::Slice
"))
{
if
(
!
ref
(
$slice
)
||
!
$slice
->
isa
("
Bio::EnsEMBL::Slice
"))
{
throw
("
Bio::EnsEMBL::Slice argument expected.
");
}
$original_constraint
||=
'';
$original_constraint
=
$self
->
_logic_name_to_constraint
(
$original_constraint
,
$logic_name
);
$constraint
||=
'';
$constraint
=
$self
->
_logic_name_to_constraint
(
$constraint
,
$logic_name
);
#if the logic name was invalid, undef was returned
return
[]
if
(
!
defined
(
$
original_
constraint
));
return
[]
if
(
!
defined
(
$constraint
));
#check the cache and return if we have already done this query
my
$key
=
uc
(
join
('
:
',
$
orig_
slice
->
name
,
$
original_
constraint
));
my
$key
=
uc
(
join
('
:
',
$slice
->
name
,
$constraint
));
if
(
exists
(
$self
->
{'
_slice_feature_cache
'}
->
{
$key
}))
{
return
$self
->
{'
_slice_feature_cache
'}
->
{
$key
};
}
my
$s
lice_adaptor
=
$orig_
slice
->
adaptor
();
my
$s
a
=
$
slice
->
adaptor
();
#retrieve normalized 'non-symlinked' slices
#this allows us to support haplotypes and PARs
my
@projection
=
@
{
$slice_adaptor
->
fetch_normalized_slice_projection
(
$orig_slice
)};
# Hap/PAR support: retrieve normalized 'non-symlinked' slices
my
@proj
=
@
{
$sa
->
fetch_normalized_slice_projection
(
$slice
)};
if
(
@proj
ection
==
0
)
{
if
(
@proj
==
0
)
{
throw
('
Could not retrieve normalized Slices. Database contains
'
.
'
incorrect assembly_exception information.
');
}
#
we w
ant to
r
et
rieve all features calculated
on the FULL original slice
#as well as any symlinked slices
.
#
W
ant to
g
et
features
on the FULL original slice
#
as well as any symlinked slices
#Filter out any partial slices from the normalized projection that are on
#the same seq region as the original slice
my
@new_projection
=
grep
{
$_
->
[
2
]
->
seq_region_name
()
ne
$orig_slice
->
seq_region_name
()
}
@projection
;
# Filter out partial slices from projection that are on
# same seq_region as original slice
push
(
@new_projection
,
[
1
,
$orig_slice
->
length
(),
$orig_slice
]
);
my
$sr_id
=
$slice
->
get_seq_region_id
(
);
#fetch features for the primary slice AND all symlinked slices
foreach
my
$segment
(
@new_projection
)
{
my
(
$offset
,
$slice
);
(
$offset
,
undef
,
$slice
)
=
@$segment
;
@proj
=
grep
{
$_
->
to_Slice
->
get_seq_region_id
()
!=
$sr_id
}
@proj
;
my
$slice_start
=
$slice
->
start
();
my
$slice_end
=
$slice
->
end
();
my
$slice_strand
=
$slice
->
strand
();
my
$slice_cs
=
$slice
->
coord_system
();
my
$slice_seq_region
=
$slice
->
seq_region_name
();
my
$segment
=
bless
([
1
,
$slice
->
length
(),
$slice
],
'
Bio::EnsEMBL::ProjectionSegment
');
push
(
@proj
,
$segment
);
#get the synonym and name of the primary_table
my
@tabs
=
$self
->
_tables
;
my
(
$tab_name
,
$tab_syn
)
=
@
{
$tabs
[
0
]};
# fetch features for the primary slice AND all symlinked slices
foreach
my
$segment
(
@proj
)
{
my
$offset
=
$segment
->
from_start
();
my
$seg_slice
=
$segment
->
to_Slice
();
#find out what coordinate systems the features are in
my
$mcc
=
$self
->
db
->
get_MetaCoordContainer
();
my
@feat_css
=
@
{
$mcc
->
fetch_all_CoordSystems_by_feature_type
(
$tab_name
)};
my
$features
=
$self
->
_slice_fetch
(
$seg_slice
,
$constraint
);
my
$asma
=
$self
->
db
->
get_AssemblyMapperAdaptor
();
my
@features
;
# if this was a symlinked slice offset the feature coordinates as needed
if
(
$seg_slice
->
name
()
ne
$slice
->
name
())
{
foreach
my
$f
(
@$features
)
{
if
(
$offset
!=
1
)
{
$f
->
{'
start
'}
+=
$offset
-
1
;
$f
->
{'
end
'}
+=
$offset
-
1
;
}
$f
->
{'
slice
'}
=
$slice
;
push
@result
,
$f
;
}
}
else
{
push
@result
,
@$features
;
}
}
$self
->
{'
_slice_feature_cache
'}
->
{
$key
}
=
\
@result
;
# fetch the features from each coordinate system they are stored in
COORD_SYSTEM:
foreach
my
$feat_cs
(
@feat_css
)
{
my
$mapper
;
my
@coords
;
my
@ids
;
return
\
@result
;
}
if
(
$feat_cs
->
equals
(
$slice_cs
))
{
#no mapping is required if this is the same coord system
my
$constraint
=
$original_constraint
;
# obtain seq_region_id of this slice from db
my
$seq_region_id
=
$self
->
db
->
get_SliceAdaptor
->
get_seq_region_id
(
$slice
);
$constraint
.=
"
AND
"
if
(
$constraint
);
$constraint
.=
"
${tab_syn}
.seq_region_id =
$seq_region_id
AND
"
.
#
# helper function used by fetch_all_by_Slice_constraint method
#
sub
_slice_fetch
{
my
$self
=
shift
;
my
$slice
=
shift
;
my
$orig_constraint
=
shift
;
my
$slice_start
=
$slice
->
start
();
my
$slice_end
=
$slice
->
end
();
my
$slice_strand
=
$slice
->
strand
();
my
$slice_cs
=
$slice
->
coord_system
();
my
$slice_seq_region
=
$slice
->
seq_region_name
();
#get the synonym and name of the primary_table
my
@tabs
=
$self
->
_tables
;
my
(
$tab_name
,
$tab_syn
)
=
@
{
$tabs
[
0
]};
#find out what coordinate systems the features are in
my
$mcc
=
$self
->
db
->
get_MetaCoordContainer
();
my
@feat_css
=
@
{
$mcc
->
fetch_all_CoordSystems_by_feature_type
(
$tab_name
)};
my
$asma
=
$self
->
db
->
get_AssemblyMapperAdaptor
();
my
@features
;
# fetch the features from each coordinate system they are stored in
COORD_SYSTEM:
foreach
my
$feat_cs
(
@feat_css
)
{
my
$mapper
;
my
@coords
;
my
@ids
;
if
(
$feat_cs
->
equals
(
$slice_cs
))
{
# no mapping is required if this is the same coord system
my
$constraint
=
$orig_constraint
;
my
$sr_id
=
$self
->
db
->
get_SliceAdaptor
->
get_seq_region_id
(
$slice
);
$constraint
.=
"
AND
"
if
(
$constraint
);
$constraint
.=
"
${tab_syn}
.seq_region_id =
$sr_id
AND
"
.
"
${tab_syn}
.seq_region_start <=
$slice_end
AND
"
.
"
${tab_syn}
.seq_region_end >=
$slice_start
";
my
$fs
=
$self
->
generic_fetch
(
$constraint
,
undef
,
$slice
);
my
$fs
=
$self
->
generic_fetch
(
$constraint
,
undef
,
$slice
);
#features may still have to have coordinates made relative to slice
#start
$fs
=
$self
->
_remap
(
$fs
,
$mapper
,
$slice
);
#
features may still have to have coordinates made relative to slice
#
start
$fs
=
$self
->
_remap
(
$fs
,
$mapper
,
$slice
);
push
@features
,
@$fs
;
}
else
{
$mapper
=
$asma
->
fetch_by_CoordSystems
(
$slice_cs
,
$feat_cs
);
# Get a list of coordinates and corresponding internal ids for the
# regions we are interested in
@coords
=
$mapper
->
map
(
$slice_seq_region
,
$slice_start
,
$slice_end
,
$slice_strand
,
$slice_cs
);
@coords
=
grep
{
!
$_
->
isa
('
Bio::EnsEMBL::Mapper::Gap
')}
@coords
;
next
COORD_SYSTEM
if
(
!
@coords
);
@ids
=
map
{
$_
->
id
()}
@coords
;
@ids
=
@
{
$asma
->
seq_regions_to_ids
(
$feat_cs
,
\
@ids
)};
#if the regions are large and only partially spanned
#it is faster to to limit the query with start and end constraints
#however, it is difficult to tell if a region is large and only
#partially wanted. The easy approach is just to limit the queries if
#there are less than a certain number of regions. As well seperate
#queries are needed otherwise the indices will not be useful
if
(
@coords
>
$MAX_SPLIT_QUERY_SEQ_REGIONS
)
{
#do one query, and do not limit with start / end constraints
my
$constraint
=
$original_constraint
;
my
$id_str
=
join
('
,
',
@ids
);
$constraint
.=
"
AND
"
if
(
$constraint
);
$constraint
.=
"
${tab_syn}
.seq_region_id IN (
$id_str
)
";
push
@features
,
@$fs
;
}
else
{
$mapper
=
$asma
->
fetch_by_CoordSystems
(
$slice_cs
,
$feat_cs
);
my
$fs
=
$self
->
generic_fetch
(
$constraint
,
$mapper
,
$slice
);
# Get list of coordinates and corresponding internal ids for
# regions the slice spans
@coords
=
$mapper
->
map
(
$slice_seq_region
,
$slice_start
,
$slice_end
,
$slice_strand
,
$slice_cs
);
$fs
=
$self
->
_remap
(
$fs
,
$mapper
,
$slice
)
;
@coords
=
grep
{
!
$_
->
isa
('
Bio::EnsEMBL::Mapper::Gap
')}
@coords
;
push
@features
,
@$fs
;
next
COORD_SYSTEM
if
(
!
@coords
);
@ids
=
map
{
$_
->
id
()}
@coords
;
@ids
=
@
{
$asma
->
seq_regions_to_ids
(
$feat_cs
,
\
@ids
)};
# When regions are large and only partially spanned by slice
# it is faster to to limit the query with start and end constraints.
# Take simple approach: use regional constraints if there are less
# than a specific number of regions covered.
if
(
@coords
>
$MAX_SPLIT_QUERY_SEQ_REGIONS
)
{
my
$constraint
=
$orig_constraint
;
my
$id_str
=
join
('
,
',
@ids
);
$constraint
.=
"
AND
"
if
(
$constraint
);
$constraint
.=
"
${tab_syn}
.seq_region_id IN (
$id_str
)
";
my
$fs
=
$self
->
generic_fetch
(
$constraint
,
$mapper
,
$slice
);
}
else
{
#do multiple split queries using start / end constraints
my
$len
=
@coords
;
for
(
my
$i
=
0
;
$i
<
$len
;
$i
++
)
{
my
$constraint
=
$original_constraint
;
$constraint
.=
"
AND
"
if
(
$constraint
);
$constraint
.=
$fs
=
$self
->
_remap
(
$fs
,
$mapper
,
$slice
);
push
@features
,
@$fs
;
}
else
{
# do multiple split queries using start / end constraints
my
$len
=
@coords
;
for
(
my
$i
=
0
;
$i
<
$len
;
$i
++
)
{
my
$constraint
=
$orig_constraint
;
$constraint
.=
"
AND
"
if
(
$constraint
);
$constraint
.=
"
${tab_syn}
.seq_region_id =
"
.
$ids
[
$i
]
.
"
AND
"
.
"
${tab_syn}
.seq_region_start <=
"
.
$coords
[
$i
]
->
end
()
.
"
AND
"
.
"
${tab_syn}
.seq_region_end >=
"
.
$coords
[
$i
]
->
start
();
my
$fs
=
$self
->
generic_fetch
(
$constraint
,
$mapper
,
$slice
);
my
$fs
=
$self
->
generic_fetch
(
$constraint
,
$mapper
,
$slice
);
$fs
=
$self
->
_remap
(
$fs
,
$mapper
,
$slice
);
push
@features
,
@$fs
;
}
}
}
}
#COORD system loop
$fs
=
$self
->
_remap
(
$fs
,
$mapper
,
$slice
);
#if this was a symlinked slice offset the feature coordinates as needed
if
(
$slice
!=
$orig_slice
)
{
foreach
my
$f
(
@features
)
{
#function calls are slow!
if
(
$offset
!=
1
)
{
$f
->
{'
start
'}
+=
$offset
-
1
;
$f
->
{'
end
'}
+=
$offset
-
1
;
push
@features
,
@$fs
;
}
$f
->
{'
slice
'}
=
$orig_slice
;
push
@result_features
,
$f
;
}
}
else
{
push
@result_features
,
@features
;
}
}
#COORD system loop
}
#slice & symmlinked slice loop
$self
->
{'
_slice_feature_cache
'}
->
{
$key
}
=
\
@result_features
;
return
\
@result_features
;
return
\
@features
;
}
#
# Helper function containing some common feature storing functionality
#
...
...
@@ -568,10 +574,9 @@ sub _pre_store {
throw
('
Feature must be attached to Slice to be stored.
');
}
# make sure that the feature coordinates are relative to
# the start of the entire seq_region
# make sure feature coords are relative to start of entire seq_region
if
(
$slice
->
start
!=
1
||
$slice
->
strand
!=
1
)
{
#move
the
feature onto a slice of the entire seq_region
#move feature onto a slice of the entire seq_region
$slice
=
$slice_adaptor
->
fetch_by_region
(
$slice
->
coord_system
->
name
(),
$slice
->
seq_region_name
(),
undef
,
#start
...
...
@@ -587,10 +592,7 @@ sub _pre_store {
}
}
#
# Ensure that this type of feature is known to be stored in this coord
# system.
#
# Ensure this type of feature is known to be stored in this coord system.
my
$cs
=
$slice
->
coord_system
;
my
(
$tab
)
=
$self
->
_tables
();
...
...
@@ -600,23 +602,6 @@ sub _pre_store {
$mcc
->
add_feature_type
(
$cs
,
$tabname
);
# we have to update the meta coord table in both the dna db and the feature
# db so that the feature db can be used independently later
# Actually, the meta coord info should probably only go in the database
# where the features are actually stored, not the dnadb (which is often
# going to be read-only anyway)
# if($db->dnadb() != $db) {
# my $dnadb = $db->dnadb();
# $db->dnadb(undef); # unset the dnadb temporarily
# #get a coord system adaptor from the feature database
# $csa = $db->get_CoordSystemAdaptor();
# $csa->add_feature_table($cs, $tabname);
# $db->dnadb($dnadb); # reinstate the dnadb
# }
my
$seq_region_id
=
$slice_adaptor
->
get_seq_region_id
(
$slice
);
if
(
!
$seq_region_id
)
{
...
...
@@ -767,8 +752,6 @@ sub _logic_name_to_constraint {
my
$an
=
$aa
->
fetch_by_logic_name
(
$logic_name
);
if
(
!
$an
)
{
warning
("
No analysis exists with logic_name = [
$logic_name
].
\n
"
.
"
Returning empty list.
\n
");
return
undef
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment