Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ensembl-gh-mirror
ensembl
Commits
9e8910b0
Commit
9e8910b0
authored
Feb 16, 2010
by
William McLaren
Browse files
Initial check in
parent
924d9559
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
350 additions
and
0 deletions
+350
-0
modules/Bio/EnsEMBL/DBSQL/StrainSliceAdaptor.pm
modules/Bio/EnsEMBL/DBSQL/StrainSliceAdaptor.pm
+350
-0
No files found.
modules/Bio/EnsEMBL/DBSQL/StrainSliceAdaptor.pm
0 → 100644
View file @
9e8910b0
=head1 LICENSE
Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <ensembl-dev@ebi.ac.uk>.
Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.
=cut
=head1 NAME
Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices
representing alternative assemblies
=head1 SYNOPSIS
my $slice =
$slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
my $asa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new;
my ($mapped_slice) = @{ $asa->fetch_by_version( $msc, 'NCBIM36' ) };
=head1 DESCRIPTION
NOTE: this code is under development and not fully functional nor tested
yet. Use only for development.
This adaptor is a factory for creating MappedSlices representing
strains and attaching them to a MappedSliceContainer. A mapper will be created
to map between the reference slice and the common container slice coordinate
system.
=head1 METHODS
new
fetch_by_name
=head1 REALTED MODULES
Bio::EnsEMBL::MappedSlice
Bio::EnsEMBL::MappedSliceContainer
Bio::EnsEMBL::AlignStrainSlice
Bio::EnsEMBL::StrainSlice
=cut
package
Bio::EnsEMBL::DBSQL::
StrainSliceAdaptor
;
use
strict
;
use
warnings
;
no
warnings
'
uninitialized
';
use
Bio::EnsEMBL::Utils::
Argument
qw(rearrange)
;
use
Bio::EnsEMBL::Utils::
Exception
qw(throw warning)
;
use
Bio::EnsEMBL::
MappedSlice
;
use
Bio::EnsEMBL::
Mapper
;
use
Bio::EnsEMBL::DBSQL::
BaseAdaptor
;
our
@ISA
=
qw(Bio::EnsEMBL::DBSQL::BaseAdaptor)
;
=head2 new
Example : my $strain_slice_adaptor =
Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new;
Description : Constructor.
Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
Exceptions : none
Caller : general
Status : At Risk
: under development
=cut
sub
new
{
my
$caller
=
shift
;
my
$class
=
ref
(
$caller
)
||
$caller
;
my
$self
=
$class
->
SUPER::
new
(
@
_
);
return
$self
;
}
=head2 fetch_by_name
Arg[1] : Bio::EnsEMBL::MappedSliceContainer $container - the container
to attach MappedSlices to
Arg[2] : String $name - the name of the strain to fetch
Example : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') };
Description : Creates a MappedSlice representing a version of the container's
reference slice with variant alleles from the named strain
Return type : listref of Bio::EnsEMBL::MappedSlice
Exceptions : thrown on wrong or missing arguments
Caller : general, Bio::EnsEMBL::MappedSliceContainer
Status : At Risk
: under development
=cut
sub
fetch_by_name
{
my
$self
=
shift
;
my
$container
=
shift
;
my
$name
=
shift
;
# argueent check
unless
(
$container
and
ref
(
$container
)
and
$container
->
isa
('
Bio::EnsEMBL::MappedSliceContainer
'))
{
throw
("
Need a MappedSliceContainer.
");
}
unless
(
$name
)
{
throw
("
Need a strain name.
");
}
my
$slice
=
$container
->
ref_slice
;
# get a connection to the variation DB
my
$variation_db
=
$self
->
db
->
get_db_adaptor
('
variation
');
unless
(
$variation_db
)
{
warning
("
Variation database must be attached to core database to retrieve variation information
");
return
'';
}
# now get an allele feature adaptor
my
$af_adaptor
=
$variation_db
->
get_AlleleFeatureAdaptor
;
# check we got it
unless
(
defined
$af_adaptor
)
{
warning
("
Not possible to retrieve AlleleFeatureAdaptor from variation database
");
return
'';
}
# now get an individual adaptor
my
$ind_adaptor
=
$variation_db
->
get_IndividualAdaptor
;
# check we got it
unless
(
defined
$ind_adaptor
)
{
warning
("
Not possible to retrieve IndividualAdaptor from variation database
");
return
'';
}
# fetch individual object for this strain name
my
$ind
=
shift
@
{
$ind_adaptor
->
fetch_all_by_name
(
$name
)};
# check we got a result
unless
(
defined
$ind
)
{
warn
("
Strain
"
.
$name
.
"
not found in the database
");
return
'';
}
## MAP STRAIN SLICE TO REF SLICE
################################
# get all allele features for this slice and individual
my
@afs
=
sort
{
$a
->
start
()
<=>
$b
->
start
()}
@
{
$af_adaptor
->
fetch_all_by_Slice
(
$slice
,
$ind
)};
# check we got some data
warning
("
No strain genotype data available for slice
"
.
$slice
->
name
.
"
and strain
"
.
$ind
->
name
)
if
!
defined
$afs
[
0
];
# create a mapper
my
$mapper
=
Bio::EnsEMBL::
Mapper
->
new
('
mapped_slice
',
'
ref_slice
');
# create a mapped_slice object
my
$mapped_slice
=
Bio::EnsEMBL::
MappedSlice
->
new
(
-
ADAPTOR
=>
$self
,
-
CONTAINER
=>
$container
,
-
NAME
=>
$slice
->
name
.
"
\
#strain_
$name
",
);
my
$strain_slice
=
$slice
->
get_by_strain
(
$ind
->
name
);
my
$start_slice
=
$slice
->
start
;
my
$start_strain
=
1
;
my
$sr_name
=
$slice
->
seq_region_name
;
#my $sr_name = 'ref_slice';
my
(
$end_slice
,
$end_strain
,
$allele_length
);
my
$indel_flag
=
0
;
my
$total_length_diff
=
0
;
# go through each AF
foreach
my
$af
(
@afs
)
{
# find out if it changes the length
if
(
$af
->
length_diff
!=
0
)
{
$indel_flag
=
1
;
$total_length_diff
+=
$af
->
length_diff
;
# get the allele length
$allele_length
=
$af
->
length
+
$af
->
length_diff
();
$end_slice
=
$slice
->
start
+
$af
->
start
()
-
2
;
if
(
$end_slice
>=
$start_slice
){
$end_strain
=
$end_slice
-
$start_slice
+
$start_strain
;
#add the sequence that maps
$mapper
->
add_map_coordinates
('
mapped_slice
',
$start_strain
,
$end_strain
,
1
,
$sr_name
,
$start_slice
,
$end_slice
);
#add the indel
$mapper
->
add_indel_coordinates
('
mapped_slice
',
$end_strain
+
1
,
$end_strain
+
$allele_length
,
1
,
$sr_name
,
$end_slice
+
1
,
$end_slice
+
$af
->
length
);
$start_strain
=
$end_strain
+
$allele_length
+
1
;
}
else
{
#add the indel
$mapper
->
add_indel_coordinates
('
mapped_slice
',
$end_strain
+
1
,
$end_strain
+
$allele_length
,
1
,
$sr_name
,
$end_slice
+
1
,
$end_slice
+
$af
->
length
);
$start_strain
+=
$allele_length
;
}
$start_slice
=
$end_slice
+
$af
->
length
+
1
;
}
}
# add the remaining coordinates (or the whole length if no indels found)
$mapper
->
add_map_coordinates
('
mapped_slice
',
$start_strain
,
$start_strain
+
(
$slice
->
end
-
$start_slice
),
1
,
$sr_name
,
$start_slice
,
$slice
->
end
);
# add the slice/mapper pair
$mapped_slice
->
add_Slice_Mapper_pair
(
$strain_slice
,
$mapper
);
## MAP REF_SLICE TO CONTAINER SLICE
###################################
if
(
$total_length_diff
>
0
)
{
# create a new mapper
my
$new_mapper
=
Bio::EnsEMBL::
Mapper
->
new
('
ref_slice
',
'
container
');
# get existing pairs
my
@existing_pairs
=
$container
->
mapper
->
list_pairs
('
container
',
1
,
$container
->
container_slice
->
length
,
'
container
');
my
@new_pairs
=
$mapper
->
list_pairs
('
mapped_slice
',
1
,
$strain_slice
->
length
(),
'
mapped_slice
');
# we need a list of indels (specifically inserts)
my
@indels
;
# go through existing first
foreach
my
$pair
(
@existing_pairs
)
{
if
(
$pair
->
from
->
end
-
$pair
->
from
->
start
!=
$pair
->
to
->
end
-
$pair
->
to
->
start
)
{
my
$indel
;
$indel
->
{'
length_diff
'}
=
(
$pair
->
to
->
end
-
$pair
->
to
->
start
)
-
(
$pair
->
from
->
end
-
$pair
->
from
->
start
);
# we're only interested in inserts here, not deletions
next
unless
$indel
->
{'
length_diff
'}
>
0
;
$indel
->
{'
ref_start
'}
=
$pair
->
from
->
start
;
$indel
->
{'
ref_end
'}
=
$pair
->
from
->
end
;
$indel
->
{'
length
'}
=
$pair
->
to
->
end
-
$pair
->
to
->
start
;
push
@indels
,
$indel
;
}
}
# now new ones
foreach
my
$pair
(
@new_pairs
)
{
if
(
$pair
->
from
->
end
-
$pair
->
from
->
start
!=
$pair
->
to
->
end
-
$pair
->
to
->
start
)
{
my
$indel
;
$indel
->
{'
length_diff
'}
=
(
$pair
->
from
->
end
-
$pair
->
from
->
start
)
-
(
$pair
->
to
->
end
-
$pair
->
to
->
start
);
# we're only interested in inserts here, not deletions
next
unless
$indel
->
{'
length_diff
'}
>
0
;
$indel
->
{'
ref_start
'}
=
$pair
->
to
->
start
;
$indel
->
{'
ref_end
'}
=
$pair
->
to
->
end
;
$indel
->
{'
length
'}
=
$pair
->
from
->
end
-
$pair
->
from
->
start
;
push
@indels
,
$indel
;
}
}
# sort them
@indels
=
sort
{
$a
->
{'
ref_start
'}
<=>
$b
->
{'
ref_start
'}
||
# by position
$b
->
{'
length_diff
'}
<=>
$a
->
{'
length_diff
'}
# then by length diff so we only keep the longest
}
@indels
;
# clean them
my
@new_indels
=
();
my
$p
=
$indels
[
0
];
push
@new_indels
,
$indels
[
0
]
if
scalar
@indels
;
for
my
$i
(
1
..
$#indels
)
{
my
$c
=
$indels
[
$i
];
if
(
$c
->
{'
ref_start
'}
!=
$p
->
{'
ref_start
'}
&&
$c
->
{'
ref_end
'}
!=
$p
->
{'
ref_end
'})
{
push
@new_indels
,
$c
;
$p
=
$c
;
}
}
$start_slice
=
$slice
->
start
;
$start_strain
=
1
;
$sr_name
=
$slice
->
seq_region_name
;
#$sr_name = 'ref_slice';
foreach
my
$indel
(
@new_indels
)
{
$end_slice
=
$indel
->
{'
ref_end
'};
$end_strain
=
$start_strain
+
(
$end_slice
-
$start_slice
);
$allele_length
=
$indel
->
{'
length
'}
+
$indel
->
{'
length_diff
'};
$new_mapper
->
add_map_coordinates
(
$sr_name
,
$start_slice
,
$end_slice
,
1
,
'
container
',
$start_strain
,
$end_strain
);
$new_mapper
->
add_indel_coordinates
(
$sr_name
,
$end_slice
+
1
,
$end_slice
+
$indel
->
{'
length
'},
1
,
'
container
',
$end_strain
+
1
,
$end_strain
+
$allele_length
);
$start_strain
=
$end_strain
+
$allele_length
+
1
;
$start_slice
=
$end_slice
+
$indel
->
{'
length
'}
+
1
;
}
$new_mapper
->
add_map_coordinates
(
$sr_name
,
$start_slice
,
$slice
->
end
,
1
,
'
container
',
$start_strain
,
$start_strain
+
(
$slice
->
end
-
$start_slice
));
# replace the mapper with the new mapper
$container
->
mapper
(
$new_mapper
);
# change the container slice's length according to length diff
$container
->
container_slice
(
$container
->
container_slice
->
expand
(
undef
,
$total_length_diff
,
1
));
}
return
[
$mapped_slice
];
}
1
;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment