Skip to content
Snippets Groups Projects
Unverified Commit 962bf859 authored by Kieron Taylor's avatar Kieron Taylor Committed by GitHub
Browse files

Merge pull request #287 from at7/var_tidyup_95

Remove deprecated Variation methods
parents 973776da f528f3b6
No related branches found
No related tags found
2 merge requests!342Feature/schema update 96,!342Feature/schema update 96
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2018] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
Questions may also be sent to the Ensembl help desk at
<http://www.ensembl.org/Help/Contact>.
=cut
=head1 NAME
Bio::EnsEMBL::AlignStrainSlice - Represents the slice of the genome aligned with certain strains (applying the variations/indels)
=head1 SYNOPSIS
$sa = $db->get_SliceAdaptor;
$slice =
$sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
$strainSlice1 = $slice->get_by_Strain($strain_name1);
$strainSlice2 = $slice->get_by_Strain($strain_name2);
my @strainSlices;
push @strainSlices, $strainSlice1;
push @strainSlices, $strainSlice2;
$alignSlice = Bio::EnsEMBL::AlignStrainSlice->new(
-SLICE => $slice,
-STRAINS => \@strainSlices
);
# Get coordinates of variation in alignSlice
my $alleleFeatures = $strainSlice1->get_all_AlleleFeature_Slice();
foreach my $af ( @{$alleleFeatures} ) {
my $new_feature = $alignSlice->alignFeature( $af, $strainSlice1 );
print( "Coordinates of the feature in AlignSlice are: ",
$new_feature->start, "-", $new_feature->end, "\n" );
}
=head1 DESCRIPTION
A AlignStrainSlice object represents a region of a genome align for
certain strains. It can be used to align certain strains to a reference
slice.
=head1 METHODS
=cut
package Bio::EnsEMBL::AlignStrainSlice;
use strict;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Mapper;
use Bio::EnsEMBL::Mapper::RangeRegistry;
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
=head2 new
Arg[1] : Bio::EnsEMBL::Slice $Slice
Arg[2] : listref of Bio::EnsEMBL::Variation::StrainSlice $strainSlice
Example : push @strainSlices, $strainSlice1;
push @strainSlices, $strainSlice2;
.....
push @strainSlices, $strainSliceN;
$alignStrainSlice = Bio::EnsEMBL::AlignStrainSlice->new(-SLICE => $slice,
-STRAIN => \@strainSlices);
Description : Creates a new Bio::EnsEMBL::AlignStrainSlice object that will contain a mapper between
the Slice object, plus all the indels from the different Strains
ReturnType : Bio::EnsEMBL::AlignStrainSlice
Exceptions : none
Caller : general
=cut
sub new{
my $caller = shift;
deprecate("new is deprecated and will be removed in e95.");
my $class = ref($caller) || $caller;
my ($slice, $strainSlices) = rearrange([qw(SLICE STRAINS)],@_);
#check that both StrainSlice and Slice are identical (must have been defined in the same slice)
foreach my $strainSlice (@{$strainSlices}){
if (($strainSlice->start != $slice->start) || ($strainSlice->end != $slice->end) || ($strainSlice->seq_region_name ne $slice->seq_region_name)){
warning("Not possible to create Align object from different Slices");
return [];
}
}
return bless{'slice' => $slice,
'strains' => $strainSlices}, $class;
}
=head2 alignFeature
Arg[1] : Bio::EnsEMBL::Feature $feature
Arg[2] : Bio::EnsEMBL::Variation::StrainSlice $strainSlice
Example : $new_feature = $alignSlice->alignFeature($feature, $strainSlice);
Description : Creates a new Bio::EnsEMBL::Feature object that aligned to
the AlignStrainSlice object.
ReturnType : Bio::EnsEMBL::Feature
Exceptions : none
Caller : general
=cut
sub alignFeature{
my $self = shift;
my $feature = shift;
deprecate("alignFeature is deprecated and will be removed in e95.");
#check that the object is a Feature
if (!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')){
throw("Bio::EnsEMBL::Feature object expected");
}
#and align it to the AlignStrainSlice object
my $mapper_strain = $self->mapper();
my @results;
if ($feature->start > $feature->end){
#this is an Indel, map it with the special method
@results = $mapper_strain->map_indel('Slice',$feature->start, $feature->end, $feature->strand,'Slice');
#and modify the coordinates according to the length of the indel
$results[0]->end($results[0]->start + $feature->length_diff -1);
}
else{
@results = $mapper_strain->map_coordinates('Slice',$feature->start, $feature->end, $feature->strand,'Slice');
}
#get need start and end of the new feature, aligned ot AlignStrainSlice
my @results_ordered = sort {$a->start <=> $b->start} @results;
my %new_feature = %$feature; #make a shallow copy of the Feature
$new_feature{'start'}= $results_ordered[0]->start();
$new_feature{'end'} = $results_ordered[-1]->end(); #get last element of the array, the end of the slice
return bless \%new_feature, ref($feature);
}
#getter for the mapper between the Slice and the different StrainSlice objects
sub mapper{
my $self = shift;
deprecate("mapper is deprecated and will be removed in e95.");
if (!defined $self->{'mapper'}){
#get the alleleFeatures in all the strains
if (!defined $self->{'indels'}){
#when the list of indels is not defined, get them
$self->{'indels'} = $self->_get_indels();
}
my $indels = $self->{'indels'}; #gaps in reference slice
my $mapper = Bio::EnsEMBL::Mapper->new('Slice', 'AlignStrainSlice');
my $start_slice = 1;
my $end_slice;
my $start_align = 1;
my $end_align;
my $length_indel = 0;
my $length_acum_indel = 0;
foreach my $indel (@{$indels}){
$end_slice = $indel->[0] - 1;
$end_align = $indel->[0] - 1 + $length_acum_indel; #we must consider length previous indels
$length_indel = $indel->[1] - $indel->[0] + 1;
$mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'AlignStrainSlice',$start_align,$end_align);
$mapper->add_indel_coordinates('Slice',$end_slice + 1,$end_slice,1,'AlignStrainSlice',$end_align + 1,$end_align + $length_indel);
$start_slice = $end_slice + 1;
$start_align = $indel->[1] + 1 + $length_acum_indel; #we must consider legnth previous indels
$length_acum_indel += $length_indel;
}
if ($start_slice <= $self->length){
$mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'AlignStrainSlice',$start_align,$start_align + $self->length - $start_slice)
}
$self->{'mapper'} = $mapper;
}
return $self->{'mapper'};
}
#returns the length of the AlignSlice: length of the Slice plus the gaps
sub length{
my $self = shift;
deprecate("length is deprecated and will be removed in e95.");
my $length;
if (!defined $self->{'indels'}){
#when the list of indels is not defined, get them
$self->{'indels'} = $self->_get_indels();
}
$length = $self->{'slice'}->length;
map {$length += ($_->[1] - $_->[0] + 1)} @{$self->{'indels'}};
return $length;
}
=head2 strains
Args : None
Description: Returns list with all strains used to
define this AlignStrainSlice object
Returntype : listref of Bio::EnsEMBL::StrainSlice objects
Exceptions : none
Caller : general
=cut
sub strains{
my $self = shift;
deprecate("strains is deprecated and will be removed in e95.");
return $self->{'strains'};
}
=head2 Slice
Args : None
Description: Returns slice where the AlignStrainSlice
is defined
Returntype : Bio::EnsEMBL::Slice object
Exceptions : none
Caller : general
=cut
sub Slice{
my $self = shift;
deprecate("Slice is deprecated and will be removed in e95.");
return $self->{'slice'};
}
#method to retrieve, in order, a list with all the indels in the different strains
sub _get_indels{
my $self = shift;
deprecate("_get_indels is deprecated and will be removed in e95.");
#go throuh all the strains getting ONLY the indels (length_diff <> 0)
my @indels;
foreach my $strainSlice (@{$self->strains}){
my $differences = $strainSlice->get_all_AlleleFeatures_Slice(); #need to check there are differences....
foreach my $af (@{$differences}){
#if length is 0, but is a -, it is still a gap in the strain
if (($af->length_diff != 0) || ($af->length_diff == 0 && $af->allele_string =~ /-/)){
push @indels, $af;
}
}
}
#need to overlap the gaps using the RangeRegistry module
my $range_registry = Bio::EnsEMBL::Mapper::RangeRegistry->new();
foreach my $indel (@indels){
#in the reference and the strain there is a gap
$range_registry->check_and_register(1,$indel->start,$indel->start) if ($indel->length_diff == 0);
#deletion in reference slice
$range_registry->check_and_register(1,$indel->start, $indel->end ) if ($indel->length_diff < 0);
#insertion in reference slice
$range_registry->check_and_register(1,$indel->start,$indel->start + $indel->length_diff - 1) if ($indel->length_diff > 0);
}
#and return all the gap coordinates....
return $range_registry->get_ranges(1);
}
=head2 get_all_Slices
Args : none
Description: This Slice is made of several Bio::EnsEMBL::Variation::StrainSlices
sequence. This method returns these StrainSlices (or part of
them) with the original coordinates
Returntype : listref of Bio::EnsEMBL::Variation::StrainSlice objects
Exceptions : end should be at least as big as start
Caller : general
=cut
sub get_all_Slices {
my $self = shift;
deprecate("get_all_Slices is deprecated and will be removed in e95.");
my @strains;
#add the reference strain
my $dbVar = $self->Slice->adaptor->db->get_db_adaptor('variation');
unless($dbVar) {
warning("Variation database must be attached to core database to " .
"retrieve variation information" );
return '';
}
my $indAdaptor = $dbVar->get_IndividualAdaptor();
my $ref_name = $indAdaptor->get_reference_strain_name;
my $ref_strain = Bio::EnsEMBL::Variation::StrainSlice->new(
-START => $self->Slice->{'start'},
-END => $self->Slice->{'end'},
-STRAND => $self->Slice->{'strand'},
-ADAPTOR => $self->Slice->adaptor(),
-SEQ => $self->Slice->{'seq'},
-SEQ_REGION_NAME => $self->Slice->{'seq_region_name'},
-SEQ_REGION_LENGTH => $self->Slice->{'seq_region_length'},
-COORD_SYSTEM => $self->Slice->{'coord_system'},
-STRAIN_NAME => $ref_name,
);
#this is a fake reference alisce, should not contain any alleleFeature
undef $ref_strain->{'alleleFeatures'};
push @strains, @{$self->strains};
my $new_feature;
my $indel;
my $aligned_features;
my $indels = (); #reference to a hash containing indels in the different strains
#we need to realign all Features in the different Slices and add '-' in the reference Slice
foreach my $strain (@{$self->strains}){
foreach my $af (@{$strain->get_all_AlleleFeatures_Slice()}){
$new_feature = $self->alignFeature($af); #align feature in AlignSlice coordinates
push @{$aligned_features},$new_feature if($new_feature->seq_region_start <= $strain->end); #some features might map outside slice
if ($af->start != $af->end){ #an indel, need to add to the reference, and realign in the strain
#make a shallow copy of the indel - clear it first!
$indel = undef;
%{$indel} = %{$new_feature};
bless $indel, ref($new_feature);
$indel->allele_string('-');
push @{$indels},$indel; #and include in the list of potential indels
}
}
next if (!defined $aligned_features);
undef $strain->{'alleleFeatures'}; #remove all features before adding new aligned features
push @{$strain->{'alleleFeatures'}}, @{$aligned_features};
undef $aligned_features;
}
push @strains, $ref_strain;
#need to add indels in the different strains, if not present
if (defined $indels){
foreach my $strain (@strains){
#inlcude the indels in the StrainSlice object
push @{$strain->{'alignIndels'}},@{$indels};
}
}
return \@strains;
}
1;
......@@ -65,7 +65,6 @@ container slice coordinate system.
Bio::EnsEMBL::MappedSliceContainer
Bio::EnsEMBL::Compara::AlignSlice
Bio::EnsEMBL::Compara::AlignSlice::Slice
Bio::EnsEMBL::AlignStrainSlice
Bio::EnsEMBL::Variation::StrainSlice
=cut
......
This diff is collapsed.
=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2018] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
Questions may also be sent to the Ensembl help desk at
<http://www.ensembl.org/Help/Contact>.
=cut
package Bio::EnsEMBL::IndividualSliceFactory;
use strict;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
use Bio::EnsEMBL::Slice;
use Bio::EnsEMBL::Mapper;
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
use Scalar::Util qw(weaken);
=head2 new
=cut
sub new{
my $caller = shift;
deprecate("new is deprecated and will be removed in e95.");
my $class = ref($caller) || $caller;
#creates many IndividualSlice objects from the Population
my ($population_name, $coord_system, $start, $end, $strand, $seq_region_name, $seq_region_length, $adaptor) = rearrange(['POPULATION', 'COORD_SYSTEM','START','END','STRAND','SEQ_REGION_NAME','SEQ_REGION_LENGTH', 'ADAPTOR'],@_);
my $self = bless {
population_name => $population_name,
coord_system => $coord_system,
start => $start,
end => $end,
strand => $strand,
seq_region_name => $seq_region_name,
seq_region_length => $seq_region_length},$class;
$self->adaptor($adaptor);
return $self;
}
sub adaptor {
my $self = shift;
deprecate("adaptor is deprecated and will be removed in e95.");
if(@_) {
my $ad = shift;
if($ad && (!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::BaseAdaptor'))) {
throw('Adaptor argument must be a Bio::EnsEMBL::DBSQL::BaseAdaptor');
}
weaken($self->{'adaptor'} = $ad);
}
return $self->{'adaptor'}
}
sub get_all_IndividualSlice{
my $self = shift;
deprecate("get_all_IndividualSlice is deprecated and will be removed in e95.");
my $slice;
if(!$self->adaptor) {
warning('Cannot get IndividualSlice features without attached adaptor');
return '';
}
my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
unless($variation_db) {
warning("Variation database must be attached to core database to " .
"retrieve variation information" );
return '';
}
#get the AlleleFeatures in the Population
my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
if( $af_adaptor ) {
#set the adaptor to retrieve data from genotype table
$af_adaptor->from_IndividualSlice(1);
#get the Individual for the given strain
my $population_adaptor = $variation_db->get_PopulationAdaptor;
my $individual_adaptor = $variation_db->get_IndividualAdaptor;
if ($population_adaptor && $individual_adaptor){
$slice = Bio::EnsEMBL::Slice->new(-coord_system => $self->{'coord_system'},
-start => $self->{'start'},
-end => $self->{'end'},
-strand => $self->{'strand'},
-seq_region_name => $self->{'seq_region_name'},
-seq_region_length => $self->{'seq_region_length'},
-adaptor => $self->adaptor
);
my $population = $population_adaptor->fetch_by_name($self->{'population_name'});
#check that there is such population in the database
if (defined $population){
#get all the AlleleFeatures in the $population and the Slice given
my $allele_features = $af_adaptor->fetch_all_by_Slice($slice,$population);
#get Individuals in the Population
my $individuals = $individual_adaptor->fetch_all_by_Population($population);
return $self->_rearrange_Individuals_Alleles($individuals,$allele_features);
}
else{
warning("Population not in the database");
return '';
}
}
else{
warning("Not possible to retrieve PopulationAdaptor from the variation database");
return '';
}
}
else{
warning("Not possible to retrieve AlleleFeatureAdaptor from variation database");
return '';
}
}
sub _rearrange_Individuals_Alleles{
my $self = shift;
deprecate("_rearrange_Individuals_Alleles is deprecated and will be removed in e95.");
my $individuals = shift;
my $allele_features;
my $individual_slice;
#create the hash with all the individuals
my %individuals_ids;
#foreach of the individual, create the IndividualSlice object and add it to the mapping hash
foreach my $individual (@{$individuals}){
$individual_slice = Bio::EnsEMBL::Variation::IndividualSlice->new(
-coord_system => $self->{'coord_system'},
-start => $self->{'$start'},
-end => $self->{'end'},
-strand => $self->{'strand'},
-seq_region_name => $self->{'seq_region_name'},
-seq_region_length => $self->{'seq_region_length'},
-individual => $individual->name);
$individuals_ids{$individual->dbID} = $individual_slice;
}
#and rearrange all the AlleleFeatures to the individuals
foreach my $allele_feature (@{$allele_features}){
$individuals_ids{$allele_feature->{'_sample_id'}}->add_AlleleFeature($allele_feature);
}
my @result = values %individuals_ids;
return \@result;
}
1;
......@@ -127,7 +127,6 @@ by an adaptor/factory.
Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
Bio::EnsEMBL::Compara::AlignSlice
Bio::EnsEMBL::Compara::AlignSlice::Slice
Bio::EnsEMBL::AlignStrainSlice
Bio::EnsEMBL::StrainSlice
=cut
......
......@@ -110,7 +110,6 @@ mapper on the existing MappedSlices.
Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
Bio::EnsEMBL::Compara::AlignSlice
Bio::EnsEMBL::Compara::AlignSlice::Slice
Bio::EnsEMBL::AlignStrainSlice
Bio::EnsEMBL::StrainSlice
=cut
......
......@@ -84,8 +84,6 @@ use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::Utils::Iterator;
use Bio::EnsEMBL::DBSQL::MergedAdaptor;
#use Bio::EnsEMBL::IndividualSlice;
#use Bio::EnsEMBL::IndividualSliceFactory;
use Bio::EnsEMBL::Mapper::RangeRegistry;
use Bio::EnsEMBL::SeqRegionSynonym;
use Scalar::Util qw(weaken isweak);
......@@ -1987,250 +1985,6 @@ sub get_all_VariationFeatures_by_Population {
return [];
}
=head2 get_all_IndividualSlice
Deprecated. The method will be removed in e95.
Args : none
Example : my $individualSlice = $slice->get_by_Population($population);
Description : Gets the specific Slice for all the individuls in the population
ReturnType : listref of Bio::EnsEMB::IndividualSlice
Exceptions : none
Caller : general
=cut
sub get_all_IndividualSlice{
my $self = shift;
deprecate("get_all_IndividualSlice is deprecated and will be removed in e95.");
my $individualSliceFactory = Bio::EnsEMBL::IndividualSliceFactory->new(
-START => $self->{'start'},
-END => $self->{'end'},
-STRAND => $self->{'strand'},
-ADAPTOR => $self->adaptor(),
-SEQ_REGION_NAME => $self->{'seq_region_name'},
-SEQ_REGION_LENGTH => $self->{'seq_region_length'},
-COORD_SYSTEM => $self->{'coord_system'},
);
return $individualSliceFactory->get_all_IndividualSlice();
}
=head2 get_by_Individual
Deprecated. The method will be removed in e95.
Arg[1] : Bio::EnsEMBL::Variation::Individual $individual
Example : my $individualSlice = $slice->get_by_Individual($individual);
Description : Gets the specific Slice for the individual
ReturnType : Bio::EnsEMB::IndividualSlice
Exceptions : none
Caller : general
=cut
sub get_by_Individual{
my $self = shift;
my $individual = shift;
deprecate("get_by_Individual is deprecated and will be removed in e95.");
return Bio::EnsEMBL::IndividualSlice->new(
-START => $self->{'start'},
-END => $self->{'end'},
-STRAND => $self->{'strand'},
-ADAPTOR => $self->adaptor(),
# -SEQ => $self->{'seq'},
-SEQ_REGION_NAME => $self->{'seq_region_name'},
-SEQ_REGION_LENGTH => $self->{'seq_region_length'},
-COORD_SYSTEM => $self->{'coord_system'},
-INDIVIDUAL => $individual);
}
sub calculate_theta {
my $self = shift;
my $strains = shift;
my $feature = shift; #optional parameter. Name of the feature in the Slice you want to calculate
deprecate("calculate_theta is deprecated and will be removed in e95.");
if(!$self->adaptor()) {
warning('Cannot get variation features without attached adaptor');
return 0;
}
my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
unless($variation_db) {
warning("Variation database must be attached to core database to " .
"retrieve variation information" );
return 0;
}
#need to get coverage regions for the slice in the different strains
my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
my $strain;
my $differences = [];
my $slices = [];
if ($coverage_adaptor){
my $num_strains = scalar(@{$strains}) +1;
if (!defined $feature){
#we want to calculate for the whole slice
push @{$slices}, $self; #add the slice as the slice to calculate the theta value
}
else{
#we have features, get the slices for the different features
my $features = $self->get_all_Exons();
map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features
}
my $length_regions = 0;
my $snps = 0;
my $theta = 0;
my $last_position = 0;
#get all the differences in the slice coordinates
foreach my $strain_name (@{$strains}){
my $strain = $self->get_by_strain($strain_name); #get the strainSlice for the strain
my $results = $strain->get_all_differences_Slice;
push @{$differences}, @{$results} if (defined $results);
}
#when we finish, we have, in max_level, the regions covered by all the sample
#sort the differences by the genomic position
my @differences_sorted = sort {$a->start <=> $b->start} @{$differences};
foreach my $slice (@{$slices}){
my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains);
if (defined $regions_covered){
foreach my $range (@{$regions_covered}){
$length_regions += ($range->[1] - $range->[0]) + 1; #add the length of the genomic region
for (my $i = $last_position;$i<@differences_sorted;$i++){
if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){
$snps++; #count differences in the region
}
elsif ($differences_sorted[$i]->end > $range->[1]){
$last_position = $i;
last;
}
}
}
#when all the ranges have been iterated, calculate rho
#this is an intermediate variable called a in the formula
# a = sum i=2..strains 1/i-1
}
}
my $a = _calculate_a($num_strains);
$theta = $snps / ($a * $length_regions);
return $theta;
}
else{
return 0;
}
}
sub _calculate_a {
my $max_level = shift;
deprecate("_calculate_a is deprecated and will be removed in e95.");
my $a = 0;
for (my $i = 2; $i <= $max_level+1;$i++){
$a += 1/($i-1);
}
return $a;
}
sub calculate_pi {
my $self = shift;
my $strains = shift;
my $feature = shift;
deprecate("calculate_pi is deprecated and will be removed in e95.");
if(!$self->adaptor()) {
warning('Cannot get variation features without attached adaptor');
return 0;
}
my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
unless($variation_db) {
warning("Variation database must be attached to core database to " .
"retrieve variation information" );
return 0;
}
#need to get coverage regions for the slice in the different strains
my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor;
my $differences = [];
my $slices = [];
if ($coverage_adaptor){
my $num_strains = scalar(@{$strains}) +1;
if (!defined $feature){
#we want to calculate for the whole slice
push @{$slices}, $self; #add the slice as the slice to calculate the theta value
}
else{
#we have features, get the slices for the different features
my $features = $self->get_all_Exons();
map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features
}
my @range_differences = ();
my $pi = 0;
my $regions = 0;
my $last_position = 0; #last position visited in the sorted list of differences
my $triallelic = 0;
my $is_triallelic = 0;
foreach my $slice (@{$slices}){
foreach my $strain_name (@{$strains}){
my $strain = $slice->get_by_strain($strain_name); #get the strainSlice for the strain
my $results = $strain->get_all_differences_Slice;
push @{$differences}, @{$results} if (defined $results);
}
my @differences_sorted = sort {$a->start <=> $b->start} @{$differences};
my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains);
#when we finish, we have, in max_level, the regions covered by all the sample
#sort the differences
if (defined $regions_covered){
foreach my $range (@{$regions_covered}){
for (my $i = $last_position;$i<@differences_sorted;$i++){
if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){
#check wether it is the same region or different
if (!defined $range_differences[0] || ($differences_sorted[$i]->start == $range_differences[0]->start)){
if (defined $range_differences[0] && ($differences_sorted[$i]->allele_string ne $range_differences[0]->allele_string)){
$is_triallelic = 1;
}
push @range_differences, $differences_sorted[$i];
}
else{
#new site, calc pi for the previous one
$pi += 2 * (@range_differences/($num_strains)) * ( 1 - (@range_differences/$num_strains));
if ($is_triallelic) {
$triallelic++;
$is_triallelic = 0;
}
$regions++;
@range_differences = ();
#and start a new range
push @range_differences, $differences_sorted[$i];
}
}
elsif ($differences_sorted[$i]->end > $range->[1]){
$last_position = $i;
last;
}
}
#calculate pi for last site, if any
if (defined $range_differences[0]){
$pi += 2 * (@range_differences/$num_strains) * ( 1 - (@range_differences/$num_strains));
$regions++;
}
}
}
$pi = $pi / $regions; #calculate average pi
print "Regions with variations in region $regions and triallelic $triallelic\n\n";
}
return $pi;
}
else{
return 0;
}
}
=head2 get_all_Genes
Arg [1] : (optional) string $logic_name
......
......@@ -32,7 +32,6 @@ my %result = map{$_ => 1} @result;
my @exceptions = ('/Bio/EnsEMBL/Utils/TranscriptAlleles.pm',
'/Bio/EnsEMBL/Utils/ensembl_init.example',
'/Bio/EnsEMBL/AlignStrainSlice',
'/Bio/EnsEMBL/Feature',
'/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm',
'/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm',
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment