diff --git a/modules/Bio/EnsEMBL/AlignStrainSlice.pm b/modules/Bio/EnsEMBL/AlignStrainSlice.pm deleted file mode 100644 index e3ace948a9fd2d647eed2d9190a3e446f6524343..0000000000000000000000000000000000000000 --- a/modules/Bio/EnsEMBL/AlignStrainSlice.pm +++ /dev/null @@ -1,365 +0,0 @@ -=head1 LICENSE - -Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute -Copyright [2016-2018] EMBL-European Bioinformatics Institute - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -=cut - - -=head1 CONTACT - - Please email comments or questions to the public Ensembl - developers list at <http://lists.ensembl.org/mailman/listinfo/dev>. - - Questions may also be sent to the Ensembl help desk at - <http://www.ensembl.org/Help/Contact>. - -=cut - -=head1 NAME - -Bio::EnsEMBL::AlignStrainSlice - Represents the slice of the genome aligned with certain strains (applying the variations/indels) - -=head1 SYNOPSIS - - $sa = $db->get_SliceAdaptor; - - $slice = - $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); - - $strainSlice1 = $slice->get_by_Strain($strain_name1); - $strainSlice2 = $slice->get_by_Strain($strain_name2); - - my @strainSlices; - push @strainSlices, $strainSlice1; - push @strainSlices, $strainSlice2; - - $alignSlice = Bio::EnsEMBL::AlignStrainSlice->new( - -SLICE => $slice, - -STRAINS => \@strainSlices - ); - - # Get coordinates of variation in alignSlice - my $alleleFeatures = $strainSlice1->get_all_AlleleFeature_Slice(); - - foreach my $af ( @{$alleleFeatures} ) { - my $new_feature = $alignSlice->alignFeature( $af, $strainSlice1 ); - print( "Coordinates of the feature in AlignSlice are: ", - $new_feature->start, "-", $new_feature->end, "\n" ); - } - -=head1 DESCRIPTION - -A AlignStrainSlice object represents a region of a genome align for -certain strains. It can be used to align certain strains to a reference -slice. - -=head1 METHODS - -=cut - -package Bio::EnsEMBL::AlignStrainSlice; -use strict; - -use Bio::EnsEMBL::Utils::Argument qw(rearrange); -use Bio::EnsEMBL::Mapper; -use Bio::EnsEMBL::Mapper::RangeRegistry; -use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); - -=head2 new - - Arg[1] : Bio::EnsEMBL::Slice $Slice - Arg[2] : listref of Bio::EnsEMBL::Variation::StrainSlice $strainSlice - Example : push @strainSlices, $strainSlice1; - push @strainSlices, $strainSlice2; - ..... - push @strainSlices, $strainSliceN; - $alignStrainSlice = Bio::EnsEMBL::AlignStrainSlice->new(-SLICE => $slice, - -STRAIN => \@strainSlices); - Description : Creates a new Bio::EnsEMBL::AlignStrainSlice object that will contain a mapper between - the Slice object, plus all the indels from the different Strains - ReturnType : Bio::EnsEMBL::AlignStrainSlice - Exceptions : none - Caller : general - -=cut - -sub new{ - my $caller = shift; - deprecate("new is deprecated and will be removed in e95."); - my $class = ref($caller) || $caller; - - my ($slice, $strainSlices) = rearrange([qw(SLICE STRAINS)],@_); - - #check that both StrainSlice and Slice are identical (must have been defined in the same slice) - foreach my $strainSlice (@{$strainSlices}){ - if (($strainSlice->start != $slice->start) || ($strainSlice->end != $slice->end) || ($strainSlice->seq_region_name ne $slice->seq_region_name)){ - warning("Not possible to create Align object from different Slices"); - return []; - } - } - - return bless{'slice' => $slice, - 'strains' => $strainSlices}, $class; -} - -=head2 alignFeature - - Arg[1] : Bio::EnsEMBL::Feature $feature - Arg[2] : Bio::EnsEMBL::Variation::StrainSlice $strainSlice - Example : $new_feature = $alignSlice->alignFeature($feature, $strainSlice); - Description : Creates a new Bio::EnsEMBL::Feature object that aligned to - the AlignStrainSlice object. - ReturnType : Bio::EnsEMBL::Feature - Exceptions : none - Caller : general - -=cut - -sub alignFeature{ - my $self = shift; - my $feature = shift; - deprecate("alignFeature is deprecated and will be removed in e95."); - - #check that the object is a Feature - if (!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')){ - throw("Bio::EnsEMBL::Feature object expected"); - } - #and align it to the AlignStrainSlice object - my $mapper_strain = $self->mapper(); - - my @results; - - if ($feature->start > $feature->end){ - #this is an Indel, map it with the special method - @results = $mapper_strain->map_indel('Slice',$feature->start, $feature->end, $feature->strand,'Slice'); - #and modify the coordinates according to the length of the indel - $results[0]->end($results[0]->start + $feature->length_diff -1); - } - else{ - @results = $mapper_strain->map_coordinates('Slice',$feature->start, $feature->end, $feature->strand,'Slice'); - } - #get need start and end of the new feature, aligned ot AlignStrainSlice - my @results_ordered = sort {$a->start <=> $b->start} @results; - - my %new_feature = %$feature; #make a shallow copy of the Feature - $new_feature{'start'}= $results_ordered[0]->start(); - $new_feature{'end'} = $results_ordered[-1]->end(); #get last element of the array, the end of the slice - - return bless \%new_feature, ref($feature); - -} - - -#getter for the mapper between the Slice and the different StrainSlice objects -sub mapper{ - my $self = shift; - deprecate("mapper is deprecated and will be removed in e95."); - - if (!defined $self->{'mapper'}){ - #get the alleleFeatures in all the strains - if (!defined $self->{'indels'}){ - #when the list of indels is not defined, get them - $self->{'indels'} = $self->_get_indels(); - } - my $indels = $self->{'indels'}; #gaps in reference slice - my $mapper = Bio::EnsEMBL::Mapper->new('Slice', 'AlignStrainSlice'); - my $start_slice = 1; - my $end_slice; - my $start_align = 1; - my $end_align; - my $length_indel = 0; - my $length_acum_indel = 0; - foreach my $indel (@{$indels}){ - $end_slice = $indel->[0] - 1; - $end_align = $indel->[0] - 1 + $length_acum_indel; #we must consider length previous indels - - $length_indel = $indel->[1] - $indel->[0] + 1; - - - $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'AlignStrainSlice',$start_align,$end_align); - - $mapper->add_indel_coordinates('Slice',$end_slice + 1,$end_slice,1,'AlignStrainSlice',$end_align + 1,$end_align + $length_indel); - $start_slice = $end_slice + 1; - $start_align = $indel->[1] + 1 + $length_acum_indel; #we must consider legnth previous indels - - $length_acum_indel += $length_indel; - } - if ($start_slice <= $self->length){ - $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'AlignStrainSlice',$start_align,$start_align + $self->length - $start_slice) - } - $self->{'mapper'} = $mapper; - - } - return $self->{'mapper'}; -} - -#returns the length of the AlignSlice: length of the Slice plus the gaps -sub length{ - my $self = shift; - deprecate("length is deprecated and will be removed in e95."); - - my $length; - if (!defined $self->{'indels'}){ - #when the list of indels is not defined, get them - $self->{'indels'} = $self->_get_indels(); - } - $length = $self->{'slice'}->length; - map {$length += ($_->[1] - $_->[0] + 1)} @{$self->{'indels'}}; - return $length; -} - -=head2 strains - - Args : None - Description: Returns list with all strains used to - define this AlignStrainSlice object - Returntype : listref of Bio::EnsEMBL::StrainSlice objects - Exceptions : none - Caller : general - -=cut - -sub strains{ - my $self = shift; - deprecate("strains is deprecated and will be removed in e95."); - - return $self->{'strains'}; -} - -=head2 Slice - - Args : None - Description: Returns slice where the AlignStrainSlice - is defined - Returntype : Bio::EnsEMBL::Slice object - Exceptions : none - Caller : general - -=cut - -sub Slice{ - my $self = shift; - deprecate("Slice is deprecated and will be removed in e95."); - - return $self->{'slice'}; -} -#method to retrieve, in order, a list with all the indels in the different strains -sub _get_indels{ - my $self = shift; - deprecate("_get_indels is deprecated and will be removed in e95."); - - #go throuh all the strains getting ONLY the indels (length_diff <> 0) - my @indels; - foreach my $strainSlice (@{$self->strains}){ - my $differences = $strainSlice->get_all_AlleleFeatures_Slice(); #need to check there are differences.... - foreach my $af (@{$differences}){ - #if length is 0, but is a -, it is still a gap in the strain - if (($af->length_diff != 0) || ($af->length_diff == 0 && $af->allele_string =~ /-/)){ - push @indels, $af; - } - } - } - #need to overlap the gaps using the RangeRegistry module - my $range_registry = Bio::EnsEMBL::Mapper::RangeRegistry->new(); - foreach my $indel (@indels){ - #in the reference and the strain there is a gap - $range_registry->check_and_register(1,$indel->start,$indel->start) if ($indel->length_diff == 0); - #deletion in reference slice - $range_registry->check_and_register(1,$indel->start, $indel->end ) if ($indel->length_diff < 0); - #insertion in reference slice - $range_registry->check_and_register(1,$indel->start,$indel->start + $indel->length_diff - 1) if ($indel->length_diff > 0); - } - #and return all the gap coordinates.... - return $range_registry->get_ranges(1); -} - -=head2 get_all_Slices - - Args : none - Description: This Slice is made of several Bio::EnsEMBL::Variation::StrainSlices - sequence. This method returns these StrainSlices (or part of - them) with the original coordinates - Returntype : listref of Bio::EnsEMBL::Variation::StrainSlice objects - Exceptions : end should be at least as big as start - Caller : general - -=cut - -sub get_all_Slices { - my $self = shift; - - deprecate("get_all_Slices is deprecated and will be removed in e95."); - - my @strains; - #add the reference strain - my $dbVar = $self->Slice->adaptor->db->get_db_adaptor('variation'); - unless($dbVar) { - warning("Variation database must be attached to core database to " . - "retrieve variation information" ); - return ''; - } - my $indAdaptor = $dbVar->get_IndividualAdaptor(); - my $ref_name = $indAdaptor->get_reference_strain_name; - my $ref_strain = Bio::EnsEMBL::Variation::StrainSlice->new( - -START => $self->Slice->{'start'}, - -END => $self->Slice->{'end'}, - -STRAND => $self->Slice->{'strand'}, - -ADAPTOR => $self->Slice->adaptor(), - -SEQ => $self->Slice->{'seq'}, - -SEQ_REGION_NAME => $self->Slice->{'seq_region_name'}, - -SEQ_REGION_LENGTH => $self->Slice->{'seq_region_length'}, - -COORD_SYSTEM => $self->Slice->{'coord_system'}, - -STRAIN_NAME => $ref_name, - ); - #this is a fake reference alisce, should not contain any alleleFeature - undef $ref_strain->{'alleleFeatures'}; - - push @strains, @{$self->strains}; - my $new_feature; - my $indel; - my $aligned_features; - my $indels = (); #reference to a hash containing indels in the different strains - #we need to realign all Features in the different Slices and add '-' in the reference Slice - foreach my $strain (@{$self->strains}){ - foreach my $af (@{$strain->get_all_AlleleFeatures_Slice()}){ - $new_feature = $self->alignFeature($af); #align feature in AlignSlice coordinates - push @{$aligned_features},$new_feature if($new_feature->seq_region_start <= $strain->end); #some features might map outside slice - if ($af->start != $af->end){ #an indel, need to add to the reference, and realign in the strain - #make a shallow copy of the indel - clear it first! - $indel = undef; - %{$indel} = %{$new_feature}; - bless $indel, ref($new_feature); - $indel->allele_string('-'); - push @{$indels},$indel; #and include in the list of potential indels - } - } - next if (!defined $aligned_features); - undef $strain->{'alleleFeatures'}; #remove all features before adding new aligned features - push @{$strain->{'alleleFeatures'}}, @{$aligned_features}; - undef $aligned_features; - } - push @strains, $ref_strain; - #need to add indels in the different strains, if not present - if (defined $indels){ - foreach my $strain (@strains){ - #inlcude the indels in the StrainSlice object - push @{$strain->{'alignIndels'}},@{$indels}; - } - } - return \@strains; -} - -1; diff --git a/modules/Bio/EnsEMBL/DBSQL/AssemblySliceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/AssemblySliceAdaptor.pm index f99444601613c15089fe14abc8900cc8465c37ff..a74d3e20e58f6c66013b0811bb094499b3ff6dfd 100644 --- a/modules/Bio/EnsEMBL/DBSQL/AssemblySliceAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/AssemblySliceAdaptor.pm @@ -65,7 +65,6 @@ container slice coordinate system. Bio::EnsEMBL::MappedSliceContainer Bio::EnsEMBL::Compara::AlignSlice Bio::EnsEMBL::Compara::AlignSlice::Slice - Bio::EnsEMBL::AlignStrainSlice Bio::EnsEMBL::Variation::StrainSlice =cut diff --git a/modules/Bio/EnsEMBL/IndividualSlice.pm b/modules/Bio/EnsEMBL/IndividualSlice.pm deleted file mode 100644 index 626a5adb85d391a5c44b5b466dcad66510562444..0000000000000000000000000000000000000000 --- a/modules/Bio/EnsEMBL/IndividualSlice.pm +++ /dev/null @@ -1,690 +0,0 @@ -=head1 LICENSE - -Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute -Copyright [2016-2018] EMBL-European Bioinformatics Institute - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -=cut - - -=head1 CONTACT - - Please email comments or questions to the public Ensembl - developers list at <http://lists.ensembl.org/mailman/listinfo/dev>. - - Questions may also be sent to the Ensembl help desk at - <http://www.ensembl.org/Help/Contact>. - -=cut - -=head1 NAME - -Bio::EnsEMBL::IndividualSlice - SubClass of the Slice. Represents the -slice of the genome for a certain individual (applying the alleles for -this individual) - -=head1 SYNOPSIS - - $sa = $db->get_SliceAdaptor; - - $slice = - $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 ); - - $individualSlice = $slice->get_by_Individual($individual_name); - - # Get the sequence from the Individual Slice: will contain IUPAC codes - # for SNPs and Ensembl ambiguity codes for indels - my $seq = $individualSlice->seq(); - print $seq; - - # Get a subSlice of the Strain - my $subSlice_individual = - $individualSlice->sub_Slice( 5_000, 8_000, 1 ) - - # Compare two different individuals in the same Slice - my $sliceIndividual2 = $slice->get_by_Individual($individual_name2); - my $differences = - $individualSlice->get_all_differences_IndividualSlice( - $sliceIndividual2); - - foreach my $af ( @{$differences} ) { - print - "There is a difference between $individual_name " - . "and $individual_name2 at ", - $af->start, "-", $af->end, - " with allele ", $af->allele_string(), "\n"; - } - -=head1 DESCRIPTION - -A IndividualSlice object represents a region of a genome for a certain -individual. It can be used to retrieve sequence or features from a -individual. - -=head1 METHODS - -=cut - -package Bio::EnsEMBL::IndividualSlice; -use vars qw(@ISA); -use strict; - -use Bio::EnsEMBL::Utils::Argument qw(rearrange); -use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); -use Bio::EnsEMBL::Slice; -use Bio::EnsEMBL::Mapper; -use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); - -@ISA = qw(Bio::EnsEMBL::Slice); - - -=head2 new - - Arg [1..N] : List of named arguments - Bio::EnsEMBL::CoordSystem COORD_SYSTEM - string SEQ_REGION_NAME, - int START, - int END, - string VERSION (optional, defaults to '') - int STRAND, (optional, defaults to 1) - Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional) - Arg[N+1] : string $individual_name - Example : $individualSlice = Bio::EnsEMBL::IndividualSlice->new(-coord_system => $cs, - -start => 1, - -end => 10000, - -strand => 1, - -seq_region_name => 'X', - -seq_region_length => 12e6, - -individual_name => $individual_name); - Description : Creates a new Bio::EnsEMBL::IndividualSlice object that will contain a shallow copy of the - Slice object, plus additional information such as the individual this Slice refers to - and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the - reference sequence - ReturnType : Bio::EnsEMBL::IndividualSlice - Exceptions : none - Caller : general - -=cut - -sub new{ - my $caller = shift; - deprecate("new is deprecated and will be removed in e95."); - my $class = ref($caller) || $caller; - - #create the IndividualSlice object as the Slice, plus the individual attribute - my ($individual_name, $sample_id) = rearrange(['INDIVIDUAL', 'SAMPLE_ID'],@_); - - my $self = $class->SUPER::new(@_); - - $self->{'individual_name'} = $individual_name; - $self->{'sample_id'} = $sample_id; - - return $self; - -} - -=head2 individual_name - - Arg [1] : (optional) string $individual_name - Example : my $individual_name = $individualSlice->individual_name(); - Description : Getter/Setter for the name of the individual in the slice - ReturnType : string - Exceptions : none - Caller : general - -=cut - -sub individual_name{ - my $self = shift; - deprecate("individual_name is deprecated and will be removed in e95."); - - if (@_){ - $self->{'individual_name'} = shift @_; - } - return $self->{'individual_name'}; -} - -=head2 seq - - Arg [1] : none - Example : print "SEQUENCE = ", $strainSlice->seq(); - Description: Returns the sequence of the region represented by this - StrainSlice formatted as a string. - Returntype : string - Exceptions : none - Caller : general - -=cut - -sub seq { - my $self = shift; - deprecate("seq is deprecated and will be removed in e95."); - - # special case for in-between (insert) coordinates - return '' if($self->start() == $self->end() + 1); - - return $self->{'seq'} if($self->{'seq'}); - - if($self->adaptor()) { - my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor(); - my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice - #apply all differences to the reference sequence - - # sort edits in reverse order to remove complication of - # adjusting downstream edits - my @allele_features_ordered; - @allele_features_ordered = sort {$b->start() <=> $a->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'}); - - foreach my $af (@allele_features_ordered){ - $af->apply_edit($reference_sequence); #change, in the reference sequence, the af - } -# return substr(${$reference_sequence},0,1) if ($self->length == 1); - return ${$reference_sequence}; #returns the reference sequence, applying the alleleFeatures - } - - # no attached sequence, and no db, so just return Ns - return 'N' x $self->length(); -} - -=head2 get_all_differences_Slice - - Args : none - Example : my $differences = $individualSlice->get_all_differences_Slice() - Description : Gets all differences between the IndividualSlice object and the Slice is defined - ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature - Exceptions : none - Caller : general - -=cut - -sub get_all_differences_Slice{ - my $self = shift; - deprecate("get_all_differences_Slice is deprecated and will be removed in e95."); - - my $differences; #reference to the array with the differences between Slice and StrainSlice - my $ref_allele; - foreach my $difference (@{$self->{'alleleFeatures'}}){ - if ($difference->length_diff == 0){ - #the difference is a SNP, check if it is the same as the reference allele - $ref_allele = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand); - $ref_allele = '-' if ($ref_allele eq ''); - if ($ref_allele ne $difference->allele_string){ - #when the alleleFeature is different from the reference allele, add to the differences list - push @{$differences},$difference; - } - } - else{ - push @{$differences},$difference; - } - } - - return $differences; - -} - -=head2 get_all_differences_IndividualSlice - - Arg[1] : Bio::EnsEMBL::IndividualSlice $is - Example : my $differences = $individualSlice->get_all_differences_IndividualSlice($individualslice) - Description : Gets differences between 2 IndividualSlice objects - ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature - Exceptions : thrown on bad argument - Caller : general - -=cut - -sub get_all_differences_IndividualSlice{ - my $self = shift; - deprecate("get_all_differences_IndividualSlice is deprecated and will be removed in e95."); - - my $individualSlice = shift; - - if (!ref($individualSlice) || !$individualSlice->isa('Bio::EnsEMBL::IndividualSlice')){ - throw('Bio::EnsEMBL::IndividualSlice arg expected'); - } - if ( @{$self->{'alleleFeatures'}} == 0 && @{$individualSlice->{'alleleFeatures'}} == 0){ - return undef; #there are no differences in any of the Individuals - - } - my $differences; #differences between individuals - if (@{$individualSlice->{'alleleFeatures'}} == 0){ - #need to create a copy of alleleFeature for the first Individual - foreach my $difference (@{$self->{'alleleFeatures'}}){ - my %vf = %$difference; - push @{$differences},bless \%vf,ref($difference); - } - } - elsif (@{$self->{'alleleFeatures'}} == 0){ - #need to create a copy of AlleleFeature, but changing the allele by the allele in the reference sequence - foreach my $difference (@{$individualSlice->{'alleleFeatures'}}){ - push @{$differences}, $individualSlice->_convert_difference($difference); - } - } - else{ - #both individuals have differences - #create a hash with the differences in the first slice - my %allele_features_self = map {$_->start.'-'.$_->end => $_} @{$self->{'alleleFeatures'}}; - foreach my $difference (@{$individualSlice->{'alleleFeatures'}}){ - #there is no difference in the other individual slice, convert the allele - if (!defined $allele_features_self{$difference->start.'-'.$difference->end}){ - push @{$differences},$individualSlice->_convert_difference($difference); - } - else{ - #if it is defined and have the same allele, delete from the hash since it is not a difference - #between the individuals - if ($allele_features_self{$difference->start.'-'.$difference->end}->allele_string eq $difference->allele_string){ - delete $allele_features_self{$difference->start.'-'.$difference->end}; - } - } - } - #and finally, make a shallow copy of the differences in the first individual - foreach my $difference (values %allele_features_self){ - my %vf = %$difference; - push @{$differences},bless \%vf,ref($difference); - } - - } - #need to map differences to the first individual, self, since the coordinates are in the Slice coordinate system - my $mapper = $self->mapper(); #now that we have the differences, map them in the IndividualSlice - my @results; - foreach my $difference (@{$differences}){ - @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice'); - #we can have 3 possibilities: - #the difference is an insertion and when mapping returns the boundaries of the insertion in the IndividualSlice - if (@results == 2){ - #the first position in the result is the beginning of the insertion - if($results[0]->start < $results[1]->start){ - $difference->start($results[0]->end+1); - $difference->end($results[1]->start-1); - } - else{ - #it is the second position the beginning of the insertion - $difference->start($results[1]->end+1); - $difference->end($results[0]->start-1); - } - $difference->strand($results[0]->strand); - } - else{ - #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate - # or a Bio::EnsEMBL::Mapper::IndelCoordinate - $difference->start($results[0]->start); - $difference->end($results[0]->end); - $difference->strand($results[0]->strand); - } - } - - return $differences; -} - -#for a given AlleleFeature, converts the allele into the reference allele and returns -#the converted AlleleFeature - -sub _convert_difference{ - my $self = shift; - deprecate("_convert_difference is deprecated and will be removed in e95."); - - my $difference = shift; - my %new_af = %$difference; #make a copy of the alleleFeature - #and change the allele with the one from the reference Slice - $new_af{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand); - return bless \%new_af,ref($difference); -} - -=head2 mapper - - Args : none - Description: Getter for the mapper between the between the IndividualSlice and the Slice it refers to. - It is done automatically when necessary to create subSlice or to get the differences between individuals - Returntype : Bio::EnsEMBL::Mapper - Exceptions : none - Caller : Internal function - -=cut - -sub mapper{ - my $self = shift; - deprecate("mapper is deprecated and will be removed in e95."); - - if (@_) { - #allow to create again the mapper - delete $self->{'mapper'}; - } - if(!defined $self->{'mapper'}){ - #create the mapper between the Slice and StrainSlice - my $mapper = Bio::EnsEMBL::Mapper->new('Slice','IndividualSlice'); - #align with Slice - #get all the VariationFeatures in the Individual Slice, from start to end in the Slice - my @allele_features_ordered; - @allele_features_ordered = sort {$a->start() <=> $b->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'}); - - my $start_slice = 1; - my $end_slice; - my $start_individual = 1; - my $end_individual; - my $length_allele; - my $total_length_diff = 0; - #we will walk from left to right in the slice object, updating the start and end individual every time - #there is a new alleleFeature in the Individual - foreach my $allele_feature (@allele_features_ordered){ - #we have a insertion/deletion: marks the beginning of new slice move coordinates - if ($allele_feature->length_diff != 0){ - $total_length_diff += $allele_feature->length_diff; - $length_allele = $allele_feature->length + $allele_feature->length_diff(); #length of the allele in the Individual - $end_slice = $allele_feature->start() - 1; #set the end of the slice before the alleleFeature - if ($end_slice >= $start_slice){ - #normal cases (not with gaps) - $end_individual = $end_slice - $start_slice + $start_individual; #set the end of the individual from the beginning plus the offset - #add the sequence that maps - $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'IndividualSlice',$start_individual,$end_individual); - #and add the indel - $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $allele_feature->length,1,'IndividualSlice',$end_individual+1,$end_individual + $length_allele); - $start_individual = $end_individual + $length_allele + 1; #set the beginning of the individual after the allele - } - else{ - #add the indel - $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $allele_feature->length,1,'IndividualSlice',$end_individual+1,$end_individual + $length_allele); - $start_individual += $length_allele; - } - $start_slice = $end_slice + $allele_feature->length+ 1; #set the beginning of the slice after the variation feature - } - } - if ($start_slice <= $self->length){ - #if we haven't reached the end of the IndividualSlice, add the final map coordinates between the individual and the slice - $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'IndividualSlice',$start_individual,$start_individual + $self->length - $start_slice); - } - - $mapper->add_map_coordinates('Slice', -$self->start+1, 0,1, 'IndividualSlice', -$self->start +1,0) if ($self->start > 0); #before individualSlice - $mapper->add_map_coordinates('Slice', $self->length + 1,$self->seq_region_length - ($self->length +1),1, 'IndividualSlice', $self->length + 1 + $total_length_diff,$self->seq_region_length + $total_length_diff - ($self->length +1) ) if ($self->length <= $self->seq_region_length); #after strainSlice - $self->{'mapper'} = $mapper; - } - return $self->{'mapper'}; -} - -=head2 sub_Slice - - Arg 1 : int $start - Arg 2 : int $end - Arge [3] : int $strand - Example : none - Description: Makes another IndividualSlice that covers only part of this IndividualSlice - with the appropriate differences to the reference Slice - If a slice is requested which lies outside of the boundaries - of this function will return undef. This means that - behaviour will be consistant whether or not the slice is - attached to the database (i.e. if there is attached sequence - to the slice). Alternatively the expand() method or the - SliceAdaptor::fetch_by_region method can be used instead. - Returntype : Bio::EnsEMBL::IndividualSlice or undef if arguments are wrong - Exceptions : thrown when trying to get the subSlice in the middle of a - insertion - Caller : general - -=cut - -sub sub_Slice { - my ( $self, $start, $end, $strand ) = @_; - deprecate("sub_Slice is deprecated and will be removed in e95."); - - my $mapper = $self->mapper(); - #map from the Individual to the Slice to get the sub_Slice, and then, apply the differences in the subSlice - my @results = $mapper->map_coordinates('IndividualSlice',$start,$end,$strand,'IndividualSlice'); - my $new_start; - my $new_end; - my $new_strand; - my $new_seq; - #Get need start and end for the subSlice of the IndividualSlice - my @results_ordered = sort {$a->start <=> $b->start} grep {ref($_) eq 'Bio::EnsEMBL::Mapper::Coordinate'} @results; - $new_start = $results_ordered[0]->start(); - $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate'); -# $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate'); - $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice - - my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand); - $subSlice->{'individual_name'} = $self->{'individual_name'}; - - my $new_alleles; #reference to an array that will contain the variationFeatures in the new subSlice - #update the VariationFeatures in the sub_Slice of the Individual - my %af; - my $new_allele_feature; - foreach my $alleleFeature (@{$self->{'alleleFeatures'}}){ - $new_allele_feature = $alleleFeature->transfer($subSlice); - #only transfer the coordinates to the SubSlice that are within the boundaries - if ($new_allele_feature->start >= 1 && $new_allele_feature->end <= $subSlice->length){ - push @{$new_alleles}, $new_allele_feature; - } - } - $subSlice->{'alleleFeatures'} = $new_alleles; - return $subSlice; - -} - -=head2 subseq - - Arg [1] : int $startBasePair - relative to start of slice, which is 1. - Arg [2] : int $endBasePair - relative to start of slice. - Arg [3] : (optional) int $strand - The strand of the individual slice to obtain sequence from. Default - value is 1. - Description: returns string of dna sequence - Returntype : txt - Exceptions : end should be at least as big as start - strand must be set - Caller : general - -=cut - -sub subseq { - my ( $self, $start, $end, $strand ) = @_; - deprecate("subseq is deprecated and will be removed in e95."); - - if ( $end+1 < $start ) { - throw("End coord + 1 is less than start coord"); - } - - # handle 'between' case for insertions - return '' if( $start == $end + 1); - - $strand = 1 unless(defined $strand); - - if ( $strand != -1 && $strand != 1 ) { - throw("Invalid strand [$strand] in call to Slice::subseq."); - } - - my $subseq; - my $seq; - if($self->adaptor){ - my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor(); - $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,$start,$end,$strand)}; #get the reference sequence for that slice - #apply all differences to the reference sequence - # sort edits in reverse order to remove complication of - # adjusting downstream edits - my @allele_features_ordered; - @allele_features_ordered = sort {$b->start() <=> $a->start() || $b->end() <=> $a->end()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'}); - my $af_start; - my $af_end; - foreach my $af (@allele_features_ordered){ - if (($af->start - $start +1 > 0) && ($end - $af->end > 0)){ - #save the current start and end of the alleleFeature before changing for apply_edit - $af_start = $af->start; - $af_end = $af->end; - #apply the difference if the feature is in the new slice - $af->start($af->start - $start +1); - $af->end($af->end - $start +1); - $af->apply_edit(\$subseq); #change, in the reference sequence, the af - #restore the initial values of alleleFeature start and end - $af->start($af_start); - $af->end($af_end); - - } - } - } - else { - ## check for gap at the beginning and pad it with Ns - if ($start < 1) { - $subseq = "N" x (1 - $start); - $start = 1; - } - $subseq .= substr ($self->seq(), $start-1, $end - $start + 1); - ## check for gap at the end and pad it with Ns - if ($end > $self->length()) { - $subseq .= "N" x ($end - $self->length()); - } - reverse_comp(\$subseq) if($strand == -1); - } - return $subseq; - -} - -=head2 get_all_Transcripts - - Args : None - Example : @transcripts = @{$individualslice->get_all_Transcripts)}; - Description: Gets all transcripts which overlap this Individual Slice. If you want to - specify a particular analysis or type, then you are better off - using get_all_Genes or get_all_Genes_by_type and iterating - through the transcripts of each gene. - Returntype : reference to a list of Bio::EnsEMBL::Transcripts - Exceptions : none - Caller : general - -=cut - -sub get_all_Transcripts { - my $self = shift; - deprecate("get_all_Transcripts is deprecated and will be removed in e95."); - - my $transcripts = $self->SUPER::get_all_Transcripts(1); - $self->map_to_Individual($transcripts); - - return $transcripts; -} - - -=head2 get_all_Exons - - Arg [1] : (optional) string $dbtype - The dbtype of exons to obtain. This assumes that the db has - been added to the DBAdaptor under this name (using the - DBConnection::add_db_adaptor method). - Example : @exons = @{$individualSlice->get_all_Exons}; - Description: Gets all exons which overlap this IndividualSlice. Note that these exons - will not be associated with any transcripts, so this may not - be terribly useful. - Returntype : reference to a list of Bio::EnsEMBL::Exons - Exceptions : none - Caller : general - -=cut - -sub get_all_Exons { - my $self = shift; - my $dbtype = shift; - - deprecate("get_all_Exons is deprecated and will be removed in e95."); - - my $exons = $self->SUPER::get_all_Exons($dbtype); - $self->map_to_Individual($exons); #map the exons to the Individual - - return $exons; -} - -=head2 get_all_Genes - - Arg [1] : (optional) string $logic_name - The name of the analysis used to generate the genes to retrieve - Arg [2] : (optional) string $dbtype - The dbtype of genes to obtain. This assumes that the db has - been added to the DBAdaptor under this name (using the - DBConnection::add_db_adaptor method). - Example : @genes = @{$individualSlice->get_all_Genes}; - Description: Retrieves all genes that overlap this slice. - Returntype : listref of Bio::EnsEMBL::Genes - Exceptions : none - Caller : none - -=cut - -sub get_all_Genes{ - my ($self, $logic_name, $dbtype) = @_; - deprecate("get_all_Genes is deprecated and will be removed in e95."); - - my $genes = $self->SUPER::get_all_Genes($logic_name, $dbtype, 1); - - $self->map_to_Individual($genes); - - foreach my $gene (@{$genes}){ - $self->map_to_Individual($gene->get_all_Exons); #map the Exons to the Individual - $self->map_to_Individual($gene->get_all_Transcripts); #map the Transcripts to the Individual - } - - return $genes; -} - -=head2 map_to_Individual - - Arg[1] : ref $features - Example : $individualSlice->map_to_Individual($exons); - Description : Gets the features from the Slice and maps it in the IndividualSlice, using the mapper - between Slice and IndividualSlice - ReturnType : None - Exceptions : None - Caller : general - -=cut - -sub map_to_Individual{ - my $self = shift; - my $features = shift; - deprecate("map_to_Individual is deprecated and will be removed in e95."); - - my $mapper = $self->mapper(); - my (@results, @results_ordered, $new_start, $new_end, $new_strand); - #foreach of the transcripts, map them to the IndividualSlice and replace the Slice with the IndividualSlice - foreach my $feature (@{$features}){ - $feature->slice($self); #replace the IndividualSlice as the Slice for this feature (the Slice plus the AlleleFeatures) - #map from the Slice to the Individual Slice - my @results = $mapper->map_coordinates('Slice',$feature->start,$feature->end,$feature->strand,'Slice'); - #from the results, order them but filter out those that are not coordinates - @results_ordered = sort {$a->start <=> $b->start} grep {ref($_) eq 'Bio::EnsEMBL::Mapper::Coordinate'} @results; - $new_start = $results_ordered[0]->start(); - $new_strand = $results_ordered[0]->strand(); - $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice - $feature->start($new_start); #update new coordinates - $feature->end($new_end); - $feature->strand($new_strand); - } -} - -sub alleleFeatures{ - my $self = shift; - deprecate("alleleFeatures is deprecated and will be removed in e95."); - - return $self->{'alleleFeatures'}; -} - -sub add_AlleleFeature{ - my $self = shift; - deprecate("add_AlleleFeatures is deprecated and will be removed in e95."); - - if (@_){ - if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::AlleleFeature')) { - throw("Bio::EnsEMBL::Variation::AlleleFeature argument expected"); - } - #add the alleleFeature to the individualSlice - push @{$self->{'alleleFeatures'}},shift; - } -} -1; diff --git a/modules/Bio/EnsEMBL/IndividualSliceFactory.pm b/modules/Bio/EnsEMBL/IndividualSliceFactory.pm deleted file mode 100644 index 4ce38fb9753b93ee57954175694a26635123be98..0000000000000000000000000000000000000000 --- a/modules/Bio/EnsEMBL/IndividualSliceFactory.pm +++ /dev/null @@ -1,174 +0,0 @@ -=head1 LICENSE - -Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute -Copyright [2016-2018] EMBL-European Bioinformatics Institute - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -=cut - - -=head1 CONTACT - - Please email comments or questions to the public Ensembl - developers list at <http://lists.ensembl.org/mailman/listinfo/dev>. - - Questions may also be sent to the Ensembl help desk at - <http://www.ensembl.org/Help/Contact>. - -=cut - -package Bio::EnsEMBL::IndividualSliceFactory; - -use strict; -use Bio::EnsEMBL::Utils::Argument qw(rearrange); -use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp); -use Bio::EnsEMBL::Slice; -use Bio::EnsEMBL::Mapper; -use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); -use Scalar::Util qw(weaken); - -=head2 new -=cut - -sub new{ - my $caller = shift; - deprecate("new is deprecated and will be removed in e95."); - my $class = ref($caller) || $caller; - - #creates many IndividualSlice objects from the Population - - my ($population_name, $coord_system, $start, $end, $strand, $seq_region_name, $seq_region_length, $adaptor) = rearrange(['POPULATION', 'COORD_SYSTEM','START','END','STRAND','SEQ_REGION_NAME','SEQ_REGION_LENGTH', 'ADAPTOR'],@_); - - my $self = bless { - population_name => $population_name, - coord_system => $coord_system, - start => $start, - end => $end, - strand => $strand, - seq_region_name => $seq_region_name, - seq_region_length => $seq_region_length},$class; - - $self->adaptor($adaptor); - return $self; -} - -sub adaptor { - my $self = shift; - deprecate("adaptor is deprecated and will be removed in e95."); - - if(@_) { - my $ad = shift; - if($ad && (!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::BaseAdaptor'))) { - throw('Adaptor argument must be a Bio::EnsEMBL::DBSQL::BaseAdaptor'); - } - weaken($self->{'adaptor'} = $ad); - } - - return $self->{'adaptor'} -} - -sub get_all_IndividualSlice{ - my $self = shift; - deprecate("get_all_IndividualSlice is deprecated and will be removed in e95."); - - my $slice; - if(!$self->adaptor) { - warning('Cannot get IndividualSlice features without attached adaptor'); - return ''; - } - my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); - - unless($variation_db) { - warning("Variation database must be attached to core database to " . - "retrieve variation information" ); - return ''; - } - #get the AlleleFeatures in the Population - my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor; - - if( $af_adaptor ) { - #set the adaptor to retrieve data from genotype table - $af_adaptor->from_IndividualSlice(1); - #get the Individual for the given strain - my $population_adaptor = $variation_db->get_PopulationAdaptor; - my $individual_adaptor = $variation_db->get_IndividualAdaptor; - if ($population_adaptor && $individual_adaptor){ - $slice = Bio::EnsEMBL::Slice->new(-coord_system => $self->{'coord_system'}, - -start => $self->{'start'}, - -end => $self->{'end'}, - -strand => $self->{'strand'}, - -seq_region_name => $self->{'seq_region_name'}, - -seq_region_length => $self->{'seq_region_length'}, - -adaptor => $self->adaptor - ); - my $population = $population_adaptor->fetch_by_name($self->{'population_name'}); - #check that there is such population in the database - if (defined $population){ - #get all the AlleleFeatures in the $population and the Slice given - my $allele_features = $af_adaptor->fetch_all_by_Slice($slice,$population); - #get Individuals in the Population - my $individuals = $individual_adaptor->fetch_all_by_Population($population); - return $self->_rearrange_Individuals_Alleles($individuals,$allele_features); - } - else{ - warning("Population not in the database"); - return ''; - - } - } - else{ - warning("Not possible to retrieve PopulationAdaptor from the variation database"); - return ''; - } - } - - else{ - warning("Not possible to retrieve AlleleFeatureAdaptor from variation database"); - return ''; - } -} - -sub _rearrange_Individuals_Alleles{ - my $self = shift; - deprecate("_rearrange_Individuals_Alleles is deprecated and will be removed in e95."); - - my $individuals = shift; - my $allele_features; - my $individual_slice; - #create the hash with all the individuals - my %individuals_ids; - #foreach of the individual, create the IndividualSlice object and add it to the mapping hash - foreach my $individual (@{$individuals}){ - $individual_slice = Bio::EnsEMBL::Variation::IndividualSlice->new( - -coord_system => $self->{'coord_system'}, - -start => $self->{'$start'}, - -end => $self->{'end'}, - -strand => $self->{'strand'}, - -seq_region_name => $self->{'seq_region_name'}, - -seq_region_length => $self->{'seq_region_length'}, - -individual => $individual->name); - - $individuals_ids{$individual->dbID} = $individual_slice; - } - - #and rearrange all the AlleleFeatures to the individuals - foreach my $allele_feature (@{$allele_features}){ - $individuals_ids{$allele_feature->{'_sample_id'}}->add_AlleleFeature($allele_feature); - } - my @result = values %individuals_ids; - return \@result; -} - - -1; diff --git a/modules/Bio/EnsEMBL/MappedSlice.pm b/modules/Bio/EnsEMBL/MappedSlice.pm index ddeaeadd5e92d42cde5f12a293bbc92dd42a49bd..b4e4995eb9640e16642f8836396dee7f4aac9599 100644 --- a/modules/Bio/EnsEMBL/MappedSlice.pm +++ b/modules/Bio/EnsEMBL/MappedSlice.pm @@ -127,7 +127,6 @@ by an adaptor/factory. Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor Bio::EnsEMBL::Compara::AlignSlice Bio::EnsEMBL::Compara::AlignSlice::Slice - Bio::EnsEMBL::AlignStrainSlice Bio::EnsEMBL::StrainSlice =cut diff --git a/modules/Bio/EnsEMBL/MappedSliceContainer.pm b/modules/Bio/EnsEMBL/MappedSliceContainer.pm index bb11b462d8c2199aa90fb2e96fde5e8928917598..70d8d3426d1c3236ce4e79f8aa350b816e6e47d4 100644 --- a/modules/Bio/EnsEMBL/MappedSliceContainer.pm +++ b/modules/Bio/EnsEMBL/MappedSliceContainer.pm @@ -110,7 +110,6 @@ mapper on the existing MappedSlices. Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor Bio::EnsEMBL::Compara::AlignSlice Bio::EnsEMBL::Compara::AlignSlice::Slice - Bio::EnsEMBL::AlignStrainSlice Bio::EnsEMBL::StrainSlice =cut diff --git a/modules/Bio/EnsEMBL/Slice.pm b/modules/Bio/EnsEMBL/Slice.pm index 53fd8e766ae2f9d8bb23ae252b7e8bd4b01b9ab6..4905a9725cb8f1cefbf64a4f934b8b814477b44c 100644 --- a/modules/Bio/EnsEMBL/Slice.pm +++ b/modules/Bio/EnsEMBL/Slice.pm @@ -84,8 +84,6 @@ use Bio::EnsEMBL::Registry; use Bio::EnsEMBL::Utils::Iterator; use Bio::EnsEMBL::DBSQL::MergedAdaptor; -#use Bio::EnsEMBL::IndividualSlice; -#use Bio::EnsEMBL::IndividualSliceFactory; use Bio::EnsEMBL::Mapper::RangeRegistry; use Bio::EnsEMBL::SeqRegionSynonym; use Scalar::Util qw(weaken isweak); @@ -1987,250 +1985,6 @@ sub get_all_VariationFeatures_by_Population { return []; } -=head2 get_all_IndividualSlice - Deprecated. The method will be removed in e95. - Args : none - Example : my $individualSlice = $slice->get_by_Population($population); - Description : Gets the specific Slice for all the individuls in the population - ReturnType : listref of Bio::EnsEMB::IndividualSlice - Exceptions : none - Caller : general - -=cut - -sub get_all_IndividualSlice{ - my $self = shift; - deprecate("get_all_IndividualSlice is deprecated and will be removed in e95."); - - my $individualSliceFactory = Bio::EnsEMBL::IndividualSliceFactory->new( - -START => $self->{'start'}, - -END => $self->{'end'}, - -STRAND => $self->{'strand'}, - -ADAPTOR => $self->adaptor(), - -SEQ_REGION_NAME => $self->{'seq_region_name'}, - -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, - -COORD_SYSTEM => $self->{'coord_system'}, - ); - return $individualSliceFactory->get_all_IndividualSlice(); -} - -=head2 get_by_Individual - Deprecated. The method will be removed in e95. - Arg[1] : Bio::EnsEMBL::Variation::Individual $individual - Example : my $individualSlice = $slice->get_by_Individual($individual); - Description : Gets the specific Slice for the individual - ReturnType : Bio::EnsEMB::IndividualSlice - Exceptions : none - Caller : general - -=cut - -sub get_by_Individual{ - my $self = shift; - my $individual = shift; - deprecate("get_by_Individual is deprecated and will be removed in e95."); - - return Bio::EnsEMBL::IndividualSlice->new( - -START => $self->{'start'}, - -END => $self->{'end'}, - -STRAND => $self->{'strand'}, - -ADAPTOR => $self->adaptor(), -# -SEQ => $self->{'seq'}, - -SEQ_REGION_NAME => $self->{'seq_region_name'}, - -SEQ_REGION_LENGTH => $self->{'seq_region_length'}, - -COORD_SYSTEM => $self->{'coord_system'}, - -INDIVIDUAL => $individual); - -} - - -sub calculate_theta { - my $self = shift; - my $strains = shift; - my $feature = shift; #optional parameter. Name of the feature in the Slice you want to calculate - - deprecate("calculate_theta is deprecated and will be removed in e95."); - - if(!$self->adaptor()) { - warning('Cannot get variation features without attached adaptor'); - return 0; - } - my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); - - unless($variation_db) { - warning("Variation database must be attached to core database to " . - "retrieve variation information" ); - return 0; - } - - #need to get coverage regions for the slice in the different strains - my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor; - my $strain; - my $differences = []; - my $slices = []; - if ($coverage_adaptor){ - my $num_strains = scalar(@{$strains}) +1; - if (!defined $feature){ - #we want to calculate for the whole slice - push @{$slices}, $self; #add the slice as the slice to calculate the theta value - } - else{ - #we have features, get the slices for the different features - my $features = $self->get_all_Exons(); - map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features - } - my $length_regions = 0; - my $snps = 0; - my $theta = 0; - my $last_position = 0; - #get all the differences in the slice coordinates - foreach my $strain_name (@{$strains}){ - my $strain = $self->get_by_strain($strain_name); #get the strainSlice for the strain - - my $results = $strain->get_all_differences_Slice; - push @{$differences}, @{$results} if (defined $results); - } - #when we finish, we have, in max_level, the regions covered by all the sample - #sort the differences by the genomic position - my @differences_sorted = sort {$a->start <=> $b->start} @{$differences}; - foreach my $slice (@{$slices}){ - my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains); - if (defined $regions_covered){ - foreach my $range (@{$regions_covered}){ - $length_regions += ($range->[1] - $range->[0]) + 1; #add the length of the genomic region - for (my $i = $last_position;$i<@differences_sorted;$i++){ - if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){ - $snps++; #count differences in the region - } - elsif ($differences_sorted[$i]->end > $range->[1]){ - $last_position = $i; - last; - } - } - } - #when all the ranges have been iterated, calculate rho - #this is an intermediate variable called a in the formula - # a = sum i=2..strains 1/i-1 - } - } - my $a = _calculate_a($num_strains); - $theta = $snps / ($a * $length_regions); - return $theta; - } - else{ - return 0; - } -} - -sub _calculate_a { - my $max_level = shift; - deprecate("_calculate_a is deprecated and will be removed in e95."); - - my $a = 0; - for (my $i = 2; $i <= $max_level+1;$i++){ - $a += 1/($i-1); - } - return $a; -} - -sub calculate_pi { - my $self = shift; - my $strains = shift; - my $feature = shift; - deprecate("calculate_pi is deprecated and will be removed in e95."); - - if(!$self->adaptor()) { - warning('Cannot get variation features without attached adaptor'); - return 0; - } - my $variation_db = $self->adaptor->db->get_db_adaptor('variation'); - - unless($variation_db) { - warning("Variation database must be attached to core database to " . - "retrieve variation information" ); - return 0; - } - - #need to get coverage regions for the slice in the different strains - my $coverage_adaptor = $variation_db->get_ReadCoverageAdaptor; - my $differences = []; - my $slices = []; - if ($coverage_adaptor){ - my $num_strains = scalar(@{$strains}) +1; - if (!defined $feature){ - #we want to calculate for the whole slice - push @{$slices}, $self; #add the slice as the slice to calculate the theta value - } - else{ - #we have features, get the slices for the different features - my $features = $self->get_all_Exons(); - map {push @{$slices},$_->feature_Slice} @{$features}; #add the slices of the features - } - my @range_differences = (); - my $pi = 0; - my $regions = 0; - my $last_position = 0; #last position visited in the sorted list of differences - my $triallelic = 0; - my $is_triallelic = 0; - foreach my $slice (@{$slices}){ - foreach my $strain_name (@{$strains}){ - my $strain = $slice->get_by_strain($strain_name); #get the strainSlice for the strain - my $results = $strain->get_all_differences_Slice; - push @{$differences}, @{$results} if (defined $results); - } - my @differences_sorted = sort {$a->start <=> $b->start} @{$differences}; - - my $regions_covered = $coverage_adaptor->fetch_all_regions_covered($slice,$strains); - #when we finish, we have, in max_level, the regions covered by all the sample - #sort the differences - if (defined $regions_covered){ - foreach my $range (@{$regions_covered}){ - for (my $i = $last_position;$i<@differences_sorted;$i++){ - if ($differences_sorted[$i]->start >= $range->[0] && $differences_sorted[$i]->end <= $range->[1]){ - #check wether it is the same region or different - if (!defined $range_differences[0] || ($differences_sorted[$i]->start == $range_differences[0]->start)){ - if (defined $range_differences[0] && ($differences_sorted[$i]->allele_string ne $range_differences[0]->allele_string)){ - $is_triallelic = 1; - } - push @range_differences, $differences_sorted[$i]; - } - else{ - #new site, calc pi for the previous one - $pi += 2 * (@range_differences/($num_strains)) * ( 1 - (@range_differences/$num_strains)); - if ($is_triallelic) { - $triallelic++; - $is_triallelic = 0; - } - $regions++; - @range_differences = (); - #and start a new range - push @range_differences, $differences_sorted[$i]; - } - } - elsif ($differences_sorted[$i]->end > $range->[1]){ - $last_position = $i; - last; - } - } - #calculate pi for last site, if any - if (defined $range_differences[0]){ - $pi += 2 * (@range_differences/$num_strains) * ( 1 - (@range_differences/$num_strains)); - $regions++; - } - } - } - $pi = $pi / $regions; #calculate average pi - print "Regions with variations in region $regions and triallelic $triallelic\n\n"; - } - return $pi; - } - else{ - return 0; - } - -} - - =head2 get_all_Genes Arg [1] : (optional) string $logic_name diff --git a/modules/t/dependencies.t b/modules/t/dependencies.t index 32754e026148010f01cf01e86f37f191b171b6f6..f230d680069b112a1858a10eb1b016b850eceb12 100644 --- a/modules/t/dependencies.t +++ b/modules/t/dependencies.t @@ -32,7 +32,6 @@ my %result = map{$_ => 1} @result; my @exceptions = ('/Bio/EnsEMBL/Utils/TranscriptAlleles.pm', '/Bio/EnsEMBL/Utils/ensembl_init.example', - '/Bio/EnsEMBL/AlignStrainSlice', '/Bio/EnsEMBL/Feature', '/Bio/EnsEMBL/DBSQL/BaseFeatureAdaptor.pm', '/Bio/EnsEMBL/DBSQL/DensityFeatureAdaptor.pm',