Commit 9e8910b0 authored by William McLaren's avatar William McLaren
Browse files

Initial check in

parent 924d9559
=head1 LICENSE
Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
http://www.ensembl.org/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <ensembl-dev@ebi.ac.uk>.
Questions may also be sent to the Ensembl help desk at
<helpdesk@ensembl.org>.
=cut
=head1 NAME
Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices
representing alternative assemblies
=head1 SYNOPSIS
my $slice =
$slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
my $asa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new;
my ($mapped_slice) = @{ $asa->fetch_by_version( $msc, 'NCBIM36' ) };
=head1 DESCRIPTION
NOTE: this code is under development and not fully functional nor tested
yet. Use only for development.
This adaptor is a factory for creating MappedSlices representing
strains and attaching them to a MappedSliceContainer. A mapper will be created
to map between the reference slice and the common container slice coordinate
system.
=head1 METHODS
new
fetch_by_name
=head1 REALTED MODULES
Bio::EnsEMBL::MappedSlice
Bio::EnsEMBL::MappedSliceContainer
Bio::EnsEMBL::AlignStrainSlice
Bio::EnsEMBL::StrainSlice
=cut
package Bio::EnsEMBL::DBSQL::StrainSliceAdaptor;
use strict;
use warnings;
no warnings 'uninitialized';
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::MappedSlice;
use Bio::EnsEMBL::Mapper;
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
=head2 new
Example : my $strain_slice_adaptor =
Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new;
Description : Constructor.
Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
Exceptions : none
Caller : general
Status : At Risk
: under development
=cut
sub new {
my $caller = shift;
my $class = ref($caller) || $caller;
my $self = $class->SUPER::new(@_);
return $self;
}
=head2 fetch_by_name
Arg[1] : Bio::EnsEMBL::MappedSliceContainer $container - the container
to attach MappedSlices to
Arg[2] : String $name - the name of the strain to fetch
Example : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') };
Description : Creates a MappedSlice representing a version of the container's
reference slice with variant alleles from the named strain
Return type : listref of Bio::EnsEMBL::MappedSlice
Exceptions : thrown on wrong or missing arguments
Caller : general, Bio::EnsEMBL::MappedSliceContainer
Status : At Risk
: under development
=cut
sub fetch_by_name {
my $self = shift;
my $container = shift;
my $name = shift;
# argueent check
unless ($container and ref($container) and
$container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
throw("Need a MappedSliceContainer.");
}
unless ($name) {
throw("Need a strain name.");
}
my $slice = $container->ref_slice;
# get a connection to the variation DB
my $variation_db = $self->db->get_db_adaptor('variation');
unless($variation_db) {
warning("Variation database must be attached to core database to retrieve variation information");
return '';
}
# now get an allele feature adaptor
my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
# check we got it
unless(defined $af_adaptor) {
warning("Not possible to retrieve AlleleFeatureAdaptor from variation database");
return '';
}
# now get an individual adaptor
my $ind_adaptor = $variation_db->get_IndividualAdaptor;
# check we got it
unless(defined $ind_adaptor) {
warning("Not possible to retrieve IndividualAdaptor from variation database");
return '';
}
# fetch individual object for this strain name
my $ind = shift @{$ind_adaptor->fetch_all_by_name($name)};
# check we got a result
unless(defined $ind) {
warn("Strain ".$name." not found in the database");
return '';
}
## MAP STRAIN SLICE TO REF SLICE
################################
# get all allele features for this slice and individual
my @afs = sort {$a->start() <=> $b->start()} @{$af_adaptor->fetch_all_by_Slice($slice, $ind)};
# check we got some data
warning("No strain genotype data available for slice ".$slice->name." and strain ".$ind->name) if ! defined $afs[0];
# create a mapper
my $mapper = Bio::EnsEMBL::Mapper->new('mapped_slice', 'ref_slice');
# create a mapped_slice object
my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
-ADAPTOR => $self,
-CONTAINER => $container,
-NAME => $slice->name."\#strain_$name",
);
my $strain_slice = $slice->get_by_strain($ind->name);
my $start_slice = $slice->start;
my $start_strain = 1;
my $sr_name = $slice->seq_region_name;
#my $sr_name = 'ref_slice';
my ($end_slice, $end_strain, $allele_length);
my $indel_flag = 0;
my $total_length_diff = 0;
# go through each AF
foreach my $af(@afs) {
# find out if it changes the length
if($af->length_diff != 0) {
$indel_flag = 1;
$total_length_diff += $af->length_diff;
# get the allele length
$allele_length = $af->length + $af->length_diff();
$end_slice = $slice->start + $af->start() - 2;
if ($end_slice >= $start_slice){
$end_strain = $end_slice - $start_slice + $start_strain;
#add the sequence that maps
$mapper->add_map_coordinates('mapped_slice', $start_strain, $end_strain, 1, $sr_name, $start_slice, $end_slice);
#add the indel
$mapper->add_indel_coordinates('mapped_slice', $end_strain+1, $end_strain+$allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
$start_strain = $end_strain + $allele_length + 1;
}
else{
#add the indel
$mapper->add_indel_coordinates('mapped_slice', $end_strain+1,$end_strain + $allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
$start_strain += $allele_length;
}
$start_slice = $end_slice + $af->length+ 1;
}
}
# add the remaining coordinates (or the whole length if no indels found)
$mapper->add_map_coordinates('mapped_slice', $start_strain, $start_strain + ($slice->end - $start_slice), 1, $sr_name, $start_slice, $slice->end);
# add the slice/mapper pair
$mapped_slice->add_Slice_Mapper_pair($strain_slice, $mapper);
## MAP REF_SLICE TO CONTAINER SLICE
###################################
if($total_length_diff > 0) {
# create a new mapper
my $new_mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');
# get existing pairs
my @existing_pairs = $container->mapper->list_pairs('container', 1, $container->container_slice->length, 'container');
my @new_pairs = $mapper->list_pairs('mapped_slice', 1, $strain_slice->length(), 'mapped_slice');
# we need a list of indels (specifically inserts)
my @indels;
# go through existing first
foreach my $pair(@existing_pairs) {
if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
my $indel;
$indel->{'length_diff'} = ($pair->to->end - $pair->to->start) - ($pair->from->end - $pair->from->start);
# we're only interested in inserts here, not deletions
next unless $indel->{'length_diff'} > 0;
$indel->{'ref_start'} = $pair->from->start;
$indel->{'ref_end'} = $pair->from->end;
$indel->{'length'} = $pair->to->end - $pair->to->start;
push @indels, $indel;
}
}
# now new ones
foreach my $pair(@new_pairs) {
if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
my $indel;
$indel->{'length_diff'} = ($pair->from->end - $pair->from->start) - ($pair->to->end - $pair->to->start);
# we're only interested in inserts here, not deletions
next unless $indel->{'length_diff'} > 0;
$indel->{'ref_start'} = $pair->to->start;
$indel->{'ref_end'} = $pair->to->end;
$indel->{'length'} = $pair->from->end - $pair->from->start;
push @indels, $indel;
}
}
# sort them
@indels = sort {
$a->{'ref_start'} <=> $b->{'ref_start'} || # by position
$b->{'length_diff'} <=> $a->{'length_diff'} # then by length diff so we only keep the longest
} @indels;
# clean them
my @new_indels = ();
my $p = $indels[0];
push @new_indels, $indels[0] if scalar @indels;
for my $i(1..$#indels) {
my $c = $indels[$i];
if($c->{'ref_start'} != $p->{'ref_start'} && $c->{'ref_end'} != $p->{'ref_end'}) {
push @new_indels, $c;
$p = $c;
}
}
$start_slice = $slice->start;
$start_strain = 1;
$sr_name = $slice->seq_region_name;
#$sr_name = 'ref_slice';
foreach my $indel(@new_indels) {
$end_slice = $indel->{'ref_end'};
$end_strain = $start_strain + ($end_slice - $start_slice);
$allele_length = $indel->{'length'} + $indel->{'length_diff'};
$new_mapper->add_map_coordinates($sr_name, $start_slice, $end_slice, 1, 'container', $start_strain, $end_strain);
$new_mapper->add_indel_coordinates($sr_name,$end_slice+1,$end_slice + $indel->{'length'}, 1, 'container', $end_strain+1, $end_strain+$allele_length);
$start_strain = $end_strain + $allele_length + 1;
$start_slice = $end_slice + $indel->{'length'} + 1;
}
$new_mapper->add_map_coordinates($sr_name, $start_slice, $slice->end, 1, 'container', $start_strain, $start_strain + ($slice->end - $start_slice));
# replace the mapper with the new mapper
$container->mapper($new_mapper);
# change the container slice's length according to length diff
$container->container_slice($container->container_slice->expand(undef, $total_length_diff, 1));
}
return [$mapped_slice];
}
1;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment