Commit 15011dab authored by Ian Longden's avatar Ian Longden
Browse files

Merged mapper fix branch onto head

parent 2faf72c4
......@@ -201,7 +201,7 @@ sub register_all {
sub map {
throw('Incorrect number of arguments.') if(@_ != 6);
my ($self, $frm_seq_region, $frm_start, $frm_end, $frm_strand, $frm_cs) = @_;
my ($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs) = @_;
my $mapper = $self->{'mapper'};
my $asm_cs = $self->{'asm_cs'};
......@@ -209,6 +209,11 @@ sub map {
my $adaptor = $self->{'adaptor'};
my $frm;
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
#speed critical section:
#try to do simple pointer equality comparisons of the coord system objects
#first since this is likely to work most of the time and is much faster
......@@ -216,8 +221,8 @@ sub map {
if($frm_cs == $cmp_cs || ($frm_cs != $asm_cs && $frm_cs->equals($cmp_cs))) {
if(!$self->{'cmp_register'}->{$frm_seq_region}) {
$adaptor->register_component($self,$frm_seq_region);
if(!$self->{'cmp_register'}->{$seq_region_id}) {
$adaptor->register_component($self,$seq_region_id);
}
$frm = $COMPONENT;
......@@ -225,7 +230,7 @@ sub map {
# This can be probably be sped up some by only calling registered
# assembled if needed
$adaptor->register_assembled($self,$frm_seq_region,$frm_start,$frm_end);
$adaptor->register_assembled($self,$seq_region_id,$frm_start,$frm_end);
$frm = $ASSEMBLED;
} else {
......@@ -235,7 +240,7 @@ sub map {
" of this AssemblyMapper");
}
return $mapper->map_coordinates($frm_seq_region, $frm_start, $frm_end,
return $mapper->map_coordinates($seq_region_id, $frm_start, $frm_end,
$frm_strand, $frm);
}
......@@ -307,7 +312,7 @@ sub size {
sub fastmap {
throw('Incorrect number of arguments.') if(@_ != 6);
my ($self, $frm_seq_region, $frm_start, $frm_end, $frm_strand, $frm_cs) = @_;
my ($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs) = @_;
my $mapper = $self->{'mapper'};
my $asm_cs = $self->{'asm_cs'};
......@@ -315,6 +320,10 @@ sub fastmap {
my $adaptor = $self->{'adaptor'};
my $frm;
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
#speed critical section:
#try to do simple pointer equality comparisons of the coord system objects
#first since this is likely to work most of the time and is much faster
......@@ -322,8 +331,8 @@ sub fastmap {
if($frm_cs == $cmp_cs || ($frm_cs != $asm_cs && $frm_cs->equals($cmp_cs))) {
if(!$self->{'cmp_register'}->{$frm_seq_region}) {
$adaptor->register_component($self,$frm_seq_region);
if(!$self->{'cmp_register'}->{$seq_region_id}) {
$adaptor->register_component($self,$seq_region_id);
}
$frm = $COMPONENT;
......@@ -331,7 +340,7 @@ sub fastmap {
# This can be probably be sped up some by only calling registered
# assembled if needed
$adaptor->register_assembled($self,$frm_seq_region,$frm_start,$frm_end);
$adaptor->register_assembled($self,$seq_region_id,$frm_start,$frm_end);
$frm = $ASSEMBLED;
} else {
......@@ -341,13 +350,13 @@ sub fastmap {
" of this AssemblyMapper");
}
return $mapper->fastmap($frm_seq_region, $frm_start, $frm_end,
return $mapper->fastmap($seq_region_id, $frm_start, $frm_end,
$frm_strand, $frm);
}
=head2 list_seq_regions
=head2 list_ids
Arg [1] : string $frm_seq_region
The name of the sequence region of interest
......@@ -368,34 +377,37 @@ sub fastmap {
=cut
sub list_seq_regions {
sub list_ids {
throw('Incorrect number of arguments.') if(@_ != 5);
my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_;
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
if($frm_cs->equals($self->component_CoordSystem())) {
if(!$self->have_registered_component($frm_seq_region)) {
$self->adaptor->register_component($self,$frm_seq_region);
if(!$self->have_registered_component($seq_region_id)) {
$self->adaptor->register_component($self,$seq_region_id);
}
#pull out the 'from' identifiers of the mapper pairs. The
#we loaded the assembled side as the 'from' side in the constructor
return
map {$_->from()->id()}
$self->mapper()->list_pairs($frm_seq_region, $frm_start,
$self->mapper()->list_pairs($seq_region_id, $frm_start,
$frm_end, $COMPONENT);
} elsif($frm_cs->equals($self->assembled_CoordSystem())) {
$self->adaptor->register_assembled($self,
$frm_seq_region,$frm_start,$frm_end);
$seq_region_id,$frm_start,$frm_end);
#pull out the 'to' identifiers of the mapper pairs
#we loaded the component side as the 'to' coord system in the constructor
return
map {$_->to->id()}
$self->mapper()->list_pairs($frm_seq_region, $frm_start,
$self->mapper()->list_pairs($seq_region_id, $frm_start,
$frm_end, $ASSEMBLED);
} else {
throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
......@@ -404,8 +416,43 @@ sub list_seq_regions {
}
}
#sub list_seq_regions {
# throw('Incorrect number of arguments.') if(@_ != 5);
# my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_;
=head2 list_ids
# if($frm_cs->equals($self->component_CoordSystem())) {
# if(!$self->have_registered_component($seq_region_id)) {
# $self->adaptor->register_component($self,$seq_region_id);
# }
# #pull out the 'from' identifiers of the mapper pairs. The
# #we loaded the assembled side as the 'from' side in the constructor
# return
# map {$_->from()->id()}
# $self->mapper()->list_pairs($seq_region_id, $frm_start,
# $frm_end, $COMPONENT);
# } elsif($frm_cs->equals($self->assembled_CoordSystem())) {
# $self->adaptor->register_assembled($self,
# $frm_seq_region,$frm_start,$frm_end);
# #pull out the 'to' identifiers of the mapper pairs
# #we loaded the component side as the 'to' coord system in the constructor
# return
# map {$_->to->id()}
# $self->mapper()->list_pairs($frm_seq_region, $frm_start,
# $frm_end, $ASSEMBLED);
# } else {
# throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
# " is neither the assembled nor the component coordinate system " .
# " of this AssemblyMapper");
# }
#}
=head2 list_seq_regions
Arg [1] : string $frm_seq_region
The name of the sequence region of interest
......@@ -415,7 +462,7 @@ sub list_seq_regions {
The end of the region to transform of interest
Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
The coordinate system to obtain overlapping ids of
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
Example : foreach $id ($asm_mapper->list_seq_regions('X',1,1000,$chr_cs)) {...}
Description: Retrieves a list of overlapping seq_region internal identifiers
of another coordinate system. This is the same as the
list_seq_regions method but uses internal identfiers rather
......@@ -427,13 +474,14 @@ sub list_seq_regions {
=cut
sub list_ids {
sub list_seq_regions {
throw('Incorrect number of arguments.') if(@_ != 5);
my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
#retrieve the seq_region names
my @seq_regs =
$self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs);
my @seq_ids =
$self->list_ids($frm_seq_region,$frm_start,$frm_end,$frm_cs);
#The seq_regions are from the 'to' coordinate system not the
#from coordinate system we used to obtain them
......@@ -445,9 +493,30 @@ sub list_ids {
}
#convert them to ids
return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)};
return @{$self->adaptor()->seq_ids_to_regions(\@seq_ids)};
}
#sub list_ids {
# throw('Incorrect number of arguments.') if(@_ != 5);
# my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
# #retrieve the seq_region names
# my @seq_regs =
# $self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs);
# #The seq_regions are from the 'to' coordinate system not the
# #from coordinate system we used to obtain them
# my $to_cs;
# if($frm_cs->equals($self->assembled_CoordSystem())) {
# $to_cs = $self->component_CoordSystem();
# } else {
# $to_cs = $self->assembled_CoordSystem();
# }
# #convert them to ids
# return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)};
#}
......
......@@ -251,7 +251,7 @@ sub size {
sub map {
throw('Incorrect number of arguments.') if(@_ < 6);
my ($self, $frm_seq_region, $frm_start,
my ($self, $frm_seq_region_name, $frm_start,
$frm_end, $frm_strand, $frm_cs, $fastmap) = @_;
my $mapper = $self->{'first_last_mapper'};
......@@ -263,6 +263,10 @@ sub map {
my $frm;
my $registry;
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
#speed critical section:
#try to do simple pointer equality comparisons of the coord system objects
#first since this is likely to work most of the time and is much faster
......@@ -299,16 +303,13 @@ sub map {
#get a list of ranges in the requested region that have not been registered,
#and register them at the same
#print STDERR "frm_start=$frm_start frm_end=$frm_end" .
# "min_start=$min_start min_end=$min_end\n";
my $ranges;
if($is_insert) {
$ranges = $registry->check_and_register($frm_seq_region, $frm_end,
$ranges = $registry->check_and_register($seq_region_id, $frm_end,
$frm_start, $min_start, $min_end);
} else {
$ranges = $registry->check_and_register($frm_seq_region, $frm_start,
$ranges = $registry->check_and_register($seq_region_id, $frm_start,
$frm_end, $min_start, $min_end);
}
......@@ -318,21 +319,21 @@ sub map {
if($is_insert) {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_end, $frm_start, $min_start, $min_end);
($seq_region_id, $frm_end, $frm_start, $min_start, $min_end);
} else {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_start, $frm_end, $min_start, $min_end);
($seq_region_id, $frm_start, $frm_end, $min_start, $min_end);
}
}
$self->adaptor->register_chained($self,$frm,$frm_seq_region,$ranges);
$self->adaptor->register_chained($self,$frm,$seq_region_id,$ranges);
}
if($fastmap) {
return $mapper->fastmap($frm_seq_region, $frm_start, $frm_end,
return $mapper->fastmap($seq_region_id, $frm_start, $frm_end,
$frm_strand, $frm);
}
return $mapper->map_coordinates($frm_seq_region, $frm_start, $frm_end,
return $mapper->map_coordinates($seq_region_id, $frm_start, $frm_end,
$frm_strand, $frm);
}
......@@ -343,8 +344,7 @@ sub fastmap {
}
=head2 list_seq_regions
=head2 list_ids
Arg [1] : string $frm_seq_region
The name of the sequence region of interest
......@@ -354,11 +354,12 @@ sub fastmap {
The end of the region to transform of interest
Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
The coordinate system to obtain overlapping ids of
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$ctg_cs)) {...}
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
Description: Retrieves a list of overlapping seq_region internal identifiers
of another coordinate system. This is the same as the
list_ids method but uses seq_region names rather internal ids
Returntype : List of strings
of another coordinate system. This is the same as the
list_seq_regions method but uses internal identfiers rather
than seq_region strings
Returntype : List of ints
Exceptions : none
Caller : general
Status : Stable
......@@ -366,9 +367,9 @@ sub fastmap {
=cut
sub list_seq_regions {
sub list_ids {
throw('Incorrect number of arguments.') if(@_ != 5);
my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_;
my $is_insert = ($frm_start == $frm_end + 1);
......@@ -387,25 +388,30 @@ sub list_seq_regions {
$min_end = ((($frm_end >> $CHUNKFACTOR) + 1) << $CHUNKFACTOR) - 1;
}
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
if($frm_cs->equals($self->{'first_cs'})) {
my $registry = $self->{'first_registry'};
my $ranges;
if($is_insert) {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_end, $frm_start, $min_start, $min_end);
($seq_region_id, $frm_end, $frm_start, $min_start, $min_end);
} else {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_start, $frm_end, $min_start, $min_end);
($seq_region_id, $frm_start, $frm_end, $min_start, $min_end);
}
if(defined($ranges)) {
$self->adaptor->register_chained($self,$FIRST,$frm_seq_region,$ranges);
$self->adaptor->register_chained($self,$FIRST,$seq_region_id,$ranges);
}
return map {$_->to()->id()}
$self->first_last_mapper()->list_pairs($frm_seq_region, $frm_start,
$self->first_last_mapper()->list_pairs($seq_region_id, $frm_start,
$frm_end, $FIRST);
} elsif($frm_cs->equals($self->{'last_cs'})) {
......@@ -414,18 +420,18 @@ sub list_seq_regions {
my $ranges;
if($is_insert) {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_end, $frm_start, $min_start, $min_end);
($seq_region_id, $frm_end, $frm_start, $min_start, $min_end);
} else {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_start, $frm_end, $min_start, $min_end);
($seq_region_id, $frm_start, $frm_end, $min_start, $min_end);
}
if(defined($ranges)) {
$self->adaptor->register_chained($self,$LAST,$frm_seq_region,$ranges);
$self->adaptor->register_chained($self,$LAST,$seq_region_id,$ranges);
}
return map {$_->from()->id()}
$self->first_last_mapper()->list_pairs($frm_seq_region, $frm_start,
$self->first_last_mapper()->list_pairs($seq_region_id, $frm_start,
$frm_end, $LAST);
} else {
throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
......@@ -435,7 +441,7 @@ sub list_seq_regions {
}
=head2 list_ids
=head2 list_seq_regions
Arg [1] : string $frm_seq_region
The name of the sequence region of interest
......@@ -445,25 +451,24 @@ sub list_seq_regions {
The end of the region to transform of interest
Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
The coordinate system to obtain overlapping ids of
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$ctg_cs)) {...}
Description: Retrieves a list of overlapping seq_region internal identifiers
of another coordinate system. This is the same as the
list_seq_regions method but uses internal identfiers rather
than seq_region strings
Returntype : List of ints
of another coordinate system. This is the same as the
list_ids method but uses seq_region names rather internal ids
Returntype : List of strings
Exceptions : none
Caller : general
Status : Stable
=cut
sub list_ids {
sub list_seq_regions {
throw('Incorrect number of arguments.') if(@_ != 5);
my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
#retrieve the seq_region names
my @seq_regs =
$self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs);
$self->list_ids($frm_seq_region,$frm_start,$frm_end,$frm_cs);
#The seq_regions are from the 'to' coordinate system not the
#from coordinate system we used to obtain them
......@@ -474,8 +479,8 @@ sub list_ids {
$to_cs = $self->first_CoordSystem();
}
#convert them to ids
return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)};
#convert them to names
return @{$self->adaptor()->seq_ids_to_regions(\@seq_regs)};
}
......
......@@ -67,7 +67,8 @@ use Bio::EnsEMBL::ChainedAssemblyMapper;
use Bio::EnsEMBL::TopLevelAssemblyMapper;
use Bio::EnsEMBL::Utils::Cache; #CPAN LRU cache
use Bio::EnsEMBL::Utils::Exception qw(deprecate throw);
use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning stack_trace_dump);
#use Bio::EnsEMBL::Utils::Exception qw(deprecate throw);
use Bio::EnsEMBL::Utils::SeqRegionCache;
use integer; #do proper arithmetic bitshifts
......@@ -250,6 +251,12 @@ sub register_assembled {
my $asm_start = shift;
my $asm_end = shift;
my $test = $asm_seq_region;
$test =~ s/\d+//g;
if(length($test) > 0 or $asm_seq_region < 1000){
print STDERR "$asm_seq_region NOT INTEGER\n";
print STDERR stack_trace_dump();
}
if(!ref($asm_mapper) || !$asm_mapper->isa('Bio::EnsEMBL::AssemblyMapper')) {
throw("Bio::EnsEMBL::AssemblyMapper argument expected");
}
......@@ -324,8 +331,8 @@ sub register_assembled {
}
}
my $asm_seq_region_id =
$self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id);
# my $asm_seq_region_id =
# $self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id);
# Retrieve the description of how the assembled region is made from
# component regions for each of the continuous blocks of unregistered,
......@@ -355,7 +362,7 @@ sub register_assembled {
foreach my $region (@chunk_regions) {
my($region_start, $region_end) = @$region;
$sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
$sth->bind_param(1,$asm_seq_region,SQL_INTEGER);
$sth->bind_param(2,$region_start,SQL_INTEGER);
$sth->bind_param(3,$region_end,SQL_INTEGER);
$sth->bind_param(4,$cmp_cs_id,SQL_INTEGER);
......@@ -373,12 +380,12 @@ sub register_assembled {
# Load the unregistered regions of the mapper
#
while($sth->fetch()) {
next if($asm_mapper->have_registered_component($cmp_seq_region));
$asm_mapper->register_component($cmp_seq_region);
next if($asm_mapper->have_registered_component($cmp_seq_region_id));
$asm_mapper->register_component($cmp_seq_region_id);
$asm_mapper->mapper->add_map_coordinates(
$asm_seq_region, $asm_start, $asm_end,
$ori,
$cmp_seq_region, $cmp_start, $cmp_end);
$cmp_seq_region_id, $cmp_start, $cmp_end);
my $arr = [ $cmp_seq_region_id, $cmp_seq_region,
$cmp_cs_id, $cmp_seq_region_length ];
......@@ -434,6 +441,44 @@ sub _seq_region_name_to_id {
return $sr_id;
}
sub _seq_region_id_to_name {
my $self = shift;
my $sr_id = shift;
($sr_id) || throw('seq_region_is required');
my $arr = $self->{'sr_id_cache'}->{"$sr_id"};
if( $arr ) {
return $arr->[1];
}
# Get the seq_region name via the id. This would be quicker if we just
# used internal ids instead but stored but then we lose the ability
# the transform accross databases with different internal ids
my $sth = $self->prepare("SELECT name, length ,coord_system_id " .
"FROM seq_region " .
"WHERE seq_region_id = ? ");
$sth->bind_param(1,$sr_id,SQL_INTEGER);
$sth->execute();
print "rows =>".$sth->rows()."\n\n";
if(!$sth->rows() == 1) {
throw("non-existant seq_region [$sr_id]");
}
my ($sr_name, $sr_length, $cs_id) = $sth->fetchrow_array();
$sth->finish();
$arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
$self->{'sr_name_cache'}->{"$sr_name:$cs_id"} = $arr;
$self->{'sr_id_cache'}->{"$sr_id"} = $arr;
return $sr_name;
}
=head2 register_component
......@@ -476,8 +521,8 @@ sub register_component {
#do nothing if this region is already registered
return if($asm_mapper->have_registered_component($cmp_seq_region));
my $cmp_seq_region_id =
$self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id);
# my $cmp_seq_region_id =
# $self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id);
# Determine what part of the assembled region this component region makes up
......@@ -497,7 +542,7 @@ sub register_component {
};
my $sth = $self->prepare($q);
$sth->bind_param(1,$cmp_seq_region_id,SQL_INTEGER);
$sth->bind_param(1,$cmp_seq_region,SQL_INTEGER);
$sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
$sth->execute();
......@@ -510,7 +555,7 @@ sub register_component {
#we do not currently support components mapping to multiple assembled
if($sth->rows() != 1) {
throw("Multiple assembled regions for single " .
"component region cmp_seq_region_id=[$cmp_seq_region_id]");
"component region cmp_seq_region_id=[$cmp_seq_region]");
}
my ($asm_start, $asm_end, $asm_seq_region_id,
......@@ -530,7 +575,7 @@ sub register_component {
# (2) Use locality of reference (if they want something in same general
# region it will already be registered).
$self->register_assembled($asm_mapper,$asm_seq_region,$asm_start,$asm_end);
$self->register_assembled($asm_mapper,$asm_seq_region_id,$asm_start,$asm_end);
}
......@@ -578,7 +623,7 @@ sub register_chained {
my $self = shift;
my $casm_mapper = shift;
my $from = shift;
my $seq_region_name = shift;
my $seq_region_id = shift;
my $ranges = shift;
......@@ -690,8 +735,8 @@ sub register_chained {