Commit 445acd25 authored by Ian Longden's avatar Ian Longden
Browse files

first attempt at fix

parent c093ff70
......@@ -201,7 +201,7 @@ sub register_all {
sub map {
throw('Incorrect number of arguments.') if(@_ != 6);
my ($self, $frm_seq_region, $frm_start, $frm_end, $frm_strand, $frm_cs) = @_;
my ($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs) = @_;
my $mapper = $self->{'mapper'};
my $asm_cs = $self->{'asm_cs'};
......@@ -209,6 +209,10 @@ sub map {
my $adaptor = $self->{'adaptor'};
my $frm;
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
#speed critical section:
#try to do simple pointer equality comparisons of the coord system objects
#first since this is likely to work most of the time and is much faster
......@@ -216,8 +220,8 @@ sub map {
if($frm_cs == $cmp_cs || ($frm_cs != $asm_cs && $frm_cs->equals($cmp_cs))) {
if(!$self->{'cmp_register'}->{$frm_seq_region}) {
$adaptor->register_component($self,$frm_seq_region);
if(!$self->{'cmp_register'}->{$seq_region_id}) {
$adaptor->register_component($self,$seq_region_id);
}
$frm = $COMPONENT;
......@@ -225,7 +229,7 @@ sub map {
# This can be probably be sped up some by only calling registered
# assembled if needed
$adaptor->register_assembled($self,$frm_seq_region,$frm_start,$frm_end);
$adaptor->register_assembled($self,$seq_region_id,$frm_start,$frm_end);
$frm = $ASSEMBLED;
} else {
......@@ -235,7 +239,7 @@ sub map {
" of this AssemblyMapper");
}
return $mapper->map_coordinates($frm_seq_region, $frm_start, $frm_end,
return $mapper->map_coordinates($seq_region_id, $frm_start, $frm_end,
$frm_strand, $frm);
}
......@@ -307,7 +311,7 @@ sub size {
sub fastmap {
throw('Incorrect number of arguments.') if(@_ != 6);
my ($self, $frm_seq_region, $frm_start, $frm_end, $frm_strand, $frm_cs) = @_;
my ($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs) = @_;
my $mapper = $self->{'mapper'};
my $asm_cs = $self->{'asm_cs'};
......@@ -315,6 +319,10 @@ sub fastmap {
my $adaptor = $self->{'adaptor'};
my $frm;
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
#speed critical section:
#try to do simple pointer equality comparisons of the coord system objects
#first since this is likely to work most of the time and is much faster
......@@ -322,8 +330,8 @@ sub fastmap {
if($frm_cs == $cmp_cs || ($frm_cs != $asm_cs && $frm_cs->equals($cmp_cs))) {
if(!$self->{'cmp_register'}->{$frm_seq_region}) {
$adaptor->register_component($self,$frm_seq_region);
if(!$self->{'cmp_register'}->{$seq_region_id}) {
$adaptor->register_component($self,$seq_region_id);
}
$frm = $COMPONENT;
......@@ -331,7 +339,7 @@ sub fastmap {
# This can be probably be sped up some by only calling registered
# assembled if needed
$adaptor->register_assembled($self,$frm_seq_region,$frm_start,$frm_end);
$adaptor->register_assembled($self,$seq_region_id,$frm_start,$frm_end);
$frm = $ASSEMBLED;
} else {
......@@ -341,13 +349,13 @@ sub fastmap {
" of this AssemblyMapper");
}
return $mapper->fastmap($frm_seq_region, $frm_start, $frm_end,
return $mapper->fastmap($seq_region_id, $frm_start, $frm_end,
$frm_strand, $frm);
}
=head2 list_seq_regions
=head2 list_ids
Arg [1] : string $frm_seq_region
The name of the sequence region of interest
......@@ -368,34 +376,37 @@ sub fastmap {
=cut
sub list_seq_regions {
sub list_ids {
throw('Incorrect number of arguments.') if(@_ != 5);
my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_;
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
if($frm_cs->equals($self->component_CoordSystem())) {
if(!$self->have_registered_component($frm_seq_region)) {
$self->adaptor->register_component($self,$frm_seq_region);
if(!$self->have_registered_component($seq_region_id)) {
$self->adaptor->register_component($self,$seq_region_id);
}
#pull out the 'from' identifiers of the mapper pairs. The
#we loaded the assembled side as the 'from' side in the constructor
return
map {$_->from()->id()}
$self->mapper()->list_pairs($frm_seq_region, $frm_start,
$self->mapper()->list_pairs($seq_region_id, $frm_start,
$frm_end, $COMPONENT);
} elsif($frm_cs->equals($self->assembled_CoordSystem())) {
$self->adaptor->register_assembled($self,
$frm_seq_region,$frm_start,$frm_end);
$seq_region_id,$frm_start,$frm_end);
#pull out the 'to' identifiers of the mapper pairs
#we loaded the component side as the 'to' coord system in the constructor
return
map {$_->to->id()}
$self->mapper()->list_pairs($frm_seq_region, $frm_start,
$self->mapper()->list_pairs($seq_region_id, $frm_start,
$frm_end, $ASSEMBLED);
} else {
throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
......@@ -404,8 +415,43 @@ sub list_seq_regions {
}
}
#sub list_seq_regions {
# throw('Incorrect number of arguments.') if(@_ != 5);
# my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_;
=head2 list_ids
# if($frm_cs->equals($self->component_CoordSystem())) {
# if(!$self->have_registered_component($seq_region_id)) {
# $self->adaptor->register_component($self,$seq_region_id);
# }
# #pull out the 'from' identifiers of the mapper pairs. The
# #we loaded the assembled side as the 'from' side in the constructor
# return
# map {$_->from()->id()}
# $self->mapper()->list_pairs($seq_region_id, $frm_start,
# $frm_end, $COMPONENT);
# } elsif($frm_cs->equals($self->assembled_CoordSystem())) {
# $self->adaptor->register_assembled($self,
# $frm_seq_region,$frm_start,$frm_end);
# #pull out the 'to' identifiers of the mapper pairs
# #we loaded the component side as the 'to' coord system in the constructor
# return
# map {$_->to->id()}
# $self->mapper()->list_pairs($frm_seq_region, $frm_start,
# $frm_end, $ASSEMBLED);
# } else {
# throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
# " is neither the assembled nor the component coordinate system " .
# " of this AssemblyMapper");
# }
#}
=head2 list_seq_regions
Arg [1] : string $frm_seq_region
The name of the sequence region of interest
......@@ -415,7 +461,7 @@ sub list_seq_regions {
The end of the region to transform of interest
Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
The coordinate system to obtain overlapping ids of
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
Example : foreach $id ($asm_mapper->list_seq_regions('X',1,1000,$chr_cs)) {...}
Description: Retrieves a list of overlapping seq_region internal identifiers
of another coordinate system. This is the same as the
list_seq_regions method but uses internal identfiers rather
......@@ -427,13 +473,13 @@ sub list_seq_regions {
=cut
sub list_ids {
sub list_seq_regions {
throw('Incorrect number of arguments.') if(@_ != 5);
my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
#retrieve the seq_region names
my @seq_regs =
$self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs);
my @seq_ids =
$self->list_ids($frm_seq_region,$frm_start,$frm_end,$frm_cs);
#The seq_regions are from the 'to' coordinate system not the
#from coordinate system we used to obtain them
......@@ -445,9 +491,30 @@ sub list_ids {
}
#convert them to ids
return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)};
return @{$self->adaptor()->seq_ids_to_regions($to_cs, \@seq_ids)};
}
#sub list_ids {
# throw('Incorrect number of arguments.') if(@_ != 5);
# my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
# #retrieve the seq_region names
# my @seq_regs =
# $self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs);
# #The seq_regions are from the 'to' coordinate system not the
# #from coordinate system we used to obtain them
# my $to_cs;
# if($frm_cs->equals($self->assembled_CoordSystem())) {
# $to_cs = $self->component_CoordSystem();
# } else {
# $to_cs = $self->assembled_CoordSystem();
# }
# #convert them to ids
# return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)};
#}
......
......@@ -251,7 +251,7 @@ sub size {
sub map {
throw('Incorrect number of arguments.') if(@_ < 6);
my ($self, $frm_seq_region, $frm_start,
my ($self, $frm_seq_region_name, $frm_start,
$frm_end, $frm_strand, $frm_cs, $fastmap) = @_;
my $mapper = $self->{'first_last_mapper'};
......@@ -263,6 +263,10 @@ sub map {
my $frm;
my $registry;
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
#speed critical section:
#try to do simple pointer equality comparisons of the coord system objects
#first since this is likely to work most of the time and is much faster
......@@ -305,10 +309,10 @@ sub map {
my $ranges;
if($is_insert) {
$ranges = $registry->check_and_register($frm_seq_region, $frm_end,
$ranges = $registry->check_and_register($seq_region_id, $frm_end,
$frm_start, $min_start, $min_end);
} else {
$ranges = $registry->check_and_register($frm_seq_region, $frm_start,
$ranges = $registry->check_and_register($seq_region_id, $frm_start,
$frm_end, $min_start, $min_end);
}
......@@ -318,21 +322,21 @@ sub map {
if($is_insert) {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_end, $frm_start, $min_start, $min_end);
($seq_region_id, $frm_end, $frm_start, $min_start, $min_end);
} else {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_start, $frm_end, $min_start, $min_end);
($seq_region_id, $frm_start, $frm_end, $min_start, $min_end);
}
}
$self->adaptor->register_chained($self,$frm,$frm_seq_region,$ranges);
$self->adaptor->register_chained($self,$frm,$seq_region_id,$ranges);
}
if($fastmap) {
return $mapper->fastmap($frm_seq_region, $frm_start, $frm_end,
return $mapper->fastmap($seq_region_id, $frm_start, $frm_end,
$frm_strand, $frm);
}
return $mapper->map_coordinates($frm_seq_region, $frm_start, $frm_end,
return $mapper->map_coordinates($seq_region_id, $frm_start, $frm_end,
$frm_strand, $frm);
}
......@@ -343,8 +347,7 @@ sub fastmap {
}
=head2 list_seq_regions
=head2 list_ids
Arg [1] : string $frm_seq_region
The name of the sequence region of interest
......@@ -354,11 +357,12 @@ sub fastmap {
The end of the region to transform of interest
Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
The coordinate system to obtain overlapping ids of
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$ctg_cs)) {...}
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
Description: Retrieves a list of overlapping seq_region internal identifiers
of another coordinate system. This is the same as the
list_ids method but uses seq_region names rather internal ids
Returntype : List of strings
of another coordinate system. This is the same as the
list_seq_regions method but uses internal identfiers rather
than seq_region strings
Returntype : List of ints
Exceptions : none
Caller : general
Status : Stable
......@@ -366,9 +370,9 @@ sub fastmap {
=cut
sub list_seq_regions {
sub list_ids {
throw('Incorrect number of arguments.') if(@_ != 5);
my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs) = @_;
my $is_insert = ($frm_start == $frm_end + 1);
......@@ -387,25 +391,30 @@ sub list_seq_regions {
$min_end = ((($frm_end >> $CHUNKFACTOR) + 1) << $CHUNKFACTOR) - 1;
}
my @tmp;
push @tmp, $frm_seq_region_name;
my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
if($frm_cs->equals($self->{'first_cs'})) {
my $registry = $self->{'first_registry'};
my $ranges;
if($is_insert) {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_end, $frm_start, $min_start, $min_end);
($seq_region_id, $frm_end, $frm_start, $min_start, $min_end);
} else {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_start, $frm_end, $min_start, $min_end);
($seq_region_id, $frm_start, $frm_end, $min_start, $min_end);
}
if(defined($ranges)) {
$self->adaptor->register_chained($self,$FIRST,$frm_seq_region,$ranges);
$self->adaptor->register_chained($self,$FIRST,$seq_region_id,$ranges);
}
return map {$_->to()->id()}
$self->first_last_mapper()->list_pairs($frm_seq_region, $frm_start,
$self->first_last_mapper()->list_pairs($seq_region_id, $frm_start,
$frm_end, $FIRST);
} elsif($frm_cs->equals($self->{'last_cs'})) {
......@@ -414,18 +423,18 @@ sub list_seq_regions {
my $ranges;
if($is_insert) {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_end, $frm_start, $min_start, $min_end);
($seq_region_id, $frm_end, $frm_start, $min_start, $min_end);
} else {
$ranges = $registry->check_and_register
($frm_seq_region, $frm_start, $frm_end, $min_start, $min_end);
($seq_region_id, $frm_start, $frm_end, $min_start, $min_end);
}
if(defined($ranges)) {
$self->adaptor->register_chained($self,$LAST,$frm_seq_region,$ranges);
$self->adaptor->register_chained($self,$LAST,$seq_region_id,$ranges);
}
return map {$_->from()->id()}
$self->first_last_mapper()->list_pairs($frm_seq_region, $frm_start,
$self->first_last_mapper()->list_pairs($seq_region_id, $frm_start,
$frm_end, $LAST);
} else {
throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
......@@ -435,7 +444,7 @@ sub list_seq_regions {
}
=head2 list_ids
=head2 list_seq_regions
Arg [1] : string $frm_seq_region
The name of the sequence region of interest
......@@ -445,25 +454,24 @@ sub list_seq_regions {
The end of the region to transform of interest
Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
The coordinate system to obtain overlapping ids of
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$ctg_cs)) {...}
Description: Retrieves a list of overlapping seq_region internal identifiers
of another coordinate system. This is the same as the
list_seq_regions method but uses internal identfiers rather
than seq_region strings
Returntype : List of ints
of another coordinate system. This is the same as the
list_ids method but uses seq_region names rather internal ids
Returntype : List of strings
Exceptions : none
Caller : general
Status : Stable
=cut
sub list_ids {
sub list_seq_regions {
throw('Incorrect number of arguments.') if(@_ != 5);
my($self, $frm_seq_region, $frm_start, $frm_end, $frm_cs) = @_;
#retrieve the seq_region names
my @seq_regs =
$self->list_seq_regions($frm_seq_region,$frm_start,$frm_end,$frm_cs);
$self->list_ids($frm_seq_region,$frm_start,$frm_end,$frm_cs);
#The seq_regions are from the 'to' coordinate system not the
#from coordinate system we used to obtain them
......@@ -474,8 +482,8 @@ sub list_ids {
$to_cs = $self->first_CoordSystem();
}
#convert them to ids
return @{$self->adaptor()->seq_regions_to_ids($to_cs, \@seq_regs)};
#convert them to names
return @{$self->adaptor()->ids_to_seq_region_names($to_cs, \@seq_regs)};
}
......
......@@ -324,8 +324,8 @@ sub register_assembled {
}
}
my $asm_seq_region_id =
$self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id);
# my $asm_seq_region_id =
# $self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id);
# Retrieve the description of how the assembled region is made from
# component regions for each of the continuous blocks of unregistered,
......@@ -355,7 +355,7 @@ sub register_assembled {
foreach my $region (@chunk_regions) {
my($region_start, $region_end) = @$region;
$sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
$sth->bind_param(1,$asm_seq_region,SQL_INTEGER);
$sth->bind_param(2,$region_start,SQL_INTEGER);
$sth->bind_param(3,$region_end,SQL_INTEGER);
$sth->bind_param(4,$cmp_cs_id,SQL_INTEGER);
......@@ -434,6 +434,44 @@ sub _seq_region_name_to_id {
return $sr_id;
}
sub _seq_region_id_to_name {
my $self = shift;
my $sr_id = shift;
($sr_id) || throw('seq_region_is required');
my $arr = $self->{'sr_id_cache'}->{"$sr_id"};
if( $arr ) {
return $arr->[1];
}
# Get the seq_region name via the id. This would be quicker if we just
# used internal ids instead but stored but then we lose the ability
# the transform accross databases with different internal ids
my $sth = $self->prepare("SELECT name, length ,coord_system_id " .
"FROM seq_region " .
"WHERE seq_region_id = ? ");
$sth->bind_param(1,$sr_id,SQL_INTEGER);
$sth->execute();
print "rows =>".$sth->rows()."\n\n";
if(!$sth->rows() == 1) {
throw("non-existant seq_region [$sr_id]");
}
my ($sr_name, $sr_length, $cs_id) = $sth->fetchrow_array();
$sth->finish();
$arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
$self->{'sr_name_cache'}->{"$sr_name:$cs_id"} = $arr;
$self->{'sr_id_cache'}->{"$sr_id"} = $arr;
return $sr_name;
}
=head2 register_component
......@@ -476,8 +514,8 @@ sub register_component {
#do nothing if this region is already registered
return if($asm_mapper->have_registered_component($cmp_seq_region));
my $cmp_seq_region_id =
$self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id);
# my $cmp_seq_region_id =
# $self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id);
# Determine what part of the assembled region this component region makes up
......@@ -497,7 +535,7 @@ sub register_component {
};
my $sth = $self->prepare($q);
$sth->bind_param(1,$cmp_seq_region_id,SQL_INTEGER);
$sth->bind_param(1,$cmp_seq_region,SQL_INTEGER);
$sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
$sth->execute();
......@@ -510,7 +548,7 @@ sub register_component {
#we do not currently support components mapping to multiple assembled
if($sth->rows() != 1) {
throw("Multiple assembled regions for single " .
"component region cmp_seq_region_id=[$cmp_seq_region_id]");
"component region cmp_seq_region_id=[$cmp_seq_region]");
}
my ($asm_start, $asm_end, $asm_seq_region_id,
......@@ -578,7 +616,7 @@ sub register_chained {
my $self = shift;
my $casm_mapper = shift;
my $from = shift;
my $seq_region_name = shift;
my $seq_region_id = shift;
my $ranges = shift;
......@@ -690,8 +728,8 @@ sub register_chained {
$sth = ($asm_cs->equals($start_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
my $seq_region_id = $self->_seq_region_name_to_id($seq_region_name,
$start_cs->dbID());
# my $seq_region_id = $self->_seq_region_name_to_id($seq_region_name,
# $start_cs->dbID());
my $mid_cs_id;
# check for the simple case where the ChainedMapper is short
......@@ -727,21 +765,21 @@ sub register_chained {
if( defined $mid_cs ) {
$start_mid_mapper->add_map_coordinates
(
$seq_region_name,$start_start, $start_end, $ori,
$seq_region_id,$start_start, $start_end, $ori,
$mid_seq_region, $mid_start, $mid_end
);
} else {
if( $from eq "first" ) {
$combined_mapper->add_map_coordinates
(
$seq_region_name,$start_start, $start_end, $ori,
$seq_region_id,$start_start, $start_end, $ori,
$mid_seq_region, $mid_start, $mid_end
);
} else {
$combined_mapper->add_map_coordinates
(
$mid_seq_region, $mid_start, $mid_end, $ori,
$seq_region_name,$start_start, $start_end
$mid_seq_region_id, $mid_start, $mid_end, $ori,
$seq_region_id,$start_start, $start_end
);
}
}
......@@ -755,7 +793,7 @@ sub register_chained {
push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
$mid_start,$mid_end];
push @start_ranges, [ $seq_region_name, $start_start, $start_end ];
push @start_ranges, [ $seq_region_id, $start_start, $start_end ];
#the region that we actually register may actually be larger or smaller
#than the region that we wanted to register.
......@@ -763,7 +801,7 @@ sub register_chained {
#extra work later
if($start_start < $start || $start_end > $end) {
$start_registry->check_and_register($seq_region_name,$start_start,
$start_registry->check_and_register($seq_region_id,$start_start,
$start_end);
}
}
......@@ -1299,6 +1337,41 @@ sub seq_regions_to_ids {
}
=head2 seq_ids_to_regions
Arg [1] : listref of seq_region ids
Example : my @ids = @{$asma->ids_to_seq_regions(\@seq_ids)};
Description: Converts a list of seq_region ids to seq region names