Commit 63e22774 authored by Andy Yates's avatar Andy Yates
Browse files

Allowing _ as a location separator

parent 189583d1
...@@ -466,6 +466,10 @@ sub fetch_by_region { ...@@ -466,6 +466,10 @@ sub fetch_by_region {
Suppress warnings from this method Suppress warnings from this method
Arg[3] : boolean $no_fuzz Arg[3] : boolean $no_fuzz
Stop fuzzy matching of sequence regions from occuring Stop fuzzy matching of sequence regions from occuring
Arg[4] : boolean $ucsc
If we are unsuccessful at retriving a location retry taking any
possible chr prefix into account e.g. chrX and X are treated as
equivalents
Example : my $slice = $sa->fetch_by_toplevel_location('X:1-10000') Example : my $slice = $sa->fetch_by_toplevel_location('X:1-10000')
my $slice = $sa->fetch_by_toplevel_location('X:1-10000:-1') my $slice = $sa->fetch_by_toplevel_location('X:1-10000:-1')
Description : Converts an Ensembl location/region into the sequence region Description : Converts an Ensembl location/region into the sequence region
...@@ -482,8 +486,8 @@ sub fetch_by_region { ...@@ -482,8 +486,8 @@ sub fetch_by_region {
=cut =cut
sub fetch_by_toplevel_location { sub fetch_by_toplevel_location {
my ($self, $location, $no_warnings, $no_fuzz) = @_; my ($self, $location, $no_warnings, $no_fuzz, $ucsc) = @_;
return $self->fetch_by_location($location, 'toplevel', undef, $no_warnings, $no_fuzz); return $self->fetch_by_location($location, 'toplevel', undef, $no_warnings, $no_fuzz, $ucsc);
} }
=head2 fetch_by_location =head2 fetch_by_location
...@@ -495,7 +499,7 @@ sub fetch_by_toplevel_location { ...@@ -495,7 +499,7 @@ sub fetch_by_toplevel_location {
specification as a +/- or 1/-1. specification as a +/- or 1/-1.
Location names must be separated by a C<:>. All others can be Location names must be separated by a C<:>. All others can be
separated by C<..>, C<:> or C<->. separated by C<..>, C<:>, C<_> or C<->.
Arg[2] : String $coord_system_name Arg[2] : String $coord_system_name
The coordinate system to retrieve The coordinate system to retrieve
Arg[3] : String $coord_system_version Arg[3] : String $coord_system_version
...@@ -504,6 +508,10 @@ sub fetch_by_toplevel_location { ...@@ -504,6 +508,10 @@ sub fetch_by_toplevel_location {
Suppress warnings from this method Suppress warnings from this method
Arg[5] : boolean $no_fuzz Arg[5] : boolean $no_fuzz
Stop fuzzy matching of sequence regions from occuring Stop fuzzy matching of sequence regions from occuring
Arg[6] : boolean $ucsc
If we are unsuccessful at retriving a location retry taking any
possible chr prefix into account e.g. chrX and X are treated as
equivalents
Example : my $slice = $sa->fetch_by_toplevel_location('X:1-10000') Example : my $slice = $sa->fetch_by_toplevel_location('X:1-10000')
my $slice = $sa->fetch_by_toplevel_location('X:1-10000:-1') my $slice = $sa->fetch_by_toplevel_location('X:1-10000:-1')
Description : Converts an Ensembl location/region into the sequence region Description : Converts an Ensembl location/region into the sequence region
...@@ -520,7 +528,7 @@ sub fetch_by_toplevel_location { ...@@ -520,7 +528,7 @@ sub fetch_by_toplevel_location {
=cut =cut
sub fetch_by_location { sub fetch_by_location {
my ($self, $location, $coord_system_name, $coord_system_version, $no_warnings, $no_fuzz) = @_; my ($self, $location, $coord_system_name, $coord_system_version, $no_warnings, $no_fuzz, $ucsc) = @_;
throw "No coordinate system name specified" unless $coord_system_name; throw "No coordinate system name specified" unless $coord_system_name;
...@@ -535,7 +543,22 @@ sub fetch_by_location { ...@@ -535,7 +543,22 @@ sub fetch_by_location {
} }
my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz); my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz);
return unless $slice; if(! defined $slice) {
if($ucsc) {
my $ucsc_seq_region_name = $seq_region_name;
$ucsc_seq_region_name =~ s/^chr//;
if($ucsc_seq_region_name ne $seq_region_name) {
$slice = $self->fetch_by_region($coord_system_name, $ucsc_seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz);
return if ! defined $slice; #if we had no slice still then bail
}
else {
return; #If it was not different then we didn't have the prefix so just return (same bail as before)
}
}
else {
return; #We didn't have a slice and no UCSC specifics are being triggered
}
}
my $srl = $slice->seq_region_length(); my $srl = $slice->seq_region_length();
my $name = $slice->seq_region_name(); my $name = $slice->seq_region_name();
...@@ -559,7 +582,7 @@ sub fetch_by_location { ...@@ -559,7 +582,7 @@ sub fetch_by_location {
specification as a +/- or 1/-1. specification as a +/- or 1/-1.
Location names must be separated by a C<:>. All others can be Location names must be separated by a C<:>. All others can be
separated by C<..>, C<:> or C<->. separated by C<..>, C<:> C<_>, or C<->.
Arg[2] : boolean $no_warnings Arg[2] : boolean $no_warnings
Suppress warnings from this method Suppress warnings from this method
Arg[3] : boolean $no_errors Arg[3] : boolean $no_errors
...@@ -577,10 +600,10 @@ sub parse_location_to_values { ...@@ -577,10 +600,10 @@ sub parse_location_to_values {
throw 'You must specify a location' if ! $location; throw 'You must specify a location' if ! $location;
#cleanup any nomenclature like 1_000 or 1 000 or 1,000 #cleanup any nomenclature like 1 000 or 1,000
my $number_seps_regex = qr/\s+|,|_/; my $number_seps_regex = qr/\s+|,/;
my $separator_regex = qr/(?:-|[.]{2}|\:)?/; my $separator_regex = qr/(?:-|[.]{2}|\:|_)?/;
my $number_regex = qr/[0-9,_ E]+/xms; my $number_regex = qr/[0-9, E]+/xms;
my $strand_regex = qr/[+-1]|-1/xms; my $strand_regex = qr/[+-1]|-1/xms;
my $regex = qr/^((?:\w|\.|_|-)+) \s* :? \s* ($number_regex)? $separator_regex ($number_regex)? $separator_regex ($strand_regex)? $/xms; my $regex = qr/^((?:\w|\.|_|-)+) \s* :? \s* ($number_regex)? $separator_regex ($number_regex)? $separator_regex ($strand_regex)? $/xms;
......
...@@ -475,15 +475,33 @@ test_toplevel_location('1:100', 'chromosome', '1', 100, 246874334); ...@@ -475,15 +475,33 @@ test_toplevel_location('1:100', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:', 'chromosome', '1', 1, 246874334); test_toplevel_location('1:', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1', 'chromosome', '1', 1, 246874334); test_toplevel_location('1', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1:1_1000', 'chromosome', '1', 1, 1000);
test_toplevel_location('1:1_', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1:_10', 'chromosome', '1', 1, 10);
test_toplevel_location('1:100', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1: 1-1,000', 'chromosome', '1', 1, 1000); test_toplevel_location('1: 1-1,000', 'chromosome', '1', 1, 1000);
test_toplevel_location('1: 1-1,000,000', 'chromosome', '1', 1, 1000000); test_toplevel_location('1: 1-1,000,000', 'chromosome', '1', 1, 1000000);
test_toplevel_location('1: 1-1 000 000', 'chromosome', '1', 1, 1000000); test_toplevel_location('1: 1-1 000 000', 'chromosome', '1', 1, 1000000);
test_toplevel_location('1: 1', 'chromosome', '1', 1, 246874334); test_toplevel_location('1: 1', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1: -10', 'chromosome', '1', 1, 10); test_toplevel_location('1: -10', 'chromosome', '1', 1, 10);
test_toplevel_location('1: 100', 'chromosome', '1', 100, 246874334); test_toplevel_location('1: 100', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:100..2_000_000_000', 'chromosome', '1', 100, 246874334); test_toplevel_location('1:100..2000000000', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:100..2E9', 'chromosome', '1', 100, 246874334); test_toplevel_location('1:100..2E9', 'chromosome', '1', 100, 246874334);
# Try chr
my $ucsc = 1;
test_toplevel_location('chr1: 1-1,000', 'chromosome', '1', 1, 1000, 1, $ucsc);
test_toplevel_location('chr1: 1-1,000,000', 'chromosome', '1', 1, 1000000, 1, $ucsc);
test_toplevel_location('chr1: 1-1 000 000', 'chromosome', '1', 1, 1000000, 1, $ucsc);
test_toplevel_location('chr1: 1', 'chromosome', '1', 1, 246874334, 1, $ucsc);
test_toplevel_location('chr1: -10', 'chromosome', '1', 1, 10, 1, $ucsc);
test_toplevel_location('chr1: 100', 'chromosome', '1', 100, 246874334, 1, $ucsc);
test_toplevel_location('chr1:100..2000000000', 'chromosome', '1', 100, 246874334, 1, $ucsc);
test_toplevel_location('chr1:100..2E9', 'chromosome', '1', 100, 246874334, 1, $ucsc);
#Try strands #Try strands
test_toplevel_location('1:1-1000:1', 'chromosome', '1', 1, 1000, 1); test_toplevel_location('1:1-1000:1', 'chromosome', '1', 1, 1000, 1);
test_toplevel_location('1:1-1000:-1', 'chromosome', '1', 1, 1000, -1); test_toplevel_location('1:1-1000:-1', 'chromosome', '1', 1, 1000, -1);
...@@ -494,7 +512,7 @@ test_toplevel_location('1:1-1000--1', 'chromosome', '1', 1, 1000, -1); ...@@ -494,7 +512,7 @@ test_toplevel_location('1:1-1000--1', 'chromosome', '1', 1, 1000, -1);
dies_ok { $slice_adaptor->fetch_by_toplevel_location(); } 'Checking calling without a location fails'; dies_ok { $slice_adaptor->fetch_by_toplevel_location(); } 'Checking calling without a location fails';
dies_ok { $slice_adaptor->fetch_by_toplevel_location('', 1); } 'Checking calling with a blank location fails'; dies_ok { $slice_adaptor->fetch_by_toplevel_location('', 1); } 'Checking calling with a blank location fails';
dies_ok { $slice_adaptor->fetch_by_toplevel_location('1:1_000_000_000..100', 1); } 'Checking calling with an excessive start throws an error'; dies_ok { $slice_adaptor->fetch_by_toplevel_location('1:1000000000..100', 1); } 'Checking calling with an excessive start throws an error';
ok(!defined $slice_adaptor->fetch_by_toplevel_location('wibble', 1), 'Checking with a bogus region returns undef'); ok(!defined $slice_adaptor->fetch_by_toplevel_location('wibble', 1), 'Checking with a bogus region returns undef');
ok(!defined $slice_adaptor->fetch_by_toplevel_location('1:-100--50', 1), 'Checking with a bogus region with negative coords returns undef'); ok(!defined $slice_adaptor->fetch_by_toplevel_location('1:-100--50', 1), 'Checking with a bogus region with negative coords returns undef');
...@@ -523,8 +541,10 @@ ok(!defined $slice_adaptor->fetch_by_toplevel_location('1:-100--50', 1), 'Checki ...@@ -523,8 +541,10 @@ ok(!defined $slice_adaptor->fetch_by_toplevel_location('1:-100--50', 1), 'Checki
############# METHODS BELOW HERE ############# METHODS BELOW HERE
sub test_toplevel_location { sub test_toplevel_location {
my ($location, $cs_name, $seq_region_name, $start, $end, $strand) = @_; my ($location, $cs_name, $seq_region_name, $start, $end, $strand, $ucsc) = @_;
my $incoming_slice = $slice_adaptor->fetch_by_toplevel_location($location, 1); my $no_warnings = 1;
my $no_fuzz = undef;
my $incoming_slice = $slice_adaptor->fetch_by_toplevel_location($location, $no_warnings, $no_fuzz, $ucsc);
test_slice($location, $incoming_slice, $cs_name, $seq_region_name, $start, $end, $strand); test_slice($location, $incoming_slice, $cs_name, $seq_region_name, $start, $end, $strand);
return; return;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment