Commit 63e22774 authored by Andy Yates's avatar Andy Yates
Browse files

Allowing _ as a location separator

parent 189583d1
......@@ -466,6 +466,10 @@ sub fetch_by_region {
Suppress warnings from this method
Arg[3] : boolean $no_fuzz
Stop fuzzy matching of sequence regions from occuring
Arg[4] : boolean $ucsc
If we are unsuccessful at retriving a location retry taking any
possible chr prefix into account e.g. chrX and X are treated as
equivalents
Example : my $slice = $sa->fetch_by_toplevel_location('X:1-10000')
my $slice = $sa->fetch_by_toplevel_location('X:1-10000:-1')
Description : Converts an Ensembl location/region into the sequence region
......@@ -482,8 +486,8 @@ sub fetch_by_region {
=cut
sub fetch_by_toplevel_location {
my ($self, $location, $no_warnings, $no_fuzz) = @_;
return $self->fetch_by_location($location, 'toplevel', undef, $no_warnings, $no_fuzz);
my ($self, $location, $no_warnings, $no_fuzz, $ucsc) = @_;
return $self->fetch_by_location($location, 'toplevel', undef, $no_warnings, $no_fuzz, $ucsc);
}
=head2 fetch_by_location
......@@ -495,7 +499,7 @@ sub fetch_by_toplevel_location {
specification as a +/- or 1/-1.
Location names must be separated by a C<:>. All others can be
separated by C<..>, C<:> or C<->.
separated by C<..>, C<:>, C<_> or C<->.
Arg[2] : String $coord_system_name
The coordinate system to retrieve
Arg[3] : String $coord_system_version
......@@ -504,6 +508,10 @@ sub fetch_by_toplevel_location {
Suppress warnings from this method
Arg[5] : boolean $no_fuzz
Stop fuzzy matching of sequence regions from occuring
Arg[6] : boolean $ucsc
If we are unsuccessful at retriving a location retry taking any
possible chr prefix into account e.g. chrX and X are treated as
equivalents
Example : my $slice = $sa->fetch_by_toplevel_location('X:1-10000')
my $slice = $sa->fetch_by_toplevel_location('X:1-10000:-1')
Description : Converts an Ensembl location/region into the sequence region
......@@ -520,7 +528,7 @@ sub fetch_by_toplevel_location {
=cut
sub fetch_by_location {
my ($self, $location, $coord_system_name, $coord_system_version, $no_warnings, $no_fuzz) = @_;
my ($self, $location, $coord_system_name, $coord_system_version, $no_warnings, $no_fuzz, $ucsc) = @_;
throw "No coordinate system name specified" unless $coord_system_name;
......@@ -535,7 +543,22 @@ sub fetch_by_location {
}
my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz);
return unless $slice;
if(! defined $slice) {
if($ucsc) {
my $ucsc_seq_region_name = $seq_region_name;
$ucsc_seq_region_name =~ s/^chr//;
if($ucsc_seq_region_name ne $seq_region_name) {
$slice = $self->fetch_by_region($coord_system_name, $ucsc_seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz);
return if ! defined $slice; #if we had no slice still then bail
}
else {
return; #If it was not different then we didn't have the prefix so just return (same bail as before)
}
}
else {
return; #We didn't have a slice and no UCSC specifics are being triggered
}
}
my $srl = $slice->seq_region_length();
my $name = $slice->seq_region_name();
......@@ -559,7 +582,7 @@ sub fetch_by_location {
specification as a +/- or 1/-1.
Location names must be separated by a C<:>. All others can be
separated by C<..>, C<:> or C<->.
separated by C<..>, C<:> C<_>, or C<->.
Arg[2] : boolean $no_warnings
Suppress warnings from this method
Arg[3] : boolean $no_errors
......@@ -577,10 +600,10 @@ sub parse_location_to_values {
throw 'You must specify a location' if ! $location;
#cleanup any nomenclature like 1_000 or 1 000 or 1,000
my $number_seps_regex = qr/\s+|,|_/;
my $separator_regex = qr/(?:-|[.]{2}|\:)?/;
my $number_regex = qr/[0-9,_ E]+/xms;
#cleanup any nomenclature like 1 000 or 1,000
my $number_seps_regex = qr/\s+|,/;
my $separator_regex = qr/(?:-|[.]{2}|\:|_)?/;
my $number_regex = qr/[0-9, E]+/xms;
my $strand_regex = qr/[+-1]|-1/xms;
my $regex = qr/^((?:\w|\.|_|-)+) \s* :? \s* ($number_regex)? $separator_regex ($number_regex)? $separator_regex ($strand_regex)? $/xms;
......
......@@ -475,15 +475,33 @@ test_toplevel_location('1:100', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1:1_1000', 'chromosome', '1', 1, 1000);
test_toplevel_location('1:1_', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1:_10', 'chromosome', '1', 1, 10);
test_toplevel_location('1:100', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1: 1-1,000', 'chromosome', '1', 1, 1000);
test_toplevel_location('1: 1-1,000,000', 'chromosome', '1', 1, 1000000);
test_toplevel_location('1: 1-1 000 000', 'chromosome', '1', 1, 1000000);
test_toplevel_location('1: 1', 'chromosome', '1', 1, 246874334);
test_toplevel_location('1: -10', 'chromosome', '1', 1, 10);
test_toplevel_location('1: 100', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:100..2_000_000_000', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:100..2000000000', 'chromosome', '1', 100, 246874334);
test_toplevel_location('1:100..2E9', 'chromosome', '1', 100, 246874334);
# Try chr
my $ucsc = 1;
test_toplevel_location('chr1: 1-1,000', 'chromosome', '1', 1, 1000, 1, $ucsc);
test_toplevel_location('chr1: 1-1,000,000', 'chromosome', '1', 1, 1000000, 1, $ucsc);
test_toplevel_location('chr1: 1-1 000 000', 'chromosome', '1', 1, 1000000, 1, $ucsc);
test_toplevel_location('chr1: 1', 'chromosome', '1', 1, 246874334, 1, $ucsc);
test_toplevel_location('chr1: -10', 'chromosome', '1', 1, 10, 1, $ucsc);
test_toplevel_location('chr1: 100', 'chromosome', '1', 100, 246874334, 1, $ucsc);
test_toplevel_location('chr1:100..2000000000', 'chromosome', '1', 100, 246874334, 1, $ucsc);
test_toplevel_location('chr1:100..2E9', 'chromosome', '1', 100, 246874334, 1, $ucsc);
#Try strands
test_toplevel_location('1:1-1000:1', 'chromosome', '1', 1, 1000, 1);
test_toplevel_location('1:1-1000:-1', 'chromosome', '1', 1, 1000, -1);
......@@ -494,7 +512,7 @@ test_toplevel_location('1:1-1000--1', 'chromosome', '1', 1, 1000, -1);
dies_ok { $slice_adaptor->fetch_by_toplevel_location(); } 'Checking calling without a location fails';
dies_ok { $slice_adaptor->fetch_by_toplevel_location('', 1); } 'Checking calling with a blank location fails';
dies_ok { $slice_adaptor->fetch_by_toplevel_location('1:1_000_000_000..100', 1); } 'Checking calling with an excessive start throws an error';
dies_ok { $slice_adaptor->fetch_by_toplevel_location('1:1000000000..100', 1); } 'Checking calling with an excessive start throws an error';
ok(!defined $slice_adaptor->fetch_by_toplevel_location('wibble', 1), 'Checking with a bogus region returns undef');
ok(!defined $slice_adaptor->fetch_by_toplevel_location('1:-100--50', 1), 'Checking with a bogus region with negative coords returns undef');
......@@ -523,8 +541,10 @@ ok(!defined $slice_adaptor->fetch_by_toplevel_location('1:-100--50', 1), 'Checki
############# METHODS BELOW HERE
sub test_toplevel_location {
my ($location, $cs_name, $seq_region_name, $start, $end, $strand) = @_;
my $incoming_slice = $slice_adaptor->fetch_by_toplevel_location($location, 1);
my ($location, $cs_name, $seq_region_name, $start, $end, $strand, $ucsc) = @_;
my $no_warnings = 1;
my $no_fuzz = undef;
my $incoming_slice = $slice_adaptor->fetch_by_toplevel_location($location, $no_warnings, $no_fuzz, $ucsc);
test_slice($location, $incoming_slice, $cs_name, $seq_region_name, $start, $end, $strand);
return;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment