From 04bdd5c1b8e814a7fbab715672b07c06ffc0dc71 Mon Sep 17 00:00:00 2001 From: Andrew Yates <ayates@ebi.ac.uk> Date: Tue, 20 Mar 2012 14:41:58 +0000 Subject: [PATCH] Split the parsing of a location string away from the use case --- modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm | 84 ++++++++++++++++------- 1 file changed, 60 insertions(+), 24 deletions(-) diff --git a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm index b78c3d362e..1809db2068 100644 --- a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm +++ b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm @@ -470,17 +470,68 @@ sub fetch_by_region { sub fetch_by_toplevel_location { my ($self, $location, $no_warnings) = @_; - throw 'You must specify a location' if ! $location; + my ($seq_region_name, $start, $end, $strand) = $self->parse_location_to_values($location, $no_warnings); + + if(! $seq_region_name) { + return; + } + + if(defined $start && defined $end && $start > $end) { + throw "Cannot request a slice whose start is greater than its end. Start: $start. End: $end"; + } + + my $coord_system_name = 'toplevel'; + my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, undef, 0); + return unless $slice; + + my $srl = $slice->seq_region_length(); + my $name = $slice->seq_region_name(); + if(defined $start && $start > $srl) { + throw "Cannot request a slice whose start ($start) is greater than $srl for $name."; + } + if(defined $end && $end > $srl) { + warning "Requested end ($end) is greater than $srl for $name. Resetting to $srl" if ! $no_warnings; + $slice->{end} = $srl; + } + + return $slice; +} + +=head2 parse_location_to_values + + Arg [1] : string $location + Ensembl formatted location. Can be a format like + C<name:start-end>, C<name:start..end>, C<name:start:end>, + C<name:start>, C<name>. We can also support strand + specification as a +/- or 1/-1. + + Location names must be separated by a C<:>. All others can be + separated by C<..>, C<:> or C<->. + Arg[2] : boolean $no_warnings + Suppress warnings from this method + Example : my ($name, $start, $end, $strand) = $sa->parse_location_to_values('X:1..100:1); + Description : Takes in an Ensembl location String and returns the parsed + values + Returntype : List. Contains name, start, end and strand + +=cut + + +sub parse_location_to_values { + my ($self, $location, $no_warnings) = @_; + + throw 'You must specify a location' if ! $location; + #cleanup any nomenclature like 1_000 or 1 000 or 1,000 my $number_seps_regex = qr/\s+|,|_/; - my $separator = qr/(?:-|[.]{2}|\:)?/; - my $number = qr/[0-9,_ E]+/xms; - my $strand = qr/[+-1]|-1/xms; + my $separator_regex = qr/(?:-|[.]{2}|\:)?/; + my $number_regex = qr/[0-9,_ E]+/xms; + my $strand_regex = qr/[+-1]|-1/xms; - my $regex = qr/^(\w+) \s* :? \s* ($number)? $separator ($number)? $separator ($strand)? $/xms; - - if(my ($seq_region_name, $start, $end, $strand) = $location =~ $regex) { + my $regex = qr/^(\w+) \s* :? \s* ($number_regex)? $separator_regex ($number_regex)? $separator_regex ($strand_regex)? $/xms; + my ($seq_region_name, $start, $end, $strand); + if(($seq_region_name, $start, $end, $strand) = $location =~ $regex) { if(defined $strand) { if(!looks_like_number($strand)) { @@ -505,24 +556,9 @@ sub fetch_by_toplevel_location { if(defined $start && defined $end && $start > $end) { throw "Cannot request a slice whose start is greater than its end. Start: $start. End: $end"; } - - my $coord_system_name = 'toplevel'; - my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, undef, 0); - return unless $slice; - - my $srl = $slice->seq_region_length(); - my $name = $slice->seq_region_name(); - if(defined $start && $start > $srl) { - throw "Cannot request a slice whose start ($start) is greater than $srl for $name."; - } - if(defined $end && $end > $srl) { - warning "Requested end ($end) is greater than $srl for $name. Resetting to $srl" if ! $no_warnings; - $slice->{end} = $srl; - } - - return $slice; } - return; + + return ($seq_region_name, $start, $end, $strand); } =head2 fetch_by_region_unique -- GitLab