From 3aea631831fd00254db1aa32c262dee61001f360 Mon Sep 17 00:00:00 2001
From: Andrew Yates <ayates@ebi.ac.uk>
Date: Wed, 21 Sep 2011 10:53:58 +0000
Subject: [PATCH] Modifications to ensure that user input is not
 mis-interpreted as wanting whole chromosomes.

---
 modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm |  7 ++++++-
 modules/t/sliceAdaptor.t                  | 10 +++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm
index 07eb2491e8..701d7cbed5 100644
--- a/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm
+++ b/modules/Bio/EnsEMBL/DBSQL/SliceAdaptor.pm
@@ -439,11 +439,15 @@ sub fetch_by_region {
 =head2 fetch_by_toplevel_location
 
   Arg [1]     : string $location
+                Ensembl formatted location. Accepted formats include
+                C<name:start-end>, C<name:start..end>, C<name:start>,
+                C<name:-end> and C<name>.
   Example     : my $slice = $sa->fetch_by_toplevel_location('X:1-10000')
   Description : Converts an Ensembl location/region into the sequence region
                 name, start and end and passes them onto C<fetch_by_region()>. 
                 The code assumes that the required slice is on the top level
-                coordinate system.
+                coordinate system. Location formatting is not assumed to be
+                perfect; whitespace and commas are stripped before parsing.
   Returntype  : Bio::EnsEMBL::Slice
   Exceptions  : If $location is false otherwise see C<fetch_by_region()>
   Caller      : General
@@ -455,6 +459,7 @@ sub fetch_by_toplevel_location {
   my ($self, $location) = @_;
   throw 'You must specify a location' if ! $location;
   my $regex = qr/^(\w+) :? (\d+)? (?:-|[.]{2})? (\d+)?/xms;
+  $location =~ s/\s+|,//g;
   my ($seq_region_name, $start, $end) = $location =~ $regex;
   my $coord_system_name = 'toplevel';
   return $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, undef, undef, 0);
diff --git a/modules/t/sliceAdaptor.t b/modules/t/sliceAdaptor.t
index fe627ea37f..bd97d8315e 100644
--- a/modules/t/sliceAdaptor.t
+++ b/modules/t/sliceAdaptor.t
@@ -1,7 +1,7 @@
 use strict;
 use warnings;
 
-use Test::More tests => 129;
+use Test::More tests => 159;
 
 use Bio::EnsEMBL::Test::MultiTestDB;
 use Bio::EnsEMBL::DBSQL::SliceAdaptor;
@@ -458,6 +458,14 @@ test_toplevel_location('1:..10', 'chromosome', '1', 1, 10);
 test_toplevel_location('1:100', 'chromosome', '1', 100, 246874334);
 test_toplevel_location('1:', 'chromosome', '1', 1, 246874334);
 test_toplevel_location('1', 'chromosome', '1', 1, 246874334);
+
+test_toplevel_location('1: 1-1,000', 'chromosome', '1', 1, 1000);
+test_toplevel_location('1: 1-1,000,000', 'chromosome', '1', 1, 1000000);
+test_toplevel_location('1: 1-1 000 000', 'chromosome', '1', 1, 1000000);
+test_toplevel_location('1: 1', 'chromosome', '1', 1, 246874334);
+test_toplevel_location('1: -10', 'chromosome', '1', 1, 10);
+test_toplevel_location('1: 100', 'chromosome', '1', 100, 246874334);
+
 dies_ok { $slice_adaptor->fetch_by_toplevel_location(); } 'Checking calling without a location fails';
 dies_ok { $slice_adaptor->fetch_by_toplevel_location(''); } 'Checking calling with a blank location fails';
 ok(!defined $slice_adaptor->fetch_by_toplevel_location('wibble'), 'Checking with a bogus region returns undef');
-- 
GitLab