Unverified Commit 61442151 authored by Marek Szuba's avatar Marek Szuba Committed by GitHub
Browse files

Merge pull request #393 from Ensembl/bugfix/circular_chromosome_spanning

ENSCORESW-3132: Limited implementation of Intervals with start > end …
parents faba0de9 bc387e64
......@@ -35,7 +35,7 @@ Bio::EnsEMBL::Utils::Interval
=head1 SYNOPSIS
# let's get an interval spanning 9e5 bp and associate it with some data
my $i2 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e6, { 'key1' => 'value1', 'key2' => 'value2' });
my $i1 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e6, { 'key1' => 'value1', 'key2' => 'value2' });
# and another one which overlaps with the previous,
# but with scalar associated data
......@@ -50,12 +50,18 @@ Bio::EnsEMBL::Utils::Interval
print "I1 and I2 do not overlap\n";
}
# If an interval is defined with a start > end, then it is assumed
# to be spanning the origin on a circular chromosome
my $i3 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e2);
warn "Interval spans the origin" if $i3->spans_origin;
etc.
=head1 DESCRIPTION
A class representing an interval defined on a genomic region. Instances of this
class can store arbitrarily defined data.
class can store arbitrarily defined data. If created with start > end, then it
is assumed that this interval is on a circular chromosome spanning the origin.
=head1 METHODS
......@@ -78,9 +84,12 @@ use Bio::EnsEMBL::Utils::Exception qw(throw);
Arg [3] : (optional) $data
The data associated with the interval, can be anything
Example : my $i = Bio::EnsEMBL::Utils::Interval->new(1e2, 2e2, { 'key' => 'value' });
my $i2 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e2);
$i->spans_origin # returns 0
$i2->spans_origin # returns 1
Description : Constructor. Creates a new instance
Returntype : Bio::EnsEMBL::Utils::Interval
Exceptions : none
Exceptions : Throws an exception if start or end is not defined.
Caller : general
=cut
......@@ -92,9 +101,17 @@ sub new {
my ($start, $end, $data) = @_;
throw 'Must specify interval boundaries [start, end]'
unless defined $start and defined $end;
throw 'start must be <= end' if $start > $end;
my $spans_origin = 0;
if ($start > $end) {
$spans_origin = 1;
}
my $self = bless({ start => $start, end => $end, data => $data }, $class);
my $self = bless({ start => $start,
end => $end,
data => $data ,
spans_origin => $spans_origin},
$class);
return $self;
}
......@@ -146,6 +163,23 @@ sub data {
return $self->{data};
}
=head2 spans_origin
Arg [] : none
Description : Returns whether this interval was created spanning zero
(more particularly: if the interval was instantiated with start > end)
Returntype : boolean
Exceptions : none
Caller : general
=cut
sub spans_origin {
  # Read-only accessor: hands back whatever is stored under the
  # 'spans_origin' key of the object hash, without modifying it.
  my ($self) = @_;
  my $flag = $self->{spans_origin};
  return $flag;
}
=head2 is_empty
Arg [] : none
......@@ -159,7 +193,11 @@ sub data {
sub is_empty {
my $self = shift;
return $self->start >= $self->end;
if ($self->spans_origin) {
return ($self->end >= $self->start);
} else {
return ($self->start >= $self->end);
}
}
=head2 is_point
......@@ -194,7 +232,11 @@ sub contains {
return 0 if $self->is_empty or not defined $point;
throw 'point must be a number' unless looks_like_number($point);
return ($point >= $self->start and $point <= $self->end);
if ($self->spans_origin) {
return ($point >= $self->start or $point <= $self->end);
} else {
return ($point >= $self->start and $point <= $self->end);
}
}
=head2 intersects
......@@ -210,16 +252,26 @@ sub contains {
sub intersects {
my ($self, $interval) = @_;
assert_ref($interval, 'Bio::EnsEMBL::Utils::Interval');
return ($self->start <= $interval->end and $interval->start <= $self->end);
if ($self->spans_origin and $interval->spans_origin) {
return 1;
} elsif ($self->spans_origin or $interval->spans_origin) {
return ($interval->end >= $self->start or $interval->start <= $self->end);
} else {
return ($self->start <= $interval->end and $interval->start <= $self->end);
}
}
=head2 is_right_of
Arg [1] : An instance of Bio::EnsEMBL::Utils::Interval or a scalar
Description : Checks if this current interval is entirely to the right of a point.
Description : Checks if this current interval is entirely to the right of a point
or Interval.
More formally, the method will return true, if for every point x from
the current interval the inequality x > point holds.
the current interval the inequality x > point holds, where point
is either a single scalar, or point is the end of another Interval.
If spans_origin is true for either this Interval or an Interval
passed in, then this method throws an exception.
Returntype : boolean
Exceptions : Throws if this Interval or the Interval passed in spans the origin
Caller : general
......@@ -232,18 +284,26 @@ sub is_right_of {
return 0 unless defined $other;
if ( looks_like_number($other) ) {
return $self->start > $other;
return $self->spans_origin ?
throw "is_right_of not defined for an interval that spans the origin" :
$self->start > $other;
} elsif ($self->spans_origin or $other->spans_origin) {
throw "is_right_of not defined for an interval that spans the origin";
} else {
return $self->start > $other->end;
}
return $self->start > $other->end;
}
=head2 is_left_of
Arg [1] : An instance of Bio::EnsEMBL::Utils::Interval or a scalar
Description : Checks if this current interval is entirely to the left of a point.
Description : Checks if this current interval is entirely to the left of a point
or Interval.
More formally, the method will return true, if for every point x from
the current interval the inequality x < point holds.
the current interval the inequality x < point holds, where point
is either a single scalar, or point is the start of another Interval.
If spans_origin is true for either this Interval or an Interval
passed in, then this method throws an exception.
Returntype : boolean
Exceptions : Throws if this Interval or the Interval passed in spans the origin
Caller : general
......@@ -256,10 +316,14 @@ sub is_left_of {
return 0 unless defined $other;
if ( looks_like_number($other) ) {
return $self->end < $other;
return $self->spans_origin ?
throw "is_left_of not defined for an interval that spans the origin" :
$self->end < $other;
} elsif ($self->spans_origin or $other->spans_origin) {
throw "is_left_of not defined for an interval that spans the origin";
} else {
return $self->end < $other->start;
}
return $self->end < $other->start;
}
1;
......
......@@ -62,6 +62,9 @@ be added to or removed from the tree during its life cycle.
Implementation heavily inspired by https://github.com/tylerkahn/intervaltree-python
This implementation does not support Intervals having a start > end - i.e.
intervals spanning the origin of a circular chromosome.
=head1 METHODS
=cut
......@@ -297,6 +300,9 @@ sub _divide_intervals {
my ($s_center, $s_left, $s_right) = ([], [], []);
foreach my $interval (@{$intervals}) {
if ($interval->spans_origin) {
throw "Cannot build a tree containing an interval that spans the origin";
}
if ($interval->end < $x_center) {
push @{$s_left}, $interval;
} elsif ($interval->start > $x_center) {
......
......@@ -48,6 +48,7 @@ use Carp;
use Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node;
use Bio::EnsEMBL::Utils::Interval;
use Bio::EnsEMBL::Utils::Exception qw(throw);
=head2 new
......@@ -108,7 +109,7 @@ sub size {
Example : $tree->insert(Bio::EnsEMBL::Utils::Interval->new(10, 20, 'data'));
Description : Insert an interval in the tree
Returntype : scalar (1), upon success
Exceptions : none
Exceptions : thrown if Interval spans origin (has start > end)
Caller : general
=cut
......@@ -116,6 +117,9 @@ sub size {
sub insert {
my ($self, $i) = @_;
if ($i->spans_origin) {
throw "Cannot insert an interval that spans the origin into a mutable tree";
}
# base case: empty tree, assign new node to root
unless (defined $self->root) {
$self->root(Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node->new($self, $i));
......
......@@ -25,8 +25,6 @@ use_ok 'Bio::EnsEMBL::Utils::Interval';
throws_ok { Bio::EnsEMBL::Utils::Interval->new() } qr/specify.+?boundaries/, 'Throws with no arguments';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(1) } qr/specify.+?boundaries/, 'Throws with an undefined argument';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(10, 1) } qr/start.+?end/, 'Throws with invalid arguments';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(100, 10) } qr/start.+?end/, 'Throws with invalid arguments';
# degenerate (point) case
my $i = Bio::EnsEMBL::Utils::Interval->new(10, 10);
......@@ -36,27 +34,82 @@ ok($i->is_point, 'interval is point');
# a normal interval, start < end
$i = Bio::EnsEMBL::Utils::Interval->new(100, 200);
# an interval spanning the origin, start > end
my $i_span = Bio::EnsEMBL::Utils::Interval->new(200,100);
isa_ok($i, 'Bio::EnsEMBL::Utils::Interval');
isa_ok($i_span, 'Bio::EnsEMBL::Utils::Interval');
is($i->spans_origin, 0, 'spans_origin false for non-spanning interval');
is($i_span->spans_origin, 1, 'spans_origin true for spanning interval');
is($i->start, 100, 'start position');
is($i->end, 200, 'end position');
is($i_span->start, 200, 'spanning start position');
is($i_span->end, 100, 'spanning end position');
ok(!$i->is_empty, 'interval not empty');
ok(!$i->is_point, 'interval\'s not a point');
ok(!$i_span->is_empty, 'interval not empty');
ok(!$i_span->is_point, 'interval\'s not a point');
ok($i->contains(100) && $i->contains(200) && $i->contains(150), 'interval contains points');
ok(!$i->contains(99) && !$i->contains(201), 'interval does not contain points');
ok($i_span->contains(100) && $i_span->contains(200) && $i_span->contains(250), 'spanning interval contains points');
ok(!$i_span->contains(101) && !$i_span->contains(199), 'spanning interval does not contain points');
# check is_right_of/is_left_of with point/interval
ok(!$i->is_right_of && !$i->is_left_of, 'interval is not left/right of nothing');
ok(!$i_span->is_right_of && !$i_span->is_left_of, 'spanning interval is not left/right of nothing');
ok($i->is_right_of(99), 'interval right of point');
ok(!$i->is_right_of(100) && !$i->is_right_of(150) && !$i->is_right_of(201), 'interval not right of point');
ok($i->is_left_of(201), 'interval left of point');
ok(!$i->is_left_of(99) && !$i->is_left_of(150) && !$i->is_left_of(200), 'interval not left of point');
throws_ok { $i_span->is_right_of(150) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval and a point';
throws_ok { $i_span->is_left_of(150) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval and a point';
my $j = Bio::EnsEMBL::Utils::Interval->new(50, 99);
my $k = Bio::EnsEMBL::Utils::Interval->new(50, 150);
my $l = Bio::EnsEMBL::Utils::Interval->new(201, 250);
my $m = Bio::EnsEMBL::Utils::Interval->new(101, 199);
my $n_span = Bio::EnsEMBL::Utils::Interval->new(201,100);
# non-spanning with non-spanning query
ok($i->is_right_of($j), 'interval right of another');
ok(!$i->is_right_of($k) && !$i->is_right_of($l), 'interval not right of others');
ok($i->is_left_of($l), 'interval left of another');
ok(!$i->is_left_of($j) && !$i->is_left_of($k), 'interval not left of others');
# non-spanning with spanning query
throws_ok { $i->is_right_of($n_span) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i->is_left_of($n_span) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';
# spanning with non-spanning query
throws_ok { $i_span->is_right_of($m) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i_span->is_left_of($m) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';
# spanning with spanning query
throws_ok { $i_span->is_right_of($n_span) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i_span->is_left_of($n_span) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';
# check interval data
$j = Bio::EnsEMBL::Utils::Interval->new(100, 200, [100, 200]);
is_deeply($j->data, [100, 200], 'interval data');
......@@ -64,17 +117,31 @@ is_deeply($j->data, [100, 200], 'interval data');
# check intersection with other intervals
$k = Bio::EnsEMBL::Utils::Interval->new(50, 150);
ok($i->intersects($k), 'intervals intersect');
ok($i_span->intersects($k), 'spanning interval and interval intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(150, 250);
ok($i->intersects($k), 'intervals intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(50, 99);
ok(!$i->intersects($k), 'intervals do not intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(201, 250);
ok(!$i->intersects($k), 'intervals do not intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(101,199);
ok(!$i_span->intersects($k), 'spanning interval and interval do not intersect');
ok($i_span->intersects($n_span), 'two spanning intervals intersect');
ok($i->intersects($n_span), 'interval and spanning interval intersect');
my $o_span = Bio::EnsEMBL::Utils::Interval->new(201,99);
ok(!$i->intersects($o_span), 'interval and spanning interval do not intersect');
use_ok 'Bio::EnsEMBL::Utils::Tree::Interval::Immutable::Node';
use_ok 'Bio::EnsEMBL::Utils::Tree::Interval::Immutable';
my $intervals_with_span = [ Bio::EnsEMBL::Utils::Interval->new(20, 30),
Bio::EnsEMBL::Utils::Interval->new(30, 20)];
throws_ok { my $impossible_tree = Bio::EnsEMBL::Utils::Tree::Interval::Immutable->new($intervals_with_span) }
qr/Cannot build a tree containing an interval that spans the origin/,
'exception when building an interval tree with an interval that spans the origin';
my $intervals = [ Bio::EnsEMBL::Utils::Interval->new(121626874, 122092717),
Bio::EnsEMBL::Utils::Interval->new(121637917, 121658918),
Bio::EnsEMBL::Utils::Interval->new(122096077, 124088369) ];
......
......@@ -122,6 +122,11 @@ is(scalar @{$search_result}, 2, 'Number of search results');
is($search_result->[0]->data, 'data1', 'Search result');
is($search_result->[1]->data, 'data2', 'Search result');
$tree = Bio::EnsEMBL::Utils::Tree::Interval::Mutable->new();
throws_ok { $tree->insert(make_interval(200, 100, 'spanning_interval')) }
qr/Cannot insert an interval that spans the origin into a mutable tree/,
'exception when trying to insert an interval that spans the origin';
$tree = Bio::EnsEMBL::Utils::Tree::Interval::Mutable->new();
map { $tree->insert($_) } @{$intervals};
is($tree->size(), scalar @{$intervals}, 'Tree size');
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment