Commit bc387e64 authored by Brandon Walts's avatar Brandon Walts
Browse files

ENSCORESW-3132: Limited implementation of Intervals with start > end (i.e....

ENSCORESW-3132: Limited implementation of Intervals with start > end (i.e. spanning the origin of a circular chromosome)
parent faba0de9
...@@ -35,7 +35,7 @@ Bio::EnsEMBL::Utils::Interval ...@@ -35,7 +35,7 @@ Bio::EnsEMBL::Utils::Interval
=head1 SYNOPSIS =head1 SYNOPSIS
# let's get an interval spanning 9e5 bp and associate it with some data # let's get an interval spanning 9e5 bp and associate it with some data
my $i2 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e6, { 'key1' => 'value1', 'key2' => 'value2' }); my $i1 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e6, { 'key1' => 'value1', 'key2' => 'value2' });
# and another one which overlaps with the previous, # and another one which overlaps with the previous,
# but with scalar associated data # but with scalar associated data
...@@ -50,12 +50,18 @@ Bio::EnsEMBL::Utils::Interval ...@@ -50,12 +50,18 @@ Bio::EnsEMBL::Utils::Interval
print "I1 and I2 do not overlap\n"; print "I1 and I2 do not overlap\n";
} }
# If an interval is defined with a start > end, then it is assumed
# to be spanning the origin on a circular chromosome
my $i3 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e2);
warn "Interval spans the origin" if $i3->spans_origin;
etc. etc.
=head1 DESCRIPTION =head1 DESCRIPTION
A class representing an interval defined on a genomic region. Instances of this A class representing an interval defined on a genomic region. Instances of this
class can store arbitrarily defined data. class can store arbitrarily defined data. If created with start > end, then it
is assumed that this interval is on a circular chromosome spanning the origin.
=head1 METHODS =head1 METHODS
...@@ -78,9 +84,12 @@ use Bio::EnsEMBL::Utils::Exception qw(throw); ...@@ -78,9 +84,12 @@ use Bio::EnsEMBL::Utils::Exception qw(throw);
Arg [3] : (optional) $data Arg [3] : (optional) $data
The data associated with the interval, can be anything The data associated with the interval, can be anything
Example : my $i = Bio::EnsEMBL::Utils::Interval->new(1e2, 2e2, { 'key' => 'value' }); Example : my $i = Bio::EnsEMBL::Utils::Interval->new(1e2, 2e2, { 'key' => 'value' });
my $i2 = Bio::EnsEMBL::Utils::Interval->new(1e5, 1e2);
$i->spans_origin # returns 0
$i2->spans_origin # returns 1
Description : Constructor. Creates a new instance Description : Constructor. Creates a new instance
Returntype : Bio::EnsEMBL::Utils::Interval Returntype : Bio::EnsEMBL::Utils::Interval
Exceptions : none Exceptions : Throws an exception if start and end are not defined.
Caller : general Caller : general
=cut =cut
...@@ -92,9 +101,17 @@ sub new { ...@@ -92,9 +101,17 @@ sub new {
my ($start, $end, $data) = @_; my ($start, $end, $data) = @_;
throw 'Must specify interval boundaries [start, end]' throw 'Must specify interval boundaries [start, end]'
unless defined $start and defined $end; unless defined $start and defined $end;
throw 'start must be <= end' if $start > $end;
my $spans_origin = 0;
if ($start > $end) {
$spans_origin = 1;
}
my $self = bless({ start => $start, end => $end, data => $data }, $class); my $self = bless({ start => $start,
end => $end,
data => $data ,
spans_origin => $spans_origin},
$class);
return $self; return $self;
} }
...@@ -146,6 +163,23 @@ sub data { ...@@ -146,6 +163,23 @@ sub data {
return $self->{data}; return $self->{data};
} }
=head2 spans_origin
Arg [] : none
Description : Returns whether this interval spans the origin of a circular
chromosome (more precisely: whether the interval was instantiated with start > end)
Returntype : boolean
Exceptions : none
Caller : general
=cut
sub spans_origin {
  # Read-only accessor: hands back the value stored under the object's
  # 'spans_origin' key.
  my ($self) = @_;
  return $self->{spans_origin};
}
=head2 is_empty =head2 is_empty
Arg [] : none Arg [] : none
...@@ -159,7 +193,11 @@ sub data { ...@@ -159,7 +193,11 @@ sub data {
sub is_empty { sub is_empty {
my $self = shift; my $self = shift;
return $self->start >= $self->end; if ($self->spans_origin) {
return ($self->end >= $self->start);
} else {
return ($self->start >= $self->end);
}
} }
=head2 is_point =head2 is_point
...@@ -194,7 +232,11 @@ sub contains { ...@@ -194,7 +232,11 @@ sub contains {
return 0 if $self->is_empty or not defined $point; return 0 if $self->is_empty or not defined $point;
throw 'point must be a number' unless looks_like_number($point); throw 'point must be a number' unless looks_like_number($point);
return ($point >= $self->start and $point <= $self->end); if ($self->spans_origin) {
return ($point >= $self->start or $point <= $self->end);
} else {
return ($point >= $self->start and $point <= $self->end);
}
} }
=head2 intersects =head2 intersects
...@@ -210,16 +252,26 @@ sub contains { ...@@ -210,16 +252,26 @@ sub contains {
sub intersects { sub intersects {
my ($self, $interval) = @_; my ($self, $interval) = @_;
assert_ref($interval, 'Bio::EnsEMBL::Utils::Interval'); assert_ref($interval, 'Bio::EnsEMBL::Utils::Interval');
return ($self->start <= $interval->end and $interval->start <= $self->end); if ($self->spans_origin and $interval->spans_origin) {
return 1;
} elsif ($self->spans_origin or $interval->spans_origin) {
return ($interval->end >= $self->start or $interval->start <= $self->end);
} else {
return ($self->start <= $interval->end and $interval->start <= $self->end);
}
} }
=head2 is_right_of =head2 is_right_of
Arg [1] : An instance of Bio::EnsEMBL::Utils::Interval or a scalar Arg [1] : An instance of Bio::EnsEMBL::Utils::Interval or a scalar
Description : Checks if this current interval is entirely to the right of a point. Description : Checks if this current interval is entirely to the right of a point
or Interval.
More formally, the method will return true, if for every point x from More formally, the method will return true, if for every point x from
the current interval the inequality x > point holds. the current interval the inequality x > point holds, where point
is either a single scalar, or point is the end of another Interval.
If spans_origin is true for either this Interval or an Interval
passed in, then this method throws an exception.
Returntype : boolean Returntype : boolean
Exceptions : none Exceptions : thrown if this Interval or the Interval passed in spans the origin
Caller : general Caller : general
...@@ -232,18 +284,26 @@ sub is_right_of { ...@@ -232,18 +284,26 @@ sub is_right_of {
return 0 unless defined $other; return 0 unless defined $other;
if ( looks_like_number($other) ) { if ( looks_like_number($other) ) {
return $self->start > $other; return $self->spans_origin ?
throw "is_right_of not defined for an interval that spans the origin" :
$self->start > $other;
} elsif ($self->spans_origin or $other->spans_origin) {
throw "is_right_of not defined for an interval that spans the origin";
} else {
return $self->start > $other->end;
} }
return $self->start > $other->end;
} }
=head2 is_left_of =head2 is_left_of
Arg [1] : An instance of Bio::EnsEMBL::Utils::Interval or a scalar Arg [1] : An instance of Bio::EnsEMBL::Utils::Interval or a scalar
Description : Checks if this current interval is entirely to the left of a point. Description : Checks if this current interval is entirely to the left of a point
or Interval.
More formally, the method will return true, if for every point x from More formally, the method will return true, if for every point x from
the current interval the inequality x < point holds. the current interval the inequality x < point holds, where point
is either a single scalar, or point is the start of another Interval.
If spans_origin is true for either this Interval or an Interval
passed in, then this method throws an exception.
Returntype : boolean Returntype : boolean
Exceptions : none Exceptions : thrown if this Interval or the Interval passed in spans the origin
Caller : general Caller : general
...@@ -256,10 +316,14 @@ sub is_left_of { ...@@ -256,10 +316,14 @@ sub is_left_of {
return 0 unless defined $other; return 0 unless defined $other;
if ( looks_like_number($other) ) { if ( looks_like_number($other) ) {
return $self->end < $other; return $self->spans_origin ?
throw "is_left_of not defined for an interval that spans the origin" :
$self->end < $other;
} elsif ($self->spans_origin or $other->spans_origin) {
throw "is_left_of not defined for an interval that spans the origin";
} else {
return $self->end < $other->start;
} }
return $self->end < $other->start;
} }
1; 1;
......
...@@ -62,6 +62,9 @@ be added to or removed from the tree during its life cycle. ...@@ -62,6 +62,9 @@ be added to or removed from the tree during its life cycle.
Implementation heavily inspired by https://github.com/tylerkahn/intervaltree-python Implementation heavily inspired by https://github.com/tylerkahn/intervaltree-python
This implementation does not support Intervals having a start > end - i.e.
intervals spanning the origin of a circular chromosome.
=head1 METHODS =head1 METHODS
=cut =cut
...@@ -297,6 +300,9 @@ sub _divide_intervals { ...@@ -297,6 +300,9 @@ sub _divide_intervals {
my ($s_center, $s_left, $s_right) = ([], [], []); my ($s_center, $s_left, $s_right) = ([], [], []);
foreach my $interval (@{$intervals}) { foreach my $interval (@{$intervals}) {
if ($interval->spans_origin) {
throw "Cannot build a tree containing an interval that spans the origin";
}
if ($interval->end < $x_center) { if ($interval->end < $x_center) {
push @{$s_left}, $interval; push @{$s_left}, $interval;
} elsif ($interval->start > $x_center) { } elsif ($interval->start > $x_center) {
......
...@@ -48,6 +48,7 @@ use Carp; ...@@ -48,6 +48,7 @@ use Carp;
use Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node; use Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node;
use Bio::EnsEMBL::Utils::Interval; use Bio::EnsEMBL::Utils::Interval;
use Bio::EnsEMBL::Utils::Exception qw(throw);
=head2 new =head2 new
...@@ -108,7 +109,7 @@ sub size { ...@@ -108,7 +109,7 @@ sub size {
Example : $tree->insert(Bio::EnsEMBL::Utils::Interval->new(10, 20, 'data')); Example : $tree->insert(Bio::EnsEMBL::Utils::Interval->new(10, 20, 'data'));
Description : Insert an interval in the tree Description : Insert an interval in the tree
Returntype : scalar (1), upon success Returntype : scalar (1), upon success
Exceptions : none Exceptions : thrown if Interval spans origin (has start > end)
Caller : general Caller : general
=cut =cut
...@@ -116,6 +117,9 @@ sub size { ...@@ -116,6 +117,9 @@ sub size {
sub insert { sub insert {
my ($self, $i) = @_; my ($self, $i) = @_;
if ($i->spans_origin) {
throw "Cannot insert an interval that spans the origin into a mutable tree";
}
# base case: empty tree, assign new node to root # base case: empty tree, assign new node to root
unless (defined $self->root) { unless (defined $self->root) {
$self->root(Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node->new($self, $i)); $self->root(Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node->new($self, $i));
......
...@@ -25,8 +25,6 @@ use_ok 'Bio::EnsEMBL::Utils::Interval'; ...@@ -25,8 +25,6 @@ use_ok 'Bio::EnsEMBL::Utils::Interval';
throws_ok { Bio::EnsEMBL::Utils::Interval->new() } qr/specify.+?boundaries/, 'Throws with no arguments'; throws_ok { Bio::EnsEMBL::Utils::Interval->new() } qr/specify.+?boundaries/, 'Throws with no arguments';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(1) } qr/specify.+?boundaries/, 'Throws with an undefined argument'; throws_ok { Bio::EnsEMBL::Utils::Interval->new(1) } qr/specify.+?boundaries/, 'Throws with an undefined argument';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(10, 1) } qr/start.+?end/, 'Throws with invalid arguments';
throws_ok { Bio::EnsEMBL::Utils::Interval->new(100, 10) } qr/start.+?end/, 'Throws with invalid arguments';
# degenerate (point) case # degenerate (point) case
my $i = Bio::EnsEMBL::Utils::Interval->new(10, 10); my $i = Bio::EnsEMBL::Utils::Interval->new(10, 10);
...@@ -36,27 +34,82 @@ ok($i->is_point, 'interval is point'); ...@@ -36,27 +34,82 @@ ok($i->is_point, 'interval is point');
# a normal interval, start < end # a normal interval, start < end
$i = Bio::EnsEMBL::Utils::Interval->new(100, 200); $i = Bio::EnsEMBL::Utils::Interval->new(100, 200);
# an interval spanning the origin, start > end
my $i_span = Bio::EnsEMBL::Utils::Interval->new(200,100);
isa_ok($i, 'Bio::EnsEMBL::Utils::Interval'); isa_ok($i, 'Bio::EnsEMBL::Utils::Interval');
isa_ok($i_span, 'Bio::EnsEMBL::Utils::Interval');
is($i->spans_origin, 0, 'spans_origin false for non-spanning interval');
is($i_span->spans_origin, 1, 'spans_origin true for spanning interval');
is($i->start, 100, 'start position'); is($i->start, 100, 'start position');
is($i->end, 200, 'end position'); is($i->end, 200, 'end position');
is($i_span->start, 200, 'spanning start position');
is($i_span->end, 100, 'spanning end position');
ok(!$i->is_empty, 'interval not empty'); ok(!$i->is_empty, 'interval not empty');
ok(!$i->is_point, 'interval\'s not a point'); ok(!$i->is_point, 'interval\'s not a point');
ok(!$i_span->is_empty, 'interval not empty');
ok(!$i_span->is_point, 'interval\'s not a point');
ok($i->contains(100) && $i->contains(200) && $i->contains(150), 'interval contains points'); ok($i->contains(100) && $i->contains(200) && $i->contains(150), 'interval contains points');
ok(!$i->contains(99) && !$i->contains(201), 'interval does not contain points'); ok(!$i->contains(99) && !$i->contains(201), 'interval does not contain points');
ok($i_span->contains(100) && $i_span->contains(200) && $i_span->contains(250), 'spanning interval contains points');
ok(!$i_span->contains(101) && !$i_span->contains(199), 'spanning interval does not contain points');
# check is_right_of/is_left_of with point/interval # check is_right_of/is_left_of with point/interval
ok(!$i->is_right_of && !$i->is_left_of, 'interval is not left/right of nothing'); ok(!$i->is_right_of && !$i->is_left_of, 'interval is not left/right of nothing');
ok(!$i_span->is_right_of && !$i_span->is_left_of, 'spanning interval is not left/right of nothing');
ok($i->is_right_of(99), 'interval right of point'); ok($i->is_right_of(99), 'interval right of point');
ok(!$i->is_right_of(100) && !$i->is_right_of(150) && !$i->is_right_of(201), 'interval not right of point'); ok(!$i->is_right_of(100) && !$i->is_right_of(150) && !$i->is_right_of(201), 'interval not right of point');
ok($i->is_left_of(201), 'interval left of point'); ok($i->is_left_of(201), 'interval left of point');
ok(!$i->is_left_of(99) && !$i->is_left_of(150) && !$i->is_left_of(200), 'interval not left of point'); ok(!$i->is_left_of(99) && !$i->is_left_of(150) && !$i->is_left_of(200), 'interval not left of point');
throws_ok { $i_span->is_right_of(150) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval and a point';
throws_ok { $i_span->is_left_of(150) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval and a point';
my $j = Bio::EnsEMBL::Utils::Interval->new(50, 99); my $j = Bio::EnsEMBL::Utils::Interval->new(50, 99);
my $k = Bio::EnsEMBL::Utils::Interval->new(50, 150); my $k = Bio::EnsEMBL::Utils::Interval->new(50, 150);
my $l = Bio::EnsEMBL::Utils::Interval->new(201, 250); my $l = Bio::EnsEMBL::Utils::Interval->new(201, 250);
my $m = Bio::EnsEMBL::Utils::Interval->new(101, 199);
my $n_span = Bio::EnsEMBL::Utils::Interval->new(201,100);
# non-spanning with non-spanning query
ok($i->is_right_of($j), 'interval right of another'); ok($i->is_right_of($j), 'interval right of another');
ok(!$i->is_right_of($k) && !$i->is_right_of($l), 'interval not right of others'); ok(!$i->is_right_of($k) && !$i->is_right_of($l), 'interval not right of others');
ok($i->is_left_of($l), 'interval left of another'); ok($i->is_left_of($l), 'interval left of another');
ok(!$i->is_left_of($j) && !$i->is_left_of($k), 'interval not left of others'); ok(!$i->is_left_of($j) && !$i->is_left_of($k), 'interval not left of others');
# non-spanning with spanning query
throws_ok { $i->is_right_of($n_span) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i->is_left_of($n_span) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';
# spanning with non-spanning query
throws_ok { $i_span->is_right_of($m) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i_span->is_left_of($m) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';
# spanning with spanning query
throws_ok { $i_span->is_right_of($n_span) }
qr/is_right_of not defined for an interval that spans the origin/,
'exception calling is_right_of with a spanning interval';
throws_ok { $i_span->is_left_of($n_span) }
qr/is_left_of not defined for an interval that spans the origin/,
'exception calling is_left_of with a spanning interval';
# check interval data # check interval data
$j = Bio::EnsEMBL::Utils::Interval->new(100, 200, [100, 200]); $j = Bio::EnsEMBL::Utils::Interval->new(100, 200, [100, 200]);
is_deeply($j->data, [100, 200], 'interval data'); is_deeply($j->data, [100, 200], 'interval data');
...@@ -64,17 +117,31 @@ is_deeply($j->data, [100, 200], 'interval data'); ...@@ -64,17 +117,31 @@ is_deeply($j->data, [100, 200], 'interval data');
# check intersection with other intervals # check intersection with other intervals
$k = Bio::EnsEMBL::Utils::Interval->new(50, 150); $k = Bio::EnsEMBL::Utils::Interval->new(50, 150);
ok($i->intersects($k), 'intervals intersect'); ok($i->intersects($k), 'intervals intersect');
ok($i_span->intersects($k), 'spanning interval and interval intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(150, 250); $k = Bio::EnsEMBL::Utils::Interval->new(150, 250);
ok($i->intersects($k), 'intervals intersect'); ok($i->intersects($k), 'intervals intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(50, 99); $k = Bio::EnsEMBL::Utils::Interval->new(50, 99);
ok(!$i->intersects($k), 'intervals do not intersect'); ok(!$i->intersects($k), 'intervals do not intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(201, 250); $k = Bio::EnsEMBL::Utils::Interval->new(201, 250);
ok(!$i->intersects($k), 'intervals do not intersect'); ok(!$i->intersects($k), 'intervals do not intersect');
$k = Bio::EnsEMBL::Utils::Interval->new(101,199);
ok(!$i_span->intersects($k), 'spanning interval and interval do not intersect');
ok($i_span->intersects($n_span), 'two spanning intervals intersect');
ok($i->intersects($n_span), 'interval and spanning interval intersect');
my $o_span = Bio::EnsEMBL::Utils::Interval->new(201,99);
ok(!$i->intersects($o_span), 'interval and spanning interval do not intersect');
use_ok 'Bio::EnsEMBL::Utils::Tree::Interval::Immutable::Node'; use_ok 'Bio::EnsEMBL::Utils::Tree::Interval::Immutable::Node';
use_ok 'Bio::EnsEMBL::Utils::Tree::Interval::Immutable'; use_ok 'Bio::EnsEMBL::Utils::Tree::Interval::Immutable';
my $intervals_with_span = [ Bio::EnsEMBL::Utils::Interval->new(20, 30),
Bio::EnsEMBL::Utils::Interval->new(30, 20)];
throws_ok { my $impossible_tree = Bio::EnsEMBL::Utils::Tree::Interval::Immutable->new($intervals_with_span) }
qr/Cannot build a tree containing an interval that spans the origin/,
'exception when building an interval tree with an interval that spans the origin';
my $intervals = [ Bio::EnsEMBL::Utils::Interval->new(121626874, 122092717), my $intervals = [ Bio::EnsEMBL::Utils::Interval->new(121626874, 122092717),
Bio::EnsEMBL::Utils::Interval->new(121637917, 121658918), Bio::EnsEMBL::Utils::Interval->new(121637917, 121658918),
Bio::EnsEMBL::Utils::Interval->new(122096077, 124088369) ]; Bio::EnsEMBL::Utils::Interval->new(122096077, 124088369) ];
......
...@@ -122,6 +122,11 @@ is(scalar @{$search_result}, 2, 'Number of search results'); ...@@ -122,6 +122,11 @@ is(scalar @{$search_result}, 2, 'Number of search results');
is($search_result->[0]->data, 'data1', 'Search result'); is($search_result->[0]->data, 'data1', 'Search result');
is($search_result->[1]->data, 'data2', 'Search result'); is($search_result->[1]->data, 'data2', 'Search result');
$tree = Bio::EnsEMBL::Utils::Tree::Interval::Mutable->new();
throws_ok { $tree->insert(make_interval(200, 100, 'spanning_interval')) }
qr/Cannot insert an interval that spans the origin into a mutable tree/,
'exception when trying to insert an interval that spans the origin';
$tree = Bio::EnsEMBL::Utils::Tree::Interval::Mutable->new(); $tree = Bio::EnsEMBL::Utils::Tree::Interval::Mutable->new();
map { $tree->insert($_) } @{$intervals}; map { $tree->insert($_) } @{$intervals};
is($tree->size(), scalar @{$intervals}, 'Tree size'); is($tree->size(), scalar @{$intervals}, 'Tree size');
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment