From 0bfeb666fcfc61186abc964007e84eb7ca2ff4e4 Mon Sep 17 00:00:00 2001
From: ens-bwalts <bwalts@ebi.ac.uk>
Date: Wed, 20 Nov 2019 10:53:25 +0000
Subject: [PATCH] factor interval tree construction out into a separate method

---
 modules/Bio/EnsEMBL/Mapper.pm | 110 ++++++++++++++--------------------
 1 file changed, 44 insertions(+), 66 deletions(-)

diff --git a/modules/Bio/EnsEMBL/Mapper.pm b/modules/Bio/EnsEMBL/Mapper.pm
index 699dca486c..5f3d03cbc0 100644
--- a/modules/Bio/EnsEMBL/Mapper.pm
+++ b/modules/Bio/EnsEMBL/Mapper.pm
@@ -305,42 +305,11 @@ sub map_coordinates {
 
   my $last_used_pair;
 
-  # my best guess is that lr stands for "list of regions"
-  my $lr = $hash->{ uc($id) };
-
   # if we don't already have an interval tree built for this id,
   # build one now
   if ( !defined( $self->{"_tree_$type"}->{ uc($id) } )) {
-    # create set of intervals to be checked for overlap
-    my $from_intervals;
-
-    foreach my $i (@{$lr}) {
-      my $start = $i->{$from}{start};
-      my $end = $i->{$from}{end};
-
-      if ($end < $start) {
-        my $tmp = $start;
-        $start = $end;
-        $end = $tmp;
-      }
-
-      push @{$from_intervals}, Bio::EnsEMBL::Utils::Interval->new($start, $end, $i);
-    }
-
-    #
-    # Create the interval tree defined on the above set of intervals
-    #
-    # Two options:
-    #
-    # 1. Use immutable interval tree implementation
-    # 2. Use mutable interval tree implementation
-    #
-    # As of release/99, we have more experience with the immutable
-    # interval tree, so we will stick with this one. A future
-    # refactoring effort may wish to replace this with a dynamically
-    # maintained mutable interval tree, rather than simply throwing
-    # trees away and rebuilding when the underlying interval set changes
-    $self->{"_tree_$type"}->{ uc($id) } = Bio::EnsEMBL::Utils::Tree::Interval::Immutable->new($from_intervals);
+    $self->{"_tree_$type"}->{ uc($id) } = _build_immutable_tree($from,
+                                                                $hash->{ uc($id) });
   }
   # query the interval tree (either cached or created new) for overlapping intervals
   my $overlap = $self->{"_tree_$type"}->{ uc($id) }->query($start, $end);
@@ -828,42 +797,12 @@ sub map_indel {
   }
   my @indel_coordinates;
 
-  my $lr = $hash->{ uc($id) };
-
   # if we don't already have an interval tree built for this id,
   # build one now
   if ( !defined $self->{"_tree_$type"}->{ uc($id) } ) {
-    # create set of intervals to be checked for overlap
-    my $from_intervals;
-
-    foreach my $i (@{$lr}) {
-      my $start = $i->{$from}{start};
-      my $end = $i->{$from}{end};
-
-      if ($end < $start) {
-        my $tmp = $start;
-        $start = $end;
-        $end = $tmp;
-      }
-
-      push @{$from_intervals}, Bio::EnsEMBL::Utils::Interval->new($start, $end, $i);
-    }
-
-    #
-    # Create the interval tree defined on the above set of intervals
-    #
-    # Two options:
-    #
-    # 1. Use immutable interval tree implementation
-    # 2. Use mutable interval tree implementation
-    #
-    # As of release/99, we have more experience with the immutable
-    # interval tree, so we will stick with this one. A future
-    # refactoring effort may wish to replace this with a dynamically
-    # maintained mutable interval tree, rather than simply throwing
-    # trees away and rebuilding when the underlying interval set changes
-    $self->{"_tree_$type"}->{ uc($id) } = Bio::EnsEMBL::Utils::Tree::Interval::Immutable->new($from_intervals);
-}
+    $self->{"_tree_$type"}->{ uc($id) } = _build_immutable_tree($from,
+                                                                $hash->{ uc($id) });
+  }
   # query the interval tree (either cached or created new) for overlapping intervals
   my $overlap = $self->{"_tree_$type"}->{ uc($id) }->query($start, $end);
 
@@ -1240,5 +1179,44 @@ sub _is_sorted {
    return $self->{'_is_sorted'};
 }
 
+# _build_immutable_tree
+#
+#  Arg 1       string $pair_side - selects the from or to half of each
+#              pair as the source of interval coordinates
+#  Arg 2       listref $pair_list - a list of Bio::EnsEMBL::Mapper::Pair
+#  Function    creates one interval per Pair from the start and end of
+#              the chosen side, each carrying a reference to its Pair,
+#              and builds an immutable interval tree over them
+#  Returntype  Bio::EnsEMBL::Utils::Tree::Interval::Immutable
+#  Exceptions  none
+#  Caller      internal
+
+sub _build_immutable_tree {
+  my ($pair_side, $pair_list) = @_;
+
+  # intervals for the tree, one per pair; this stays undefined when
+  # $pair_list has no entries
+  my $intervals;
+
+  foreach my $pair (@{$pair_list}) {
+    my $lower = $pair->{$pair_side}{start};
+    my $upper = $pair->{$pair_side}{end};
+
+    # intervals are built with their bounds in ascending order, so
+    # swap them when the pair side has end before start
+    ($lower, $upper) = ($upper, $lower) if $upper < $lower;
+
+    # each interval keeps a reference to its pair as third argument
+    push @{$intervals}, Bio::EnsEMBL::Utils::Interval->new($lower, $upper, $pair);
+  }
+
+  # Build the interval tree over the intervals collected above.
+  #
+  # As of release/99 the immutable implementation is the one we have
+  # most experience with. A future refactoring may prefer a dynamically
+  # maintained mutable interval tree over throwing trees away and
+  # rebuilding them whenever the underlying interval set changes.
+  return Bio::EnsEMBL::Utils::Tree::Interval::Immutable->new($intervals);
+}
 
 1;
-- 
GitLab