From ac77391be76126beccc1ea3481befb953480a569 Mon Sep 17 00:00:00 2001
From: Andrew Yates <ayates@ebi.ac.uk>
Date: Sun, 1 Apr 2012 22:04:08 +0000
Subject: [PATCH] Replacing the 6 coordinate methods with versions which can
 operate on transcripts which have not been persisted to a database. The DBID
 is still used for caching purposes

---
 modules/Bio/EnsEMBL/Exon.pm | 512 ++++++++++++++++++------------------
 1 file changed, 252 insertions(+), 260 deletions(-)

diff --git a/modules/Bio/EnsEMBL/Exon.pm b/modules/Bio/EnsEMBL/Exon.pm
index 3266b535bd..6d34bcf321 100755
--- a/modules/Bio/EnsEMBL/Exon.pm
+++ b/modules/Bio/EnsEMBL/Exon.pm
@@ -398,33 +398,32 @@ sub strand {
 =cut
 
 sub cdna_start {
-  my $self = shift;
-  my ($transcript) = @_;
+  my ($self, $transcript) = @_;
+  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');
 
-  if (    !defined($transcript)
-       || !ref($transcript)
-       || !$transcript->isa('Bio::EnsEMBL::Transcript') )
-  {
-    throw("Argument is not a transcript");
+  my $id = $transcript->dbID();
+  
+  if(defined $id && exists $self->{cdna_start}->{$id}) {
+    return $self->{cdna_start}->{$id};
   }
-
-  my $transcript_id = $transcript->dbID();
-
-  if ( !exists( $self->{'cdna_start'}->{$transcript_id} ) ) {
-    my @coords =
-      $transcript->genomic2cdna( $self->start(), $self->end(),
-                                 $self->strand() );
-
-    if ( @coords && !$coords[0]->isa('Bio::EnsEMBL::Mapper::Gap') ) {
-      $self->{'cdna_start'}->{$transcript_id} = $coords[0]->start();
-    } elsif (@coords) {
-      throw("First part of exon maps into a gap");
-    } else {
-      throw("Can not map exon");
-    }
+  
+  my $cdna_start;
+  my @coords = $transcript->genomic2cdna($self->start(), $self->end(), $self->strand());
+  if(@coords && !$coords[0]->isa('Bio::EnsEMBL::Mapper::Gap')) {
+    $cdna_start = $coords[0]->start();
+  }
+  elsif(@coords) {
+    throw "Last part of exon maps into gap";
+  }
+  else {
+    throw "Can not map exon";
+  }
+  
+  if(defined $id) {
+    $self->{cdna_start}->{$id} = $cdna_start;
   }
 
-  return $self->{'cdna_start'}->{$transcript_id};
+  return $cdna_start;
 } ## end sub cdna_start
 
 =head2 cdna_end
@@ -448,33 +447,32 @@ sub cdna_start {
 =cut
 
 sub cdna_end {
-  my $self = shift;
-  my ($transcript) = @_;
+  my ($self, $transcript) = @_;
+  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');
 
-  if (    !defined($transcript)
-       || !ref($transcript)
-       || !$transcript->isa('Bio::EnsEMBL::Transcript') )
-  {
-    throw("Argument is not a transcript");
+  my $id = $transcript->dbID();
+  
+  if(defined $id && exists $self->{cdna_end}->{$id}) {
+    return $self->{cdna_end}->{$id};
   }
-
-  my $transcript_id = $transcript->dbID();
-
-  if ( !exists( $self->{'cdna_end'}->{$transcript_id} ) ) {
-    my @coords =
-      $transcript->genomic2cdna( $self->start(), $self->end(),
-                                 $self->strand() );
-
-    if ( @coords && !$coords[-1]->isa('Bio::EnsEMBL::Mapper::Gap') ) {
-      $self->{'cdna_end'}->{$transcript_id} = $coords[-1]->end();
-    } elsif (@coords) {
-      throw("Last part of exon maps into gap");
-    } else {
-      throw("Can not map exon");
-    }
+  
+  my $cdna_end;
+  my @coords = $transcript->genomic2cdna($self->start(), $self->end(), $self->strand());
+  if(@coords && !$coords[-1]->isa('Bio::EnsEMBL::Mapper::Gap')) {
+    $cdna_end = $coords[-1]->end();
+  }
+  elsif(@coords) {
+    throw "Last part of exon maps into gap";
+  }
+  else {
+    throw "Can not map exon";
+  }
+  
+  if(defined $id) {
+    $self->{cdna_end}->{$id} = $cdna_end;
   }
 
-  return $self->{'cdna_end'}->{$transcript_id};
+  return $cdna_end;
 } ## end sub cdna_end
 
 =head2 cdna_coding_start
@@ -497,56 +495,55 @@ sub cdna_end {
 =cut
 
 sub cdna_coding_start {
-  my $self = shift;
-  my ($transcript) = @_;
+  my ($self, $transcript) = @_;
+  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');
 
-  if (    !defined($transcript)
-       || !ref($transcript)
-       || !$transcript->isa('Bio::EnsEMBL::Transcript') )
-  {
-    throw("Argument is not a transcript");
+  my $id = $transcript->dbID();
+  
+  if(defined $id && exists $self->{cdna_coding_start}->{$id}) {
+    return $self->{cdna_coding_start}->{$id};
   }
+  
+  my $cdna_coding_start;
+  my $transcript_coding_start = $transcript->cdna_coding_start();
+  if(defined $transcript_coding_start) {
+    my $cdna_start = $self->cdna_start($transcript);
+    
+    if ( $transcript_coding_start < $cdna_start ) {
+      # Coding region starts upstream of this exon...
 
-  my $transcript_id = $transcript->dbID();
-
-  if ( !exists( $self->{'cdna_coding_start'}->{$transcript_id} ) ) {
-    my $transcript_coding_start = $transcript->cdna_coding_start();
-
-    if ( !defined($transcript_coding_start) ) {
-      # This is a non-coding transcript.
-      $self->{'cdna_coding_start'}->{$transcript_id} = undef;
-      $self->{'cdna_coding_end'}->{$transcript_id}   = undef;
+      if ( $transcript->cdna_coding_end() < $cdna_start ) {
+        # ... and also ends upstream of this exon.
+        $cdna_coding_start = undef;
+      }
+      else {
+        # ... and does not end upstream of this exon.
+        $cdna_coding_start = $cdna_start;
+      }
     } else {
-      my $cdna_start = $self->cdna_start($transcript);
+      # Coding region starts either within or downstream of this
+      # exon.
 
-      if ( $transcript_coding_start < $cdna_start ) {
-        # Coding region starts upstream of this exon...
-
-        if ( $transcript->cdna_coding_end() < $cdna_start ) {
-          # ... and also ends upstream of this exon.
-          $self->{'cdna_coding_start'}->{$transcript_id} = undef;
-        } else {
-          # ... and does not end upstream of this exon.
-          $self->{'cdna_coding_start'}->{$transcript_id} = $cdna_start;
-        }
-      } else {
-        # Coding region starts either within or downstream of this
-        # exon.
-
-        if ( $transcript_coding_start <= $self->cdna_end($transcript) )
-        {
-          # Coding region starts within this exon.
-          $self->{'cdna_coding_start'}->{$transcript_id} =
-            $transcript_coding_start;
-        } else {
-          # Coding region starts downstream of this exon.
-          $self->{'cdna_coding_start'}->{$transcript_id} = undef;
-        }
+      if ( $transcript_coding_start <= $self->cdna_end($transcript) ) {
+        # Coding region starts within this exon.
+        $cdna_coding_start = $transcript_coding_start;
       }
-    } ## end else [ if ( !defined($transcript_coding_start...
-  } ## end if ( !exists( $self->{...
-
-  return $self->{'cdna_coding_start'}->{$transcript_id};
+      else {
+        # Coding region starts downstream of this exon.
+        $cdna_coding_start = undef;
+      }
+    }
+  }
+  else {
+    $cdna_coding_start = undef;
+  }
+  
+  if(defined $id) {
+    $self->{cdna_coding_start}->{$id} = $cdna_coding_start;
+    $self->{cdna_coding_end}->{$id} = undef if ! defined $cdna_coding_start;
+  }
+  
+  return $cdna_coding_start;
 } ## end sub cdna_coding_start
 
 =head2 cdna_coding_end
@@ -569,56 +566,56 @@ sub cdna_coding_start {
 =cut
 
 sub cdna_coding_end {
-  my $self = shift;
-  my ($transcript) = @_;
+  my ($self, $transcript) = @_;
+  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');
 
-  if (    !defined($transcript)
-       || !ref($transcript)
-       || !$transcript->isa('Bio::EnsEMBL::Transcript') )
-  {
-    throw("Argument is not a transcript");
+  my $id = $transcript->dbID();
+  
+  if(defined $id && exists $self->{cdna_coding_end}->{$id}) {
+    return $self->{cdna_coding_end}->{$id};
   }
-
-  my $transcript_id = $transcript->dbID();
-
-  if ( !exists( $self->{'cdna_coding_end'}->{$transcript_id} ) ) {
-    my $transcript_coding_end = $transcript->cdna_coding_end();
-
-    if ( !defined($transcript_coding_end) ) {
-      # This is a non-coding transcript.
-      $self->{'cdna_coding_start'}->{$transcript_id} = undef;
-      $self->{'cdna_coding_end'}->{$transcript_id}   = undef;
-    } else {
-      my $cdna_end = $self->cdna_end($transcript);
-
-      if ( $transcript_coding_end > $cdna_end ) {
-        # Coding region ends downstream of this exon...
-
-        if ( $transcript->cdna_coding_start() > $cdna_end ) {
-          # ... and also starts downstream of this exon.
-          $self->{'cdna_coding_end'}->{$transcript_id} = undef;
-        } else {
-          # ... and does not start downstream of this exon.
-          $self->{'cdna_coding_end'}->{$transcript_id} = $cdna_end;
-        }
-      } else {
-        # Coding region ends either within or upstream of this
-        # exon.
-
-        if ( $transcript_coding_end >= $self->cdna_start($transcript) )
-        {
-          # Coding region ends within this exon.
-          $self->{'cdna_coding_end'}->{$transcript_id} =
-            $transcript_coding_end;
-        } else {
-          # Coding region ends upstream of this exon.
-          $self->{'cdna_coding_end'}->{$transcript_id} = undef;
-        }
+  
+  my $cdna_coding_end;
+  my $transcript_coding_end = $transcript->cdna_coding_end();
+  if(defined $transcript_coding_end) {
+    my $cdna_end = $self->cdna_end($transcript);
+    
+    if ( $transcript_coding_end > $cdna_end ) {
+      
+      # Coding region ends downstream of this exon...
+      if ( $transcript->cdna_coding_start() > $cdna_end ) {
+        # ... and also starts downstream of this exon.
+        $cdna_coding_end = undef;
+      } 
+      else {
+        # ... and does not start downstream of this exon.
+        $cdna_coding_end = $cdna_end;
       }
-    } ## end else [ if ( !defined($transcript_coding_end...
-  } ## end if ( !exists( $self->{...
-
-  return $self->{'cdna_coding_end'}->{$transcript_id};
+    }  
+    else {
+      # Coding region ends either within or upstream of this
+      # exon.
+
+      if ( $transcript_coding_end >= $self->cdna_start($transcript) ) {
+        # Coding region ends within this exon.
+        $cdna_coding_end = $transcript_coding_end;
+      } 
+      else {
+        # Coding region ends upstream of this exon.
+        $cdna_coding_end = undef;
+      }
+    }
+  }
+  else {
+    $cdna_coding_end = undef;
+  }
+  
+  if(defined $id) {
+    $self->{cdna_coding_end}->{$id} = $cdna_coding_end;
+    $self->{cdna_coding_start}->{$id} = undef if ! defined $cdna_coding_end;
+  }
+  
+  return $cdna_coding_end;
 } ## end sub cdna_coding_end
 
 =head2 coding_region_start
@@ -644,57 +641,56 @@ sub cdna_coding_end {
 # of cdna_coding_start().
 
 sub coding_region_start {
-  my $self = shift;
-  my ($transcript) = @_;
+  my ($self, $transcript) = @_;
+  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');
 
-  if (    !defined($transcript)
-       || !ref($transcript)
-       || !$transcript->isa('Bio::EnsEMBL::Transcript') )
-  {
-    throw("Argument is not a transcript");
+  my $id = $transcript->dbID();
+  
+  if(defined $id && exists $self->{coding_region_start}->{$id}) {
+    return $self->{coding_region_start}->{$id};
   }
-
-  my $transcript_id = $transcript->dbID();
-
-  if ( !exists( $self->{'coding_region_start'}->{$transcript_id} ) ) {
-    my $transcript_coding_start = $transcript->coding_region_start();
-
-    if ( !defined($transcript_coding_start) ) {
-      # This is a non-coding transcript.
-      $self->{'coding_region_start'}->{$transcript_id} = undef;
-      $self->{'coding_region_end'}->{$transcript_id}   = undef;
-    } else {
-      my $start = $self->start();
-
-      if ( $transcript_coding_start < $start ) {
-        # Coding region starts upstream of this exon...
-
-        if ( $transcript->coding_region_end() < $start ) {
-          # ... and also ends upstream of this exon.
-          $self->{'coding_region_start'}->{$transcript_id} = undef;
-          $self->{'coding_region_end'}->{$transcript_id}   = undef;
-        } else {
-          # ... and does not end upstream of this exon.
-          $self->{'coding_region_start'}->{$transcript_id} = $start;
-        }
-      } else {
-        # Coding region starts either within or downstream of this
-        # exon.
-
-        if ( $transcript_coding_start <= $self->end() ) {
-          # Coding region starts within this exon.
-          $self->{'coding_region_start'}->{$transcript_id} =
-            $transcript_coding_start;
-        } else {
-          # Coding region starts downstream of this exon.
-          $self->{'coding_region_start'}->{$transcript_id} = undef;
-          $self->{'coding_region_end'}->{$transcript_id}   = undef;
-        }
+  
+  my $coding_region_start;
+  my $transcript_coding_start = $transcript->cdna_coding_start();
+  if(defined $transcript_coding_start) {
+    my $start = $self->start();
+    
+    if ( $transcript_coding_start < $start ) {
+      # Coding region starts upstream of this exon...
+
+      if ( $transcript->coding_region_end() < $start ) {
+        # ... and also ends upstream of this exon.
+        $coding_region_start = undef;
+      } 
+      else {
+        # ... and does not end upstream of this exon.
+        $coding_region_start = $start;
       }
-    } ## end else [ if ( !defined($transcript_coding_start...
-  } ## end if ( !exists( $self->{...
-
-  return $self->{'coding_region_start'}->{$transcript_id};
+    } 
+    else {
+      # Coding region starts either within or downstream of this
+      # exon.
+
+      if ( $transcript_coding_start <= $self->end() ) {
+        # Coding region starts within this exon.
+        $coding_region_start = $transcript_coding_start;
+      } 
+      else {
+        # Coding region starts downstream of this exon.
+        $coding_region_start = undef;
+      }
+    }
+  }
+  else {
+    $coding_region_start = undef;
+  }
+  
+  if(defined $id) {
+    $self->{coding_region_start}->{$id} = $coding_region_start;
+    $self->{coding_region_end}->{$id} = undef if ! defined $coding_region_start;
+  }
+  
+  return $coding_region_start;
 } ## end sub coding_region_start
 
 =head2 coding_region_end
@@ -720,57 +716,54 @@ sub coding_region_start {
 # of cdna_coding_end().
 
 sub coding_region_end {
-  my $self = shift;
-  my ($transcript) = @_;
+  my ($self, $transcript) = @_;
+  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');
 
-  if (    !defined($transcript)
-       || !ref($transcript)
-       || !$transcript->isa('Bio::EnsEMBL::Transcript') )
-  {
-    throw("Argument is not a transcript");
+  my $id = $transcript->dbID();
+  
+  if(defined $id && exists $self->{coding_region_end}->{$id}) {
+    return $self->{coding_region_end}->{$id};
   }
-
-  my $transcript_id = $transcript->dbID();
-
-  if ( !exists( $self->{'coding_region_end'}->{$transcript_id} ) ) {
-    my $transcript_coding_end = $transcript->coding_region_end();
-
-    if ( !defined($transcript_coding_end) ) {
-      # This is a non-coding transcript.
-      $self->{'coding_region_start'}->{$transcript_id} = undef;
-      $self->{'coding_region_end'}->{$transcript_id}   = undef;
-    } else {
-      my $end = $self->end();
-
-      if ( $transcript_coding_end > $end ) {
-        # Coding region ends downstream of this exon...
-
-        if ( $transcript->coding_region_start() > $end ) {
-          # ... and also starts downstream of this exon.
-          $self->{'coding_region_start'}->{$transcript_id} = undef;
-          $self->{'coding_region_end'}->{$transcript_id}   = undef;
-        } else {
-          # ... and does not start downstream of this exon.
-          $self->{'coding_region_end'}->{$transcript_id} = $end;
-        }
-      } else {
-        # Coding region ends either within or upstream of this
-        # exon.
-
-        if ( $transcript_coding_end >= $self->start() ) {
-          # Coding region ends within this exon.
-          $self->{'coding_region_end'}->{$transcript_id} =
-            $transcript_coding_end;
-        } else {
-          # Coding region ends upstream of this exon.
-          $self->{'coding_region_start'}->{$transcript_id} = undef;
-          $self->{'coding_region_end'}->{$transcript_id}   = undef;
-        }
+  
+  my $coding_region_end;
+  my $transcript_coding_end = $transcript->coding_region_end();
+  if(defined $transcript_coding_end) {
+    
+    my $end = $self->end();
+    if($transcript_coding_end > $end) {
+      # Coding region ends downstream of this exon...
+
+      if ( $transcript->coding_region_start() > $end ) {
+        # ... and also starts downstream of this exon.
+        $coding_region_end = undef;
+      } 
+      else {
+        # ... and does not start downstream of this exon.
+        $coding_region_end = $end;
       }
-    } ## end else [ if ( !defined($transcript_coding_end...
-  } ## end if ( !exists( $self->{...
-
-  return $self->{'coding_region_end'}->{$transcript_id};
+    }
+    else {
+      # Coding region ends either within or upstream of this
+      # exon.
+      if ( $transcript_coding_end >= $self->start() ) {
+        $coding_region_end = $transcript_coding_end;
+      }
+      else {
+        $coding_region_end = undef;
+      }
+    }
+  }
+  else {
+    # This is a non-coding transcript.
+    $coding_region_end = undef;
+  }
+  
+  if(defined $id) {
+    $self->{coding_region_end}->{$id} = $coding_region_end;
+    $self->{coding_region_start}->{$id} = undef if ! defined $coding_region_end;
+  }
+  
+  return $coding_region_end;
 } ## end sub coding_region_end
 
 =head2 slice
@@ -926,8 +919,8 @@ sub transform {
 
   # catch for old style transform calls
   if( !@_  || ( ref $_[0] && 
-	       ($_[0]->isa( "Bio::EnsEMBL::Slice" ) or $_[0]->isa( "Bio::EnsEMBL::LRGSlice" ))
-	      )) {
+         ($_[0]->isa( "Bio::EnsEMBL::Slice" ) or $_[0]->isa( "Bio::EnsEMBL::LRGSlice" ))
+        )) {
     deprecate('Calling transform without a coord system name is deprecated.');
     return $self->_deprecated_transform(@_);
   }
@@ -1025,7 +1018,7 @@ sub add_supporting_features {
     } 
     
     if ((defined $self->slice() && defined $feature->slice())&&
-	    ( $self->slice()->name() ne $feature->slice()->name())){
+      ( $self->slice()->name() ne $feature->slice()->name())){
       throw("Supporting feat not in same coord system as exon\n" .
             "exon is attached to [".$self->slice()->name()."]\n" .
             "feat is attached to [".$feature->slice()->name()."]");
@@ -1034,8 +1027,8 @@ sub add_supporting_features {
     foreach my $added_feature ( @{ $self->{_supporting_evidence} } ){
       # compare objects
       if ( $feature == $added_feature ){
-	# this feature has already been added
-	next FEATURE;
+  # this feature has already been added
+  next FEATURE;
       }
     }
     
@@ -1128,9 +1121,9 @@ sub find_supporting_evidence {
     } 
     else {
       if ($f->entire_seq()->name eq $self->slice()->name) {
-	if ($f->end >= $self->start && $f->start <= $self->end && $f->strand == $self->strand) {
-	  $self->add_supporting_features($f);
-	}
+  if ($f->end >= $self->start && $f->start <= $self->end && $f->strand == $self->strand) {
+    $self->add_supporting_features($f);
+  }
       }
     }
   }
@@ -1344,8 +1337,8 @@ sub peptide {
     else {
       my ($e_id, $tr_id) = ($self->stable_id(), $tr->stable_id());
       throw("Error. Exon maps to multiple locations in peptide and those". 
-	    " locations are not continuous." .
-	    " Is this exon [$e_id] a member of this transcript [$tr_id]?");
+      " locations are not continuous." .
+      " Is this exon [$e_id] a member of this transcript [$tr_id]?");
     }
   } 
   elsif(scalar(@coords) == 1) {
@@ -1401,7 +1394,7 @@ sub _merge_ajoining_coords {
   }
   
   my $new_coord = Bio::EnsEMBL::Mapper::Coordinate->new(
-	$coord->id(), $start, $last_end, $coord->strand(), $coord->rank());
+  $coord->id(), $start, $last_end, $coord->strand(), $coord->rank());
   return $new_coord;
 }
 
@@ -1446,29 +1439,29 @@ sub seq {
     }
 
     if ($self->slice->is_circular() ) {
-	if (    $self->slice->start > $self->slice->end) {
+  if (    $self->slice->start > $self->slice->end) {
 # Normally exons overlapping chromosome origin will have negative feature start, but slice will be from 1 .. length 
 # But in case you got an exon attached to  a sub slice try this
-	    my $mid_point = $self->slice()->seq_region_length() - $self->slice()->start() + 1;
-	    my $seq1 =	$self->slice()->subseq( $self->start(), $mid_point, $self->strand() );
+      my $mid_point = $self->slice()->seq_region_length() - $self->slice()->start() + 1;
+      my $seq1 =  $self->slice()->subseq( $self->start(), $mid_point, $self->strand() );
 
-	    my $seq2 = $self->slice()->subseq( $mid_point + 1, $self->end(), $self->strand() );
+      my $seq2 = $self->slice()->subseq( $mid_point + 1, $self->end(), $self->strand() );
 
-	    $seq = $self->strand() > 0 ? "$seq1$seq2" : "$seq2$seq1";
-	} elsif ( $self->start < 0 ||  $self->start > $self->end) {
+      $seq = $self->strand() > 0 ? "$seq1$seq2" : "$seq2$seq1";
+  } elsif ( $self->start < 0 ||  $self->start > $self->end) {
 # Normally exons overlapping chromosome origin will be 0 based, and can have negative start
 # But if you go via sub_Slice it gives you chromosome based coordinates, i.e it will have start greater then end
-	    my $start_point = $self->slice->seq_region_length + $self->slice->start;
-	    my $mid_point = $self->slice->seq_region_length;
-	    my $seq1 =	$self->slice->subseq( $self->start, $mid_point, $self->strand);
-	    my $seq2 = $self->slice->subseq(1, $self->end, $self->strand );
-	    $seq = $self->strand > 0 ? "$seq1$seq2" : "$seq2$seq1";
-	} else {
+      my $start_point = $self->slice->seq_region_length + $self->slice->start;
+      my $mid_point = $self->slice->seq_region_length;
+      my $seq1 =  $self->slice->subseq( $self->start, $mid_point, $self->strand);
+      my $seq2 = $self->slice->subseq(1, $self->end, $self->strand );
+      $seq = $self->strand > 0 ? "$seq1$seq2" : "$seq2$seq1";
+  } else {
 # End this is the case for genes not overlapping the origin
-	    $seq = $self->slice()->subseq( $self->start(), $self->end(), $self->strand() );
-	}
+      $seq = $self->slice()->subseq( $self->start(), $self->end(), $self->strand() );
+  }
     } else {
-	$seq = $self->slice()->subseq( $self->start(), $self->end(), $self->strand() );
+  $seq = $self->slice()->subseq( $self->start(), $self->end(), $self->strand() );
     }
 
     $self->{'_seq_cache'} = $seq;
@@ -1666,4 +1659,3 @@ sub type {
 
 
 1;
-
-- 
GitLab