From ab5e317d8de88d61ef603e24a83c0da631edd8d2 Mon Sep 17 00:00:00 2001
From: Ewan Birney <birney@sanger.ac.uk>
Date: Thu, 21 Feb 2002 15:15:09 +0000
Subject: [PATCH] added peptide ---> contig coordinate mapping on Transcript.
 Not tested. Mainly to make sure I shift the code off my laptop

---
 modules/Bio/EnsEMBL/Exon.pm       | 42 +++++++++++++
 modules/Bio/EnsEMBL/StickyExon.pm | 97 +++++++++++++++++++++++++++++++
 modules/Bio/EnsEMBL/Transcript.pm | 79 +++++++++++++++++++++++++
 3 files changed, 218 insertions(+)

diff --git a/modules/Bio/EnsEMBL/Exon.pm b/modules/Bio/EnsEMBL/Exon.pm
index 86bc4af599..5e5c910725 100755
--- a/modules/Bio/EnsEMBL/Exon.pm
+++ b/modules/Bio/EnsEMBL/Exon.pm
@@ -1224,6 +1224,48 @@ sub get_cdna {
   return $temp_seq;
 }
 
+=head1
+
+  Arg  1   : integer start - relative to the exon
+  Arg  2   : integer end   - relative to the exon
+
+  Function : Provides a list of Bio::EnsEMBL::SeqFeatures which
+             is the genomic coordinates of this start/end on the exon
+             For simple exons this is one feature - for Stickies this
+             is overridden
+
+  Returns  : list of Bio::EnsEMBL::SeqFeature
+
+
+=cut
+
+sub contig_seqfeatures_from_relative_position {
+  my ($self,$start,$end) = @_;
+
+  if( !defined $end ) {
+    $self->throw("Have not defined all the methods!");
+  }
+
+  # easy
+  if( $start < 1 ) {
+    $self->warn("Attempting to fetch start less than 1 ($start)");
+    $start = 1;
+  }
+
+  if( $end > $self->length ) {
+    $self->warn("Attempting to fetch end greater than end of exon ($end)");
+    $end = $self->length;
+  }
+
+  my $sf = Bio::EnsEMBL::SeqFeature->new();
+  $sf->start($self->start + $start -1);
+  $sf->end($self->start + $end -1 );
+  $sf->strand($self->strand);
+  $sf->seqname($self->contig->id);
+
+  return ($sf);
+}
+
 
 # Inherited methods
 # but you do have all the SeqFeature documentation: reproduced here
diff --git a/modules/Bio/EnsEMBL/StickyExon.pm b/modules/Bio/EnsEMBL/StickyExon.pm
index 6f29ee15f4..2904b5cdd4 100755
--- a/modules/Bio/EnsEMBL/StickyExon.pm
+++ b/modules/Bio/EnsEMBL/StickyExon.pm
@@ -123,6 +123,103 @@ sub each_component_Exon{
 }
 
 
+
+=head1
+
+  Arg  1   : integer start - relative to the exon
+  Arg  2   : integer end   - relative to the exon
+
+  Function : Provides a list of Bio::EnsEMBL::SeqFeatures which
+             is the genomic coordinates of this start/end on the exon
+             For simple exons this is one feature  for Stickies this
+             is overridden and gives out a list of Bio::EnsEMBL::SeqFeatures
+
+  Returns  : list of Bio::EnsEMBL::SeqFeature
+
+
+=cut
+
+sub contig_seqfeatures_from_relative_position {
+  my ($self,$start,$end) = @_;
+
+  if( !defined $end ) {
+    $self->throw("Have not defined all the methods!");
+  }
+
+  # easy
+  if( $start < 1 ) {
+    $self->warn("Attempting to fetch start less than 1 ($start)");
+    $start = 1;
+  }
+
+  if( $end > $self->length ) {
+    $self->warn("Attempting to fetch end greater than end of exon ($end)");
+    $end = $self->length;
+  }
+
+  my @out;
+  my $sf;
+  my @exons = $self->each_component_Exon();
+  my $len = 0;
+  while( scalar(@exons) > 0 ) {
+    if( $exons[0]->length + $len > $start ) {
+       last;
+    } else {
+       my $discard = shift @exons;
+       $len += $discard;
+    }
+  }
+
+  # handle the first component exon
+
+  if( scalar(@exons) == 0 ) {
+     return @out;
+  }
+  
+  $sf = Bio::EnsEMBL::SeqFeature->new();
+  $sf->seqname($exons[0]->contig->id);
+  $sf->strand($exons[0]->strand);
+  $sf->start($exons[0]->start + $start - $len);
+
+  if( $end < $len + $exons[0]->length ) {
+      $sf->end($exons[0]->start + $end - $len);
+      return $sf;
+  } else {
+      $sf->end($exons[0]->end);
+      push(@out,$sf);
+  }
+
+
+  while( scalar(@exons) ) {
+     if( $exons[0]->length + $len > $end ) {
+        last;
+     }
+     $sf = Bio::EnsEMBL::SeqFeature->new();
+     $sf->seqname($exons[0]->contig->id);
+     $sf->strand($exons[0]->strand);
+     $sf->start($exons[0]->start);
+     $sf->start($exons[0]->end);
+     push(@out,$sf);
+     $len += $exons[0]->length;
+  }
+
+  if( scalar(@exons) == 0 ) {
+     return @out;
+  }
+
+  # handle the last exon
+
+  $sf = Bio::EnsEMBL::SeqFeature->new();
+  $sf->seqname($exons[0]->contig->id);
+  $sf->strand($exons[0]->strand);
+  $sf->start($exons[0]->start);
+  $sf->start($exons[0]->start + $end - $len);
+
+
+
+  return @out;
+}
+
 =head2 add_component_Exon
 
  Title   : add_component_Exon
diff --git a/modules/Bio/EnsEMBL/Transcript.pm b/modules/Bio/EnsEMBL/Transcript.pm
index 8a824baebb..967b23c01d 100755
--- a/modules/Bio/EnsEMBL/Transcript.pm
+++ b/modules/Bio/EnsEMBL/Transcript.pm
@@ -1192,6 +1192,85 @@ sub pep_coords {
 
 
 
+=head1
+
+  Arg  1   : integer start - relative to peptide
+  Arg  2   : integer end   - relative to peptide
+
+  Function : Provides a list of Bio::EnsEMBL::SeqFeatures which
+             is the genomic coordinates of this start/end on the peptide
+
+
+  Returns  : list of Bio::EnsEMBL::SeqFeature
+
+=cut
+
+sub convert_peptide_coordinate_to_contig {
+  my ($self,$start,$end) = @_;
+
+  if( !defined $end ) {
+    $self->throw("Must call with start/end");
+  }
+
+  # move start end into translate cDNA coordinates now.
+  # much easier!
+  $start = 3* $start;
+  $end   = 3* $end;
+
+  if( !defined $self->translation ) {
+    $self->throw("No translation, so no peptide coordinate!");
+  }
+
+  # get out exons, walk along until we hit first exon
+  # calculate remaining distance.
+
+  my $start_exon;
+  my @exons = $self->get_all_Exons;
+  while( my $exon = shift  @exons ) {
+    if( $exon = $self->translation->start_exon ) { # in memory reliance
+      my $start_exon = $exon;
+      last;
+    }
+  }
+
+  my $current_len_exon = $start_exon->length - $self->translation->start -1;
+  my $trans_len = 0;
+  my @out;
+
+  my $offset_into_exon = $self->translation->start;
+
+  my $exon;
+  while( $exon = shift @exons && $start < $trans_len + $current_len_exon ) {
+      ;
+  }
+
+  unshift(@exons,$exon);
+
+  # main loop!
+  while( my $exon = shift @exons && $end  < $trans_len + $current_len_exon ) {
+    # definitely want something in this exon. Find start position
+    my $start_in_exon;
+    my $end_in_exon;
+
+    if( $start > $trans_len ) {
+      $start_in_exon = $offset_into_exon + $start - $trans_len +1;
+    } else {
+      $start_in_exon = $offset_into_exon;
+    }
+    
+    if( $end < $trans_len + $current_len_exon ) {
+      $end_in_exon = $offset_into_exon -1 + ($end - $trans_len+1);
+    } else {
+      $end_in_exon = $exon->length;
+    }
+    
+
+    # the main reason for make this an attribute of the exon is to handle stickies
+    push(@out,$exon->contig_seqfeatures_from_relative_position($start_in_exon,$end_in_exon));
+  }
+
+  return @out;
+}
 
 sub find_coord {
   my ($self,$coord,$type) = @_;
-- 
GitLab