From ab5e317d8de88d61ef603e24a83c0da631edd8d2 Mon Sep 17 00:00:00 2001 From: Ewan Birney <birney@sanger.ac.uk> Date: Thu, 21 Feb 2002 15:15:09 +0000 Subject: [PATCH] added peptide ---> contig coordinate mapping on Transcript. Not tested. Mainly to make sure I shift the code off my laptop --- modules/Bio/EnsEMBL/Exon.pm | 42 +++++++++++++ modules/Bio/EnsEMBL/StickyExon.pm | 97 +++++++++++++++++++++++++++++++ modules/Bio/EnsEMBL/Transcript.pm | 79 +++++++++++++++++++++++++ 3 files changed, 218 insertions(+) diff --git a/modules/Bio/EnsEMBL/Exon.pm b/modules/Bio/EnsEMBL/Exon.pm index 86bc4af599..5e5c910725 100755 --- a/modules/Bio/EnsEMBL/Exon.pm +++ b/modules/Bio/EnsEMBL/Exon.pm @@ -1224,6 +1224,48 @@ sub get_cdna { return $temp_seq; } +=head1 + + Arg 1 : integer start - relative to the exon + Arg 2 : integer end - relative to the exon + + Function : Provides a list of Bio::EnsEMBL::SeqFeatures which + is the genomic coordinates of this start/end on the exon + For simple exons this is one feature - for Stickies this + is overridden + + Returns : list of Bio::EnsEMBL::SeqFeature + + +=cut + +sub contig_seqfeatures_from_relative_position { + my ($self,$start,$end) = @_; + + if( !defined $end ) { + $self->throw("Have not defined all the methods!"); + } + + # easy + if( $start < 1 ) { + $self->warn("Attempting to fetch start less than 1 ($start)"); + $start = 1; + } + + if( $end > $self->length ) { + $self->warn("Attempting to fetch end greater than end of exon ($end)"); + $end = $self->length; + } + + my $sf = Bio::EnsEMBL::SeqFeature->new(); + $sf->start($self->start + $start -1); + $sf->end($self->start + $end -1 ); + $sf->strand($self->strand); + $sf->seqname($self->contig->id); + + return ($sf); +} + # Inherited methods # but you do have all the SeqFeature documentation: reproduced here diff --git a/modules/Bio/EnsEMBL/StickyExon.pm b/modules/Bio/EnsEMBL/StickyExon.pm index 6f29ee15f4..2904b5cdd4 100755 --- a/modules/Bio/EnsEMBL/StickyExon.pm +++ b/modules/Bio/EnsEMBL/StickyExon.pm @@ -123,6 +123,103 @@ sub each_component_Exon{ } + +=head1 + + Arg 1 : integer start - relative to the exon + Arg 2 : integer end - relative to the exon + + Function : Provides a list of Bio::EnsEMBL::SeqFeatures which + is the genomic coordinates of this start/end on the exon + For simple exons this is one feature for Stickies this + is overridden and gives out a list of Bio::EnsEMBL::SeqFeatures + + Returns : list of Bio::EnsEMBL::SeqFeature + + +=cut + +sub contig_seqfeatures_from_relative_position { + my ($self,$start,$end) = @_; + + if( !defined $end ) { + $self->throw("Have not defined all the methods!"); + } + + # easy + if( $start < 1 ) { + $self->warn("Attempting to fetch start less than 1 ($start)"); + $start = 1; + } + + if( $end > $self->length ) { + $self->warn("Attempting to fetch end greater than end of exon ($end)"); + $end = $self->length; + } + + my @out; + my $sf; + my @exons = $self->each_component_Exon(); + my $len = 0; + while( scalar(@exons) > 0 ) { + if( $exons[0]->length + $len > $start ) { + last; + } else { + my $discard = shift @exons; + $len += $discard; + } + } + + # handle the first component exon + + if( scalar(@exons) == 0 ) { + return @out; + } + + $sf = Bio::EnsEMBL::SeqFeature->new(); + $sf->seqname($exons[0]->contig->id); + $sf->strand($exons[0]->strand); + $sf->start($exons[0]->start + $start - $len); + + if( $end < $len + $exons[0]->length ) { + $sf->end($exons[0]->start + $end - $len); + return $sf; + } else { + $sf->end($exons[0]->end); + push(@out,$sf); + } + + + while( scalar(@exons) ) { + if( $exons[0]->length + $len > $end ) { + last; + } + $sf = Bio::EnsEMBL::SeqFeature->new(); + $sf->seqname($exons[0]->contig->id); + $sf->strand($exons[0]->strand); + $sf->start($exons[0]->start); + $sf->start($exons[0]->end); + push(@out,$sf); + $len += $exons[0]->length; + } + + if( scalar(@exons) == 0 ) { + return @out; + } + + # handle the last exon + + $sf = Bio::EnsEMBL::SeqFeature->new(); + $sf->seqname($exons[0]->contig->id); + $sf->strand($exons[0]->strand); + $sf->start($exons[0]->start); + $sf->start($exons[0]->start + $end - $len); + + + + return @out; +} + =head2 add_component_Exon Title : add_component_Exon diff --git a/modules/Bio/EnsEMBL/Transcript.pm b/modules/Bio/EnsEMBL/Transcript.pm index 8a824baebb..967b23c01d 100755 --- a/modules/Bio/EnsEMBL/Transcript.pm +++ b/modules/Bio/EnsEMBL/Transcript.pm @@ -1192,6 +1192,85 @@ sub pep_coords { +=head1 + + Arg 1 : integer start - relative to peptide + Arg 2 : integer end - relative to peptide + + Function : Provides a list of Bio::EnsEMBL::SeqFeatures which + is the genomic coordinates of this start/end on the peptide + + + Returns : list of Bio::EnsEMBL::SeqFeature + +=cut + +sub convert_peptide_coordinate_to_contig { + my ($self,$start,$end) = @_; + + if( !defined $end ) { + $self->throw("Must call with start/end"); + } + + # move start end into translate cDNA coordinates now. + # much easier! + $start = 3* $start; + $end = 3* $end; + + if( !defined $self->translation ) { + $self->throw("No translation, so no peptide coordinate!"); + } + + # get out exons, walk along until we hit first exon + # calculate remaining distance. + + my $start_exon; + my @exons = $self->get_all_Exons; + while( my $exon = shift @exons ) { + if( $exon = $self->translation->start_exon ) { # in memory reliance + my $start_exon = $exon; + last; + } + } + + my $current_len_exon = $start_exon->length - $self->translation->start -1; + my $trans_len = 0; + my @out; + + my $offset_into_exon = $self->translation->start; + + my $exon; + while( $exon = shift @exons && $start < $trans_len + $current_len_exon ) { + ; + } + + unshift(@exons,$exon); + + # main loop! + while( my $exon = shift @exons && $end < $trans_len + $current_len_exon ) { + # definitely want something in this exon. Find start position + my $start_in_exon; + my $end_in_exon; + + if( $start > $trans_len ) { + $start_in_exon = $offset_into_exon + $start - $trans_len +1; + } else { + $start_in_exon = $offset_into_exon; + } + + if( $end < $trans_len + $current_len_exon ) { + $end_in_exon = $offset_into_exon -1 + ($end - $trans_len+1); + } else { + $end_in_exon = $exon->length; + } + + + # the main reason for make this an attribute of the exon is to handle stickies + push(@out,$exon->contig_seqfeatures_from_relative_position($start_in_exon,$end_in_exon)); + } + + return @out; +} sub find_coord { my ($self,$coord,$type) = @_; -- GitLab