From 687e973b6373ab73b6e532e92275df891d25dce5 Mon Sep 17 00:00:00 2001 From: Graham McVicker <mcvicker@sanger.ac.uk> Date: Mon, 26 Jan 2004 17:56:09 +0000 Subject: [PATCH] moved mapping functions out of Transcript and into utility class; added genomic2cds method --- modules/Bio/EnsEMBL/Transcript.pm | 354 +++++----------------- modules/Bio/EnsEMBL/TranscriptMapper.pm | 374 ++++++++++++++++++++++++ 2 files changed, 449 insertions(+), 279 deletions(-) create mode 100644 modules/Bio/EnsEMBL/TranscriptMapper.pm diff --git a/modules/Bio/EnsEMBL/Transcript.pm b/modules/Bio/EnsEMBL/Transcript.pm index bb36bb3b03..b07d4702ee 100755 --- a/modules/Bio/EnsEMBL/Transcript.pm +++ b/modules/Bio/EnsEMBL/Transcript.pm @@ -1,11 +1,10 @@ # -# BioPerl module for Transcript +# Ensembl module for Transcript # -# Cared for by Ewan Birney <birney@sanger.ac.uk> +# Copyright (c) 1999-2004 Ensembl # # You may distribute this module under the same terms as perl itself - -# POD documentation - main docs before the code +# =head1 NAME @@ -47,7 +46,8 @@ use Bio::EnsEMBL::Feature; use Bio::EnsEMBL::Exon; use Bio::EnsEMBL::Translation; use Bio::Tools::CodonTable; -use Bio::EnsEMBL::Mapper; +use Bio::EnsEMBL::TranscriptMapper; + use Bio::EnsEMBL::Utils::Argument qw( rearrange ); use Bio::EnsEMBL::Utils::Exception qw( deprecate warning throw ); @@ -619,48 +619,47 @@ sub coding_region_end { =cut sub add_Exon{ - my ($self,$exon) = @_; + my ($self,$exon) = @_; - #yup - we are going to be picky here... - unless(defined $exon && ref $exon && $exon->isa("Bio::EnsEMBL::Exon") ) { - throw("[$exon] is not a Bio::EnsEMBL::Exon!"); - } + #yup - we are going to be picky here... + unless(defined $exon && ref $exon && $exon->isa("Bio::EnsEMBL::Exon") ) { + throw("[$exon] is not a Bio::EnsEMBL::Exon!"); + } - - $self->{'_trans_exon_array'} ||= []; - - my $ea = $self->{'_trans_exon_array'}; - if( @$ea ) { - if( $exon->strand() == 1 ) { - if( $exon->start() > $ea->[$#$ea]->end() ) { - push(@{$self->{'_trans_exon_array'}},$exon); - } else { - # insert it at correct place - for( my $i=0; $i <= $#$ea; $i++ ) { - if( $exon->end() < $ea->[$i]->start() ) { - splice( @$ea, $i, 0, $exon ); - last; - } - } - } - } else { - if( $exon->end() < $ea->[$#$ea]->start() ) { - push(@{$self->{'_trans_exon_array'}},$exon); - } else { - # insert it at correct place - for( my $i=0; $i <= $#$ea; $i++ ) { - if( $exon->start() > $ea->[$i]->end() ) { - splice( @$ea, $i, 0, $exon ); - last; - } - } - } - } - } else { - push( @$ea, $exon ); - } - # recalculate start, end, slice, strand - $self->recalculate_coordinates(); + $self->{'_trans_exon_array'} ||= []; + + my $ea = $self->{'_trans_exon_array'}; + if( @$ea ) { + if( $exon->strand() == 1 ) { + if( $exon->start() > $ea->[$#$ea]->end() ) { + push(@{$self->{'_trans_exon_array'}},$exon); + } else { + # insert it at correct place + for( my $i=0; $i <= $#$ea; $i++ ) { + if( $exon->end() < $ea->[$i]->start() ) { + splice( @$ea, $i, 0, $exon ); + last; + } + } + } + } else { + if( $exon->end() < $ea->[$#$ea]->start() ) { + push(@{$self->{'_trans_exon_array'}},$exon); + } else { + # insert it at correct place + for( my $i=0; $i <= $#$ea; $i++ ) { + if( $exon->start() > $ea->[$i]->end() ) { + splice( @$ea, $i, 0, $exon ); + last; + } + } + } + } + } else { + push( @$ea, $exon ); + } + # recalculate start, end, slice, strand + $self->recalculate_coordinates(); } @@ -1038,7 +1037,7 @@ sub get_all_cdna_SNPs { sub flush_Exons{ my ($self,@args) = @_; - $self->{'_exon_coord_mapper'} = undef; + $self->{'transcript_mapper'} = undef; $self->{'coding_region_start'} = undef; $self->{'coding_region_end'} = undef; $self->{'cdna_coding_start'} = undef; @@ -1239,271 +1238,70 @@ sub seq { } +=head2 pep2genomic -=head1 pep2genomic - - Arg 1 : integer start - relative to peptide - Arg 2 : integer end - relative to peptide - - Function : Provides a list of Bio::EnsEMBL::SeqFeatures which - is the genomic coordinates of this start/end on the peptide - - Returns : list of Bio::EnsEMBL::SeqFeature + Description: See Bio::EnsEMBL::TranscriptMapper::pep2genomic =cut sub pep2genomic { - my ($self,$start,$end) = @_; - - if( !defined $end ) { - throw("Must call with start/end"); - } - - # move start end into translate cDNA coordinates now. - # much easier! - $start = 3* $start-2 + ($self->cdna_coding_start - 1); - $end = 3* $end + ($self->cdna_coding_start - 1); - - return $self->cdna2genomic( $start, $end ); + my $self = shift; + return $self->get_TranscriptMapper()->pep2genomic(@_); } - =head2 genomic2pep - Arg [1] : $start - The start position in genomic coordinates - Arg [2] : $end - The end position in genomic coordinates - Arg [3] : $strand - The strand of the genomic coordinates - Arg [4] : (optional) $contig - The contig the coordinates are on. This can be a slice - or RawContig, but must be the same object in memory as - the contig(s) of this transcripts exon(s), because of the - use of object identity. If no contig argument is specified the - contig of the first exon is used, which is fine for slice - coordinates but may cause incorrect mappings in raw contig - coords if this transcript spans multiple contigs. - Example : @coords = $transcript->genomic2pep($start, $end, $strand); - Description: Converts genomic coordinates to peptide coordinates. The - return value is a list of coordinates and gaps. - Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and - Bio::EnsEMBL::Mapper::Gap objects - Exceptions : none - Caller : general + Description: See Bio::EnsEMBL::TranscriptMapper::genomic2pep =cut sub genomic2pep { - my ($self, $start, $end, $strand, $contig) = @_; - - unless(defined $start && defined $end && defined $strand) { - throw("start, end and strand arguments are required"); - } - - my @coords = $self->genomic2cdna($start, $end, $strand, $contig); - - my @out; - - my $exons = $self->get_all_Exons; - my $start_phase; - if(@$exons) { - $start_phase = $exons->[0]->phase; - } else { - $start_phase = -1; - } - - foreach my $coord (@coords) { - if($coord->isa('Bio::EnsEMBL::Mapper::Gap')) { - push @out, $coord; - } else { - my $start = $coord->start; - my $end = $coord->end; - my $cdna_cstart = $self->cdna_coding_start; - my $cdna_cend = $self->cdna_coding_end; - - if($coord->strand == -1 || $end < $cdna_cstart || $start > $cdna_cend) { - #is all gap - does not map to peptide - my $gap = new Bio::EnsEMBL::Mapper::Gap; - $gap->start($start); - $gap->end($end); - push @out, $gap; - } else { - #we know area is at least partially overlapping CDS - - my $cds_start = $start - $cdna_cstart + 1; - my $cds_end = $end - $cdna_cstart + 1; - - if($start < $cdna_cstart) { - #start of coordinates are in the 5prime UTR - my $gap = new Bio::EnsEMBL::Mapper::Gap; - my $gap_len = $cdna_cstart - $start; - $gap->start($start); - $gap->end($cdna_cstart - 1); - #start is now relative to start of CDS - $cds_start = 1; - push @out, $gap; - } - - my $end_gap = undef; - if($end > $cdna_cend) { - #end of coordinates are in the 3prime UTR - $end_gap = new Bio::EnsEMBL::Mapper::Gap; - $end_gap->start($cdna_cend + 1); - $end_gap->end($end); - #adjust end to relative to CDS start - $cds_end = $cdna_cend - $cdna_cstart + 1; - } - - #start and end are now entirely in CDS and relative to CDS start - - #take into account possible N padding at beginning of CDS - my $shift = ($start_phase > 0) ? $start_phase : 0; - - #convert to peptide coordinates - my $pep_start = int(($cds_start + $shift + 2) / 3); - my $pep_end = int(($cds_end + $shift + 2) / 3); - $coord->start($pep_start); - $coord->end($pep_end); - - push @out, $coord; - - if($end_gap) { - #push out the region which was in the 3prime utr - push @out, $end_gap; - } - } - } - } - - return @out; + my $self = shift; + return $self->get_TranscriptMapper()->genomic2pep(@_); } - =head2 cdna2genomic - Arg [1] : $start - The start position in cdna coordinates - Arg [2] : $end - The end position in cdna coordinates - Example : @coords = $transcript->cdna2genomic($start, $end); - Description: Converts cdna coordinates to genomic coordinates. The - return value is a list of coordinates and gaps. - Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and - Bio::EnsEMBL::Mapper::Gap objects - Exceptions : none - Caller : general + Description: See Bio::EnsEMBL::TranscriptMapper::cdna2genomic =cut sub cdna2genomic { - my ($self,$start,$end) = @_; - - if( !defined $end ) { - throw("Must call with start/end"); - } - - my $mapper = $self->_get_cdna_coord_mapper(); - - return $mapper->map_coordinates( $self, $start, $end, 1, "cdna" ); + my $self = shift; + return $self->get_TranscriptMapper()->cdna2genomic(@_); } - - =head2 genomic2cdna - Arg [1] : $start - The start position in genomic coordinates - Arg [2] : $end - The end position in genomic coordinates - Arg [3] : (optional) $strand - The strand of the genomic coordinates (default value 1) - Arg [4] : (optional) $contig - The contig the coordinates are on. This can be a slice - or RawContig, but must be the same object in memory as - the contig(s) of this transcripts exon(s), because of the - use of object identity. If no contig argument is specified the - contig of the first exon is used, which is fine for slice - coordinates but may cause incorrect mappings in raw contig - coords if this transcript spans multiple contigs. - Example : @coords = $transcript->genomic2cdna($start, $end, $strnd, $ctg); - Description: Converts genomic coordinates to cdna coordinates. The - return value is a list of coordinates and gaps. Gaps - represent intronic or upstream/downstream regions which do - not comprise this transcripts cdna. Coordinate objects - represent genomic regions which map to exons (utrs included). - Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and - Bio::EnsEMBL::Mapper::Gap objects - Exceptions : none - Caller : general + Description: See Bio::EnsEMBL::TranscriptMapper::genomic2cdna =cut sub genomic2cdna { - my ($self, $start, $end, $strand, $slice) = @_; - - unless(defined $start && defined $end && defined $strand) { - throw("start, end and strand arguments are required\n"); - } - - #"ids" in mapper are contigs of exons, so use the same contig that should - #be attached to all of the exons... - if( $slice ) { - throw( "Arbitrary coordinates not supported yet" ); - if( ! $self->adaptor() ) { - throw( "Cant do genomic2cdna without database connection" ); - } - - my $asm_mapper_adaptor = $self->adaptor()->db()->get_AssemblyMapperAdaptor(); - # map from given slice coord system into $self->slice() ... - } - - my $mapper = $self->_get_cdna_coord_mapper; - - - #print "MAPPING $start - $end ($strand)\n"; - #print $contig->name . "=" . $self->get_all_Exons->[0]->contig->name . "\n"; - $slice = $self->slice(); - return $mapper->map_coordinates($slice, $start, $end, $strand, "genomic"); + my $self = shift; + return $self->get_TranscriptMapper->genomic2cdna(@_); } - -=head2 _get_cdna_coord_mapper +=head2 get_TranscriptMapper Args : none - Example : none - Description: creates and caches a mapper from "cdna" coordinate system to - "genomic" coordinate system. Uses Exons to help with that. Only - calculates in the translateable part. - Returntype : Bio::EnsEMBL::Mapper( "cdna", "genomic" ); + Example : my $trans_mapper = $transcript->get_TranscriptMapper(); + Description: Gets a TranscriptMapper object which can be used to perform + a variety of coordinate conversions relating this transcript, + genomic sequence and peptide resulting from this transcripts + translation. + Returntype : Bio::EnsEMBL::TranscriptMapper Exceptions : none - Caller : cdna2genomic, pep2genomic + Caller : cdna2genomic, pep2genomic, genomic2cdna, cdna2genomic =cut -sub _get_cdna_coord_mapper { +sub get_TranscriptMapper { my ( $self ) = @_; - - if( defined $self->{'_exon_coord_mapper'} ) { - return $self->{'_exon_coord_mapper'}; - } - - # - # the mapper is loaded with OBJECTS in place of the IDs !!!! - # the objects are the contigs in the exons - # - my $mapper; - $mapper = Bio::EnsEMBL::Mapper->new( "cdna", "genomic" ); - my @exons = @{$self->get_all_Exons() }; - my $start = 1; - for my $exon ( @exons ) { - $exon->load_genomic_mapper( $mapper, $self, $start ); - $start += $exon->length; - } - $self->{'_exon_coord_mapper'} = $mapper; - return $mapper; + $self->{'transcript_mapper'} ||= Bio::EnsEMBL::TranscriptMapper->new($self); + return $self->{'transcript_mapper'}; } @@ -1540,7 +1338,6 @@ sub end_Exon{ - =head2 description Title : description @@ -1559,7 +1356,6 @@ sub description{ $obj->{'description'} = $value; } return $obj->{'description'}; - } @@ -1699,7 +1495,7 @@ sub transform { #flush internal values that depend on the exon coords that may have been #cached - $new_transcript->{'_exon_coord_mapper'} = undef; + $new_transcript->{'transcript_mapper'} = undef; $new_transcript->{'coding_region_start'} = undef; $new_transcript->{'coding_region_end'} = undef; $new_transcript->{'cdna_coding_start'} = undef; @@ -1755,7 +1551,7 @@ sub transfer { #flush internal values that depend on the exon coords that may have been #cached - $new_transcript->{'_exon_coord_mapper'} = undef; + $new_transcript->{'transcript_mapper'} = undef; $new_transcript->{'coding_region_start'} = undef; $new_transcript->{'coding_region_end'} = undef; $new_transcript->{'cdna_coding_start'} = undef; @@ -1799,15 +1595,15 @@ sub recalculate_coordinates { if( $e->start() < $start ) { $start = $e->start(); } - + if( $e->end() > $end ) { $end = $e->end(); } - + if( $slice && $e->slice() && $e->slice()->name() ne $slice->name() ) { throw( "Exons with different slices not allowed on one Transcript" ); } - + if( $e->strand() != $strand ) { $transsplicing = 1; } @@ -1823,7 +1619,7 @@ sub recalculate_coordinates { #flush internal values that depend on the exon coords that may have been #cached - $self->{'_exon_coord_mapper'} = undef; + $self->{'transcript_mapper'} = undef; $self->{'coding_region_start'} = undef; $self->{'coding_region_end'} = undef; $self->{'cdna_coding_start'} = undef; diff --git a/modules/Bio/EnsEMBL/TranscriptMapper.pm b/modules/Bio/EnsEMBL/TranscriptMapper.pm new file mode 100644 index 0000000000..c28e272065 --- /dev/null +++ b/modules/Bio/EnsEMBL/TranscriptMapper.pm @@ -0,0 +1,374 @@ +# +# Ensembl module for TranscriptMapper +# +# Copyright (c) 2004 Ensembl +# +# You may distribute this module under the same terms as perl itself +# + +=head1 NAME + +TranscriptMapper - A utility class used to perform coordinate conversions +between a number of coordinate systems relating to transcripts + +=head1 SYNOPSIS + + my $trmapper = Bio::EnsEMBL::TranscriptMapper->new($transcript); + + @coords = $trmapper->cdna2genomic(123, 554); + + @coords = $trmapper->genomic2cdna(141, 500, -1); + + @coords = $trmapper->genomic2cds(141, 500, -1); + + @coords = $trmapper->pep2genomic(10, 60); + + @coords = $trmapper->genomic2pep(123, 400, 1); + + +=head1 DESCRIPTION + +This is a utility class which can be used to perform coordinate conversions +between a number of coordinate systems relating to transcripts. + +=head1 CONTACT + +Email questions to the ensembl developer mailing list <ensembl-dev@ebi.ac.uk> + +=head1 METHODS + +=cut + +use strict; +use warnings; + +use Bio::EnsEMBL::Utils::Exception qw(throw); + +use Bio::EnsEMBL::Mapper; +use Bio::EnsEMBL::Mapper::Gap; +use Bio::EnsEMBL::Mapper::Coordinate; + +package Bio::EnsEMBL::TranscriptMapper; + + + +=head2 new + + Arg [1] : Bio::EnsEMBL::Transcript $transcript + The transcript for which a TranscriptMapper should be created. + Example : $trans_mapper = Bio::EnsEMBL::TranscriptMapper->new($transcript) + Description: Creates a TranscriptMapper object which can be used to perform + various coordinate transformations relating to transcripts. + Note that the TranscriptMapper uses the transcript state at the + time of creation to perform the conversions, and that a new + TranscriptMapper must be created if the Transcript is altered. + 'Genomic' coordinates are coordinates which are relative to the + slice that the Transcript is on. + Returntype : Bio::EnsEMBL::TranscriptMapper + Exceptions : none + Caller : Transcript::get_TranscriptMapper + +=cut + +sub new { + my $caller = shift; + my $transcript = shift; + + my $class = ref($caller) || $caller; + + if(!ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript')) { + throw("Transcript argument is required."); + } + + # + # Create a cdna <-> genomic mapper and load it with exon coords + # + + my $mapper = Bio::EnsEMBL::Mapper->new( 'cdna', 'genomic'); + + my $slice = $transcript->slice(); + + my $seqname = ($slice) ? $slice->name() : $transcript->seqname(); + $seqname ||= 'genome'; + + my $cdna_start = 1; + foreach my $exon (@{$transcript->get_all_Exons()}) { + my $cdna_end = $cdna_start + $exon->length() - 1; + $mapper->add_map_coordinates('cdna', $cdna_start, $cdna_end, + $exon->strand(), + $seqname, $exon->start, $exon->end()); + $cdna_start = $cdna_end + 1; + } + + my $exons = $transcript->get_all_Exons(); + my $start_phase; + if(@$exons) { + $start_phase = $exons->[0]->phase; + } else { + $start_phase = -1; + } + + return bless({'exon_coord_mapper' => $mapper, + 'seqname' => $seqname, + 'start_phase' => $start_phase, + 'cdna_coding_start' => $transcript->cdna_coding_start(), + 'cdna_coding_end' => $transcript->cdna_coding_end()},$class); +} + + + +sub exon_coord_mapper { + my $self = shift; + return $self->{'exon_coord_mapper'}; +} + + + +=head2 cdna2genomic + + Arg [1] : $start + The start position in cdna coordinates + Arg [2] : $end + The end position in cdna coordinates + Example : @cdna_coords = $transcript_mapper->cdna2genomic($start, $end); + Description: Converts cdna coordinates to genomic coordinates. The + return value is a list of coordinates and gaps. + Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and + Bio::EnsEMBL::Mapper::Gap objects + Exceptions : none + Caller : general + +=cut + + +sub cdna2genomic { + my ($self,$start,$end) = @_; + + if( !defined $end ) { + throw("Must call with start/end"); + } + + my $mapper = $self->exon_coord_mapper(); + + return $mapper->map_coordinates( 'cdna', $start, $end, 1, "cdna" ); +} + + +=head2 genomic2cdna + + Arg [1] : $start + The start position in genomic coordinates + Arg [2] : $end + The end position in genomic coordinates + Arg [3] : (optional) $strand + The strand of the genomic coordinates (default value 1) + Example : @coords = $trans_mapper->genomic2cdna($start, $end, $strnd); + Description: Converts genomic coordinates to cdna coordinates. The + return value is a list of coordinates and gaps. Gaps + represent intronic or upstream/downstream regions which do + not comprise this transcripts cdna. Coordinate objects + represent genomic regions which map to exons (utrs included). + Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and + Bio::EnsEMBL::Mapper::Gap objects + Exceptions : none + Caller : general + +=cut + +sub genomic2cdna { + my ($self, $start, $end, $strand) = @_; + + unless(defined $start && defined $end && defined $strand) { + throw("start, end and strand arguments are required\n"); + } + + my $mapper = $self->exon_coord_mapper(); + + return $mapper->map_coordinates($self->{'seqname'}, $start, $end, $strand, + "genomic"); + +} + + + + +=head2 pep2genomic + + Arg [1] : int $start + start position in peptide coords + Arg [2] : int $end + end position in peptide coords + Example : @genomic_coords = $transcript_mapper->pep2genomic(23, 102); + Description: Converts peptide coordinates into genomic coordinates. The + coordinates returned are relative to the same slice that the + transcript used to construct this TranscriptMapper was on. + Returntype : list of Bio::EnsEMBL::Mapper::Gap and + Bio::EnsEMBL::Mapper::Coordinate objects + Exceptions : none + Caller : general + +=cut + +sub pep2genomic { + my ($self,$start,$end) = @_; + + if( !defined $end ) { + throw("Must call with start/end"); + } + + # move start end into translate cDNA coordinates now. + # much easier! + $start = 3* $start-2 + ($self->{'cdna_coding_start'} - 1); + $end = 3* $end + ($self->{'cdna_coding_start'} - 1); + + return $self->cdna2genomic( $start, $end ); +} + + + +=head2 genomic2cds + + Arg [1] : int $start + The genomic start position + Arg [2] : int $end + The genomic end position + Arg [3] : int $strand + The genomic strand + Example : @cds_coords = $trans_mapper->genomic2cds($start, $end, $strand); + Description: Converts genomic coordinates into CDS coordinates of the + transcript that was used to create this transcript mapper. + Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and + Bio::EnsEMBL::Mapper::Gap objects + Exceptions : throw if incorrect arguments + Caller : general + +=cut + +sub genomic2cds { + my ($self, $start, $end, $strand) = @_; + + if(!defined($start) || !defined($end) || !defined($strand)) { + throw("start, end and strand arguments are required"); + } + + my @coords = $self->genomic2cdna($start, $end, $strand); + + my @out; + + foreach my $coord (@coords) { + if($coord->isa('Bio::EnsEMBL::Mapper::Gap')) { + push @out, $coord; + } else { + my $start = $coord->start; + my $end = $coord->end; + my $cdna_cstart = $self->{'cdna_coding_start'}; + my $cdna_cend = $self->{'cdna_coding_end'}; + + if($coord->strand == -1 || $end < $cdna_cstart || $start > $cdna_cend) { + #is all gap - does not map to peptide + my $gap = new Bio::EnsEMBL::Mapper::Gap; + $gap->start($start); + $gap->end($end); + push @out, $gap; + } else { + #we know area is at least partially overlapping CDS + + my $cds_start = $start - $cdna_cstart + 1; + my $cds_end = $end - $cdna_cstart + 1; + + if($start < $cdna_cstart) { + #start of coordinates are in the 5prime UTR + my $gap = new Bio::EnsEMBL::Mapper::Gap; + my $gap_len = $cdna_cstart - $start; + $gap->start($start); + $gap->end($cdna_cstart - 1); + #start is now relative to start of CDS + $cds_start = 1; + push @out, $gap; + } + + my $end_gap = undef; + if($end > $cdna_cend) { + #end of coordinates are in the 3prime UTR + $end_gap = new Bio::EnsEMBL::Mapper::Gap; + $end_gap->start($cdna_cend + 1); + $end_gap->end($end); + #adjust end to relative to CDS start + $cds_end = $cdna_cend - $cdna_cstart + 1; + } + + #start and end are now entirely in CDS and relative to CDS start + $coord->start($cds_start); + $coord->end($cds_end); + + push @out, $coord; + + if($end_gap) { + #push out the region which was in the 3prime utr + push @out, $end_gap; + } + } + } + } + + return @out; + +} + + +=head2 genomic2pep + + Arg [1] : $start + The start position in genomic coordinates + Arg [2] : $end + The end position in genomic coordinates + Arg [3] : $strand + The strand of the genomic coordinates + Example : @pep_coords = $transcript->genomic2pep($start, $end, $strand); + Description: Converts genomic coordinates to peptide coordinates. The + return value is a list of coordinates and gaps. + Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and + Bio::EnsEMBL::Mapper::Gap objects + Exceptions : none + Caller : general + +=cut + +sub genomic2pep { + my ($self, $start, $end, $strand) = @_; + + unless(defined $start && defined $end && defined $strand) { + throw("start, end and strand arguments are required"); + } + + my @coords = $self->genomic2cds($start, $end, $strand); + + my @out; + + my $start_phase = $self->{'start_phase'}; + + #take into account possible N padding at beginning of CDS + my $shift = ($start_phase > 0) ? $start_phase : 0; + + foreach my $coord (@coords) { + if($coord->isa('Bio::EnsEMBL::Mapper::Gap')) { + push @out, $coord; + } else { + + #start and end are now entirely in CDS and relative to CDS start + + #convert to peptide coordinates + my $pep_start = int(($coord->start + $shift + 2) / 3); + my $pep_end = int(($coord->end + $shift + 2) / 3); + $coord->start($pep_start); + $coord->end($pep_end); + + push @out, $coord; + } + } + + return @out; +} + + +1; -- GitLab