From becd2dfc16629e22b6c232eaa6f931b03e3be63e Mon Sep 17 00:00:00 2001 From: Yuan Chen <yuan@sanger.ac.uk> Date: Wed, 13 Jul 2005 14:42:39 +0000 Subject: [PATCH] added the way to calculate splice site change caused by allele change --- .../Bio/EnsEMBL/Utils/TranscriptAlleles.pm | 96 +++++++++++-------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/modules/Bio/EnsEMBL/Utils/TranscriptAlleles.pm b/modules/Bio/EnsEMBL/Utils/TranscriptAlleles.pm index 01124daf45..2d2b925e68 100644 --- a/modules/Bio/EnsEMBL/Utils/TranscriptAlleles.pm +++ b/modules/Bio/EnsEMBL/Utils/TranscriptAlleles.pm @@ -44,7 +44,7 @@ use vars qw(@ISA @EXPORT_OK); @ISA = qw(Exporter); -@EXPORT_OK = qw(&get_all_ConsequenceType); +@EXPORT_OK = qw(get_all_ConsequenceType type_variation); use Data::Dumper; @@ -176,26 +176,6 @@ sub type_variation { throw("Not possible to calculate the consequence type for ",ref($var)," : Bio::EnsEMBL::Variation::ConsequenceType object expected"); } - # ARNE: need the following: - # $var and $tr need to have the same slice attached - # if not, its difficult to compare the variations and the - # transcript. If one or both are on a strain slice the - # coordinate transformations DONT WORK CORRECTLY - - # if( $tr->slice() != $var->slice() ) { - -# # problem: Are the coordinates comparable? -# # throw here, or warn and return, otherwise its too difficult -# if( $tr->slice()->is_not_strainSlice() && -# $var->slice()->is_not_strainSlice() ) { -# # coordintes are transferable -# } else { -# # coordinates are not transferable -# } -# } else { -# # yes they are comparable -# } - my $tm = $tr->get_TranscriptMapper(); my @coords = $tm->genomic2cdna($var->start, $var->end, @@ -219,6 +199,23 @@ sub type_variation { return \@out; } + my @coords_splice_2 = $tm->genomic2cdna($var->start -2, $var->end +2, $var->strand); + my @coords_splice_3 = $tm->genomic2cdna($var->start -3, $var->end +3, $var->strand); + my @coords_splice_8 = $tm->genomic2cdna($var->start -8, $var->end +8, $var->strand); + + my ($splice_site_2, $splice_site_3, $splice_site_8); + + if (scalar @coords_splice_2 >1) { + $splice_site_2=1; + } + elsif (scalar @coords_splice_3 >1) { + $splice_site_3=1; + } + elsif (scalar @coords_splice_8 >1) { + $splice_site_8=1; + } + + my $c = $coords[0]; if($c->isa('Bio::EnsEMBL::Mapper::Gap')) { @@ -235,7 +232,19 @@ sub type_variation { # variation must be intronic since mapped to cdna gap, but is within # transcript - $var->type('INTRONIC'); + + + if ($splice_site_2) { + $var->splice_site('ESSENTIAL_SPLICE_SITE'); + $var->type('INTRONIC'); + } + elsif ($splice_site_3 or $splice_site_8) { + $var->splice_site('SPLICE_SITE'); + $var->type('INTRONIC'); + } + else { + $var->type('INTRONIC'); + } return [$var]; } @@ -272,7 +281,7 @@ sub type_variation { } return [$var]; } - + $var->cds_start( $c->start() ); $var->cds_end( $c->end() ); @@ -283,18 +292,12 @@ sub type_variation { throw("Unexpected: Could map to CDS but not to peptide coordinates."); } - my @coords_amp = $tm->genomic2pep($var->start -2, $var->end +2, $var->strand); - - if (@coords_amp != @coords){ - $var->type( 'SPLICE_SITE' ); - } - $c = $coords[0]; $var->aa_start( $c->start()); $var->aa_end( $c->end()); - apply_aa_change($tr, $var); + apply_aa_change($tr, $var, $splice_site_2, $splice_site_3); return [$var]; } @@ -306,6 +309,8 @@ sub type_variation { sub apply_aa_change { my $tr = shift; my $var = shift; + my $splice_site_2 = shift; + my $splice_site_3 = shift; #my $peptide = $tr->translate->seq(); #to consider stop codon as well @@ -323,7 +328,7 @@ sub apply_aa_change { my $peptide = $mrna_seqobj->translate(undef,undef,undef,$codon_table)->seq; my $len = $var->aa_end - $var->aa_start + 1; my $old_aa = substr($peptide, $var->aa_start -1 , $len); - print "old aa $old_aa \n"; + my $codon_cds_start = $var->aa_start * 3 - 2; my $codon_cds_end = $var->aa_end * 3; my $codon_len = $codon_cds_end - $codon_cds_start + 1; @@ -343,8 +348,10 @@ sub apply_aa_change { if(abs(length($a) - $var_len) % 3) { # frameshifting variation, do not set peptide_allele string # since too complicated and could be very long - $var->type('FRAMESHIFT_CODING'); - return; + + $var->type('FRAMESHIFT_CODING'); + + return [$var]; } if($codon_len == 0) { # insertion @@ -382,13 +389,26 @@ sub apply_aa_change { } if(@aa_alleles > 1) { - if (! defined $var->type) { - $var->type('NON_SYNONYMOUS_CODING'); + if ( ! defined $var->type ){ + if ($splice_site_2 or $splice_site_3) { + $var->splice_site('SPLICE_SITE'); + $var->type('NON_SYNONYMOUS_CODING'); + } + else { + $var->type('NON_SYNONYMOUS_CODING'); + } } - } else { - $var->type('SYNONYMOUS_CODING'); } - + else { + if (($splice_site_2 or $splice_site_3) and $aa_alleles[0] !~ /\*/) { + $var->splice_site('SPLICE_SITE'); + $var->type('SYNONYMOUS_CODING'); + } + else { + $var->type('SYNONYMOUS_CODING'); + } + } + $var->aa_alleles(\@aa_alleles); } -- GitLab