From 6c9580d1ca5da0869243175fff3f0849d4b0061d Mon Sep 17 00:00:00 2001 From: Steve Trevanion <st3@sanger.ac.uk> Date: Tue, 9 Feb 2010 17:31:17 +0000 Subject: [PATCH] merge from branch-ensembl-dec09 --- .../Bio/EnsEMBL/Utils/ConversionSupport.pm | 4 +- .../EnsEMBL/Utils/VegaCuration/Transcript.pm | 4 +- .../EnsEMBL/Utils/VegaCuration/Translation.pm | 59 +++++++++++++------ 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm b/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm index e27cae37d9..a6665b6442 100644 --- a/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm +++ b/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm @@ -1124,6 +1124,7 @@ sub _by_chr_num { large chromosomes Arg[2] : (optional) Boolean to include duplicate regions, ie PAR or not (default is no) + Arg[3] : (optional) Coordsystem version to retrieve Example : my $chr_slices = $support->split_chromosomes_by_size; foreach my $block_size (keys %{ $chr_slices }) { @@ -1150,6 +1151,7 @@ sub split_chromosomes_by_size { my $self = shift; my $cutoff = shift || 5000000; my $dup = shift || 0; + my $cs_version = shift; my $slice_adaptor = $self->dba->get_SliceAdaptor; my $top_slices; if ($self->param('chromosomes')) { @@ -1157,7 +1159,7 @@ sub split_chromosomes_by_size { push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr); } } else { - $top_slices = $slice_adaptor->fetch_all('chromosome',undef,0,$dup); + $top_slices = $slice_adaptor->fetch_all('chromosome',$cs_version,0,$dup); } my ($big_chr, $small_chr, $min_big_chr, $min_small_chr); diff --git a/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm b/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm index 26fd65706c..003ff59e44 100644 --- a/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm +++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm @@ -99,7 +99,7 @@ sub check_remarks_and_update_names { if ($@) { $g_name = $gene->get_all_Attributes('name')->[0]->value; } - my $gene_remark = 'This locus has been annotated as fragmented because either there is not enough evidence covering the whole locus to identify the exact exon structure of the transcript, or because the transcript spans a gap in the assembly'; + my $gene_remark = 'This locus has been annotated as fragmented because either there is not enough evidence covering the whole locus to identify the exact exon structure of the transcript, or because the transcript spans a gap in the assembly'; my $attrib = [ Bio::EnsEMBL::Attribute->new( -CODE => 'remark', @@ -145,7 +145,7 @@ sub check_remarks_and_update_names { } } - #patch transcript names according to length and CDS + ##patch transcript names according to length and CDS $gene_c++; #separate coding and non_coding transcripts diff --git a/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm b/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm index 9b05d203f8..41212a2006 100644 --- a/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm +++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm @@ -127,10 +127,13 @@ sub check_CDS_start_end_remarks_loutre { foreach my $attribute (@{$trans->get_all_Attributes()}) { $attributes{$attribute->code} = $attribute; } +# warn $trans->stable_id; +# warn Data::Dumper::Dumper(\%attributes); my $coding_end = $trans->cdna_coding_end; my $coding_start = $trans->cdna_coding_start; my $trans_end = $trans->length; my $trans_seq = $trans->seq->seq; + my $stop_codon_offset = 3 + $trans->translation->end_Exon->end_phase; my $stop_codon = substr($trans_seq, $coding_end-3, 3); my $start_codon = substr($trans_seq, $coding_start-1, 3); @@ -138,37 +141,49 @@ sub check_CDS_start_end_remarks_loutre { my $results; #extra CDS end not found remarks - if ( ($attributes{'cds_end_NF'}->value == 1) - && ($coding_end != $trans_end) + if ($attributes{'cds_end_NF'}) { + if ( ($attributes{'cds_end_NF'}->value == 1) + && ($coding_end != $trans_end) && ( grep {$_ eq $stop_codon} @stops) ) { - $results->{'END_EXTRA'} = 1; +# warn $trans->stable_id.": $coding_end--$trans_end--$stop_codon"; +# warn $trans->translation->end_Exon->end_phase; + $results->{'END_EXTRA'} = $stop_codon1; + } } #missing CDS end not found remark if ( $coding_end == $trans_end ) { - if ($attributes{'cds_end_NF'}->value == 0 ) { - if (grep {$_ eq $stop_codon} @stops) { - $results->{'END_MISSING_2'} = 1; - } - else { - $results->{'END_MISSING_1'} = $stop_codon; + if ($attributes{'cds_end_NF'}) { + if ($attributes{'cds_end_NF'}->value == 0 ) { + if (! grep {$_ eq $stop_codon} @stops) { +# warn $trans->stable_id.": $coding_end--$trans_end--$stop_codon"; +# warn $trans->translation->end_Exon->end_phase; + $results->{'END_MISSING'}{'WRONG'} = $stop_codon; + } } } + elsif (! grep {$_ eq $stop_codon} @stops) { + $results->{'END_MISSING'}{'ABSENT'} = $stop_codon; + } } - #extra CDS start not found remark - if ( ($attributes{'cds_start_NF'}->value == 1 ) - && ($coding_start != 1) + #extra CDS start not found remark + if ( $attributes{'cds_start_NF'}) { + if ( ($attributes{'cds_start_NF'}->value == 1 ) && ($start_codon eq 'ATG') ) { - $results->{'START_EXTRA'} = 1; + $results->{'START_EXTRA'} = $start_codon; + } } #missing CDS start not found remark if ( $coding_start == 1) { - if ( $attributes{'cds_start_NF'}->value == 0 ) { - if ($start_codon eq 'ATG') { - $results->{'START_MISSING_2'} = 1; - } else { - $results->{'START_MISSING_1'} = $start_codon; + if ( $attributes{'cds_start_NF'} ) { + if ( $attributes{'cds_start_NF'}->value == 0 ) { + if ($start_codon ne 'ATG') { + $results->{'START_MISSING'}{'WRONG'} = $start_codon; + } } } + elsif ($start_codon ne 'ATG') { + $results->{'START_MISSING'}{'ABSENT'} = $start_codon; + } } return $results; } @@ -206,6 +221,14 @@ sub check_for_stops { my $tsi = $trans->stable_id; my $tID = $trans->dbID; my $tname = $trans->get_all_Attributes('name')->[0]->value; + + foreach my $rem (@{$trans->get_all_Attributes('hidden_remark')}) { + if ($rem->value =~ /not_for_Vega/) { + $support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1); + next TRANS; + } + } + $support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1); my $peptide; -- GitLab