# Patch notes (leading text ahead of the first diff header; not part of any hunk).
#  * ConversionSupport.pm: split_chromosomes_by_size reads an optional third
#    argument ($cs_version) and passes it to SliceAdaptor->fetch_all.
#    NOTE(review): the branch taken when the 'chromosomes' param is set still
#    calls fetch_by_region('chromosome', $chr) without a version - confirm intended.
#  * VegaCuration/Transcript.pm: touches the $gene_remark literal and one comment.
#  * VegaCuration/Translation.pm: check_CDS_start_end_remarks_loutre now tests that
#    the cds_end_NF / cds_start_NF attributes exist before calling ->value, and
#    records END_MISSING / START_MISSING under 'WRONG' or 'ABSENT' sub-keys holding
#    the codon found. END_EXTRA stores $stop_codon, the lexical declared in the
#    same hunk (an undeclared name there would not compile under strict).
#    NOTE(review): $stop_codon_offset is assigned but not read in the visible hunks.
#    check_for_stops skips transcripts whose hidden_remark matches /not_for_Vega/.
diff --git a/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm b/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm index e27cae37d97c759adc24e3a7ed88835f89cff40b..a6665b6442766ac5343b0a8959141568dde78370 100644 --- a/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm +++ b/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm @@ -1124,6 +1124,7 @@ sub _by_chr_num { large chromosomes Arg[2] : (optional) Boolean to include duplicate regions, ie PAR or not (default is no) + Arg[3] : (optional) Coordsystem version to retrieve Example : my $chr_slices = $support->split_chromosomes_by_size; foreach my $block_size (keys %{ $chr_slices }) { @@ -1150,6 +1151,7 @@ sub split_chromosomes_by_size { my $self = shift; my $cutoff = shift || 5000000; my $dup = shift || 0; + my $cs_version = shift; my $slice_adaptor = $self->dba->get_SliceAdaptor; my $top_slices; if ($self->param('chromosomes')) { @@ -1157,7 +1159,7 @@ sub split_chromosomes_by_size { push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr); } } else { - $top_slices = $slice_adaptor->fetch_all('chromosome',undef,0,$dup); + $top_slices = $slice_adaptor->fetch_all('chromosome',$cs_version,0,$dup); } my ($big_chr, $small_chr, $min_big_chr, $min_small_chr); diff --git a/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm b/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm index 26fd65706c63f277fc338b37b43b50e3df1e686d..003ff59e445cd993cda091f4bab857f672376a57 100644 --- a/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm +++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm @@ -99,7 +99,7 @@ sub check_remarks_and_update_names { if ($@) { $g_name = $gene->get_all_Attributes('name')->[0]->value; } - my $gene_remark = 'This locus has been annotated as fragmented because either there is not enough evidence covering the whole locus to identify the exact exon structure of the transcript, or because the transcript spans a gap in the assembly'; + my $gene_remark = 'This locus has been annotated as fragmented because 
either there is not enough evidence covering the whole locus to identify the exact exon structure of the transcript, or because the transcript spans a gap in the assembly'; my $attrib = [ Bio::EnsEMBL::Attribute->new( -CODE => 'remark', @@ -145,7 +145,7 @@ sub check_remarks_and_update_names { } } - #patch transcript names according to length and CDS + ##patch transcript names according to length and CDS $gene_c++; #separate coding and non_coding transcripts diff --git a/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm b/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm index 9b05d203f89ee5d4a9cebe5d030fe8c7d80d0b68..41212a2006f05847de7eee9c883df5968689f04d 100644 --- a/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm +++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm @@ -127,10 +127,13 @@ sub check_CDS_start_end_remarks_loutre { foreach my $attribute (@{$trans->get_all_Attributes()}) { $attributes{$attribute->code} = $attribute; } +# warn $trans->stable_id; +# warn Data::Dumper::Dumper(\%attributes); my $coding_end = $trans->cdna_coding_end; my $coding_start = $trans->cdna_coding_start; my $trans_end = $trans->length; my $trans_seq = $trans->seq->seq; + my $stop_codon_offset = 3 + $trans->translation->end_Exon->end_phase; my $stop_codon = substr($trans_seq, $coding_end-3, 3); my $start_codon = substr($trans_seq, $coding_start-1, 3); @@ -138,37 +141,49 @@ sub check_CDS_start_end_remarks_loutre { my $results; #extra CDS end not found remarks - if ( ($attributes{'cds_end_NF'}->value == 1) - && ($coding_end != $trans_end) + if ($attributes{'cds_end_NF'}) { + if ( ($attributes{'cds_end_NF'}->value == 1) + && ($coding_end != $trans_end) && ( grep {$_ eq $stop_codon} @stops) ) { - $results->{'END_EXTRA'} = 1; +# warn $trans->stable_id.": $coding_end--$trans_end--$stop_codon"; +# warn $trans->translation->end_Exon->end_phase; + $results->{'END_EXTRA'} = $stop_codon; + } } #missing CDS end not found remark if ( $coding_end == $trans_end ) { - if 
($attributes{'cds_end_NF'}->value == 0 ) { - if (grep {$_ eq $stop_codon} @stops) { - $results->{'END_MISSING_2'} = 1; - } - else { - $results->{'END_MISSING_1'} = $stop_codon; + if ($attributes{'cds_end_NF'}) { + if ($attributes{'cds_end_NF'}->value == 0 ) { + if (! grep {$_ eq $stop_codon} @stops) { +# warn $trans->stable_id.": $coding_end--$trans_end--$stop_codon"; +# warn $trans->translation->end_Exon->end_phase; + $results->{'END_MISSING'}{'WRONG'} = $stop_codon; + } } } + elsif (! grep {$_ eq $stop_codon} @stops) { + $results->{'END_MISSING'}{'ABSENT'} = $stop_codon; + } } - #extra CDS start not found remark - if ( ($attributes{'cds_start_NF'}->value == 1 ) - && ($coding_start != 1) + #extra CDS start not found remark + if ( $attributes{'cds_start_NF'}) { + if ( ($attributes{'cds_start_NF'}->value == 1 ) && ($start_codon eq 'ATG') ) { - $results->{'START_EXTRA'} = 1; + $results->{'START_EXTRA'} = $start_codon; + } } #missing CDS start not found remark if ( $coding_start == 1) { - if ( $attributes{'cds_start_NF'}->value == 0 ) { - if ($start_codon eq 'ATG') { - $results->{'START_MISSING_2'} = 1; - } else { - $results->{'START_MISSING_1'} = $start_codon; + if ( $attributes{'cds_start_NF'} ) { + if ( $attributes{'cds_start_NF'}->value == 0 ) { + if ($start_codon ne 'ATG') { + $results->{'START_MISSING'}{'WRONG'} = $start_codon; + } } } + elsif ($start_codon ne 'ATG') { + $results->{'START_MISSING'}{'ABSENT'} = $start_codon; + } } return $results; } @@ -206,6 +221,14 @@ sub check_for_stops { my $tsi = $trans->stable_id; my $tID = $trans->dbID; my $tname = $trans->get_all_Attributes('name')->[0]->value; + + foreach my $rem (@{$trans->get_all_Attributes('hidden_remark')}) { + if ($rem->value =~ /not_for_Vega/) { + $support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1); + next TRANS; + } + } + $support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1); my $peptide;