diff --git a/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm b/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm
index 5d6f0445e36cf83e7c474fb2c0f8efac08a17bf1..98d729c9b2543c7731ed44026fc92124119ce6c3 100644
--- a/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm
+++ b/modules/Bio/EnsEMBL/Utils/ConversionSupport.pm
@@ -1079,7 +1079,7 @@ sub split_chromosomes_by_size {
             push @{ $top_slices }, $slice_adaptor->fetch_by_region('chromosome', $chr);
         }
     } else {
-        $top_slices = $slice_adaptor->fetch_all("toplevel");
+        $top_slices = $slice_adaptor->fetch_all('chromosome');
     }
 
     my ($big_chr, $small_chr, $min_big_chr, $min_small_chr);
diff --git a/modules/Bio/EnsEMBL/Utils/VegaCuration/Gene.pm b/modules/Bio/EnsEMBL/Utils/VegaCuration/Gene.pm
new file mode 100644
index 0000000000000000000000000000000000000000..be7c598d80f2f73de61a883f9e487b5f2a1ba01f
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Gene.pm
@@ -0,0 +1,81 @@
+package Bio::EnsEMBL::Utils::VegaCuration::Gene;
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::VegaCuration::Gene - transcript coverage checks for genes
+
+=head1 SYNOPSIS
+
+  my $gaps = $support->find_gaps($all_transcripts);
+
+=head1 DESCRIPTION
+
+Subclass of Bio::EnsEMBL::Utils::ConversionSupport that adds find_gaps(),
+which counts the stretches of a gene that no transcript covers.
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence:
+Please see http://www.ensembl.org/code_licence.html for details
+
+=head1 AUTHOR
+
+Steve Trevanion <st3@sanger.ac.uk>
+
+=head1 CONTACT
+
+Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
+
+=cut
+
+use strict;
+use warnings;
+use vars qw(@ISA);
+
+use Bio::EnsEMBL::Utils::ConversionSupport;
+
+@ISA = qw(Bio::EnsEMBL::Utils::ConversionSupport);
+
+
+=head2 find_gaps
+
+  Args       : arrayref of B::E::Transcripts
+  Example    : my $gaps = $support->find_gaps($all_transcripts)
+  Description: identifies regions of a gene that are not covered by any
+               transcript. A transcript that starts exactly where the
+               covered region ends does not open a gap.
+  Returntype : int (0 when the list is empty or undefined)
+  Exceptions : none
+  Caller     : internal
+
+=cut
+
+sub find_gaps {
+  my $self = shift;
+  my ($all_transcripts) = @_;
+  my $gaps = 0;
+
+  # By start, and for equal starts longest first, so that the running end
+  # position below is pushed as far right as possible at each start.
+  my @sorted_transcripts = sort { $a->start <=> $b->start || $b->end <=> $a->end } @{ $all_transcripts || [] };
+
+  # No transcripts at all: nothing can be uncovered.
+  return $gaps unless @sorted_transcripts;
+
+  # $pos is the right-most position covered by the transcripts seen so far.
+  my $first_transcript = shift @sorted_transcripts;
+  my $pos = $first_transcript->end;
+  foreach my $transcript (@sorted_transcripts) {
+    # Lies entirely within what is already covered.
+    next if ($transcript->end <= $pos);
+
+    # The transcript reaches beyond $pos. It opens a new gap only when it
+    # also starts beyond $pos; starting exactly on $pos leaves nothing
+    # uncovered in between, so that just extends the covered region.
+    $gaps++ if ($transcript->start > $pos);
+    $pos = $transcript->end;
+  }
+  return $gaps;
+}
+
+1;
diff --git a/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm b/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm
new file mode 100644
index 0000000000000000000000000000000000000000..7dcc52ee31fe88f6e5345b937e10273854358b8e
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Transcript.pm
@@ -0,0 +1,37 @@
+package Bio::EnsEMBL::Utils::VegaCuration::Transcript;
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::VegaCuration::Transcript
+
+=head1 SYNOPSIS
+
+=head1 DESCRIPTION
+
+Subclass of Bio::EnsEMBL::Utils::VegaCuration::Gene. It defines no methods
+of its own yet and only passes on what it inherits.
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence:
+Please see http://www.ensembl.org/code_licence.html for details
+
+=head1 AUTHOR
+
+Steve Trevanion <st3@sanger.ac.uk>
+
+=head1 CONTACT
+
+Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
+
+=cut
+
+use strict;
+use warnings;
+use vars qw(@ISA);
+
+use Bio::EnsEMBL::Utils::VegaCuration::Gene;
+
+@ISA = qw(Bio::EnsEMBL::Utils::VegaCuration::Gene);
+
+1;
diff --git a/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm b/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm
new file mode 100644
index 0000000000000000000000000000000000000000..1577eeab266092b07cf171a428839858139852cc
--- /dev/null
+++ b/modules/Bio/EnsEMBL/Utils/VegaCuration/Translation.pm
@@ -0,0 +1,125 @@
+package Bio::EnsEMBL::Utils::VegaCuration::Translation;
+
+=head1 NAME
+
+Bio::EnsEMBL::Utils::VegaCuration::Translation - checks on CDS start/end remarks
+
+=head1 SYNOPSIS
+
+  my $results = $support->check_CDS_start_end_remarks($transcript);
+
+=head1 DESCRIPTION
+
+Subclass of Bio::EnsEMBL::Utils::VegaCuration::Transcript that adds
+check_CDS_start_end_remarks().
+
+=head1 LICENCE
+
+This code is distributed under an Apache style licence:
+Please see http://www.ensembl.org/code_licence.html for details
+
+=head1 AUTHOR
+
+Steve Trevanion <st3@sanger.ac.uk>
+
+=head1 CONTACT
+
+Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk
+
+=cut
+
+use strict;
+use warnings;
+use vars qw(@ISA);
+
+use Bio::EnsEMBL::Utils::VegaCuration::Transcript;
+
+@ISA = qw(Bio::EnsEMBL::Utils::VegaCuration::Transcript);
+
+=head2 check_CDS_start_end_remarks
+
+  Args       : B::E::Transcript
+  Example    : my $results = $support->check_CDS_start_end_remarks($transcript)
+  Description: identifies incorrect 'CDS start not found' and 'CDS end not
+               found' transcript remarks. Keys that may be set:
+                 END_EXTRA       - end remark present, but the CDS stops
+                                   before the transcript end on a stop codon
+                 END_MISSING_1   - no end remark, CDS runs to the transcript
+                                   end and finishes on a stop codon
+                 END_MISSING_2   - as END_MISSING_1 but not on a stop codon;
+                                   the value is the final codon
+                 START_EXTRA     - start remark present, but the CDS does
+                                   not begin at base 1 and starts with ATG
+                 START_MISSING_1 - no start remark, CDS begins at base 1
+                                   with ATG
+                 START_MISSING_2 - as START_MISSING_1 but not with ATG; the
+                                   value is the first codon
+  Returntype : hashref, or undef when no problem was found or the
+               transcript has no CDS coordinates
+
+=cut
+
+sub check_CDS_start_end_remarks {
+  my $self = shift;
+  my $trans = shift;
+
+  #hashref to return results; stays undef until a problem is recorded
+  my $results;
+
+  # Without CDS coordinates there is nothing to check the remarks against,
+  # and the comparisons below would be made on undef.
+  # NOTE(review): presumably the non-coding case - confirm that
+  # cdna_coding_start/cdna_coding_end return undef there.
+  my $coding_start = $trans->cdna_coding_start;
+  my $coding_end   = $trans->cdna_coding_end;
+  return $results unless (defined $coding_start && defined $coding_end);
+
+  # info for checking
+  my @remarks     = map { $_->value } @{ $trans->get_all_Attributes('remark') };
+  my $trans_end   = $trans->length;
+  my $trans_seq   = $trans->seq->seq;
+  my $stop_codon  = substr($trans_seq, $coding_end-3, 3);
+  my $start_codon = substr($trans_seq, $coding_start-1, 3);
+
+  # each condition is evaluated once; grep in scalar context gives a count
+  my $has_end_remark   = grep { $_ eq 'CDS end not found' } @remarks;
+  my $has_start_remark = grep { $_ eq 'CDS start not found' } @remarks;
+  my $ends_on_stop     = grep { $_ eq $stop_codon } qw(TGA TAA TAG);
+  my $starts_on_atg    = ($start_codon eq 'ATG');
+
+  if ($coding_end == $trans_end) {
+    #missing CDS end not found remark
+    if (! $has_end_remark) {
+      if ($ends_on_stop) {
+        $results->{'END_MISSING_1'} = 1;
+      }
+      else {
+        $results->{'END_MISSING_2'} = $stop_codon;
+      }
+    }
+  }
+  elsif ($has_end_remark && $ends_on_stop) {
+    #extra CDS end not found remark
+    $results->{'END_EXTRA'} = 1;
+  }
+
+  if ($coding_start == 1) {
+    #missing CDS start not found remark
+    if (! $has_start_remark) {
+      if ($starts_on_atg) {
+        $results->{'START_MISSING_1'} = 1;
+      }
+      else {
+        $results->{'START_MISSING_2'} = $start_codon;
+      }
+    }
+  }
+  elsif ($has_start_remark && $starts_on_atg) {
+    #extra CDS start not found remark
+    $results->{'START_EXTRA'} = 1;
+  }
+
+  return $results;
+}
+
+1;