Commit f7ccac98 authored by Steve Trevanion's avatar Steve Trevanion
Browse files

merge from vega-48-dev

parent b4cb20ed
......@@ -186,6 +186,145 @@ sub get_havana_seleno_comments {
return $seen_translations;
}
sub check_for_stops {
my $support = shift;
my ($gene,$seen_transcripts) = @_;
my $scodon = 'TGA';
TRANS:
foreach my $trans (@{$gene->get_all_Transcripts}) {
my $tsi = $trans->stable_id;
my $tID = $trans->dbID;
my $tname = $trans->get_all_Attributes('name')->[0]->value;
$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);
my $peptide;
# go no further if the transcript doesn't translate or if there are no stops
next TRANS unless ($peptide = $trans->translate);
my $pseq = $peptide->seq;
my $orig_seq = $pseq;
# (translate method trims stops from sequence end)
next TRANS unless ($pseq =~ /\*/);
$support->log("Stops found in $tsi ($tname)\n",1);
# find out where and how many stops there are
my @found_stops;
my $mrna = $trans->translateable_seq;
my $offset = 0;
my $tstop;
while ($pseq =~ /([^\*]+)\*(.*)/) {
my $pseq1_f = $1;
$pseq = $2;
my $seq_flag = 0;
$offset += length($pseq1_f) * 3;
my $stop = substr($mrna, $offset, 3);
my $aaoffset = int($offset / 3)+1;
push(@found_stops, [ $stop, $aaoffset ]);
$tstop .= "$aaoffset ";
$offset += 3;
}
# are all stops TGA...?
my $num_stops = scalar(@found_stops);
my $num_tga = 0;
my $positions;
foreach my $stop (@found_stops) {
$positions .= $stop->[0]."(".$stop->[1].") ";
if ($stop->[0] eq $scodon) {
$num_tga++;
}
}
my $source = $gene->source;
#...no - an internal stop codon error in the database...
if ($num_tga < $num_stops) {
if ($source eq 'havana') {
$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) has non \'$scodon\' stop codons:\nSequence = $orig_seq\nStops at $positions\n\n");
}
else {
$support->log_verbose("INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) has non \'$scodon\' stop codons:\nSequence = $orig_seq\nStops at $positions\n\n");
}
}
#...yes - check remarks
else {
my $flag_remark = 0; # 1 if word seleno has been used
my $flag_remark2 = 0; # 1 if existing remark has correct numbering
my $alabel = 'Annotation_remark- selenocysteine ';
my $alabel2 = 'selenocysteine ';
my $annot_stops;
my $remarks;
#get both hidden_remarks and remarks
foreach my $remark_type ('remark','hidden_remark') {
foreach my $attrib ( @{$trans->get_all_Attributes($remark_type)}) {
push @{$remarks->{$remark_type}}, $attrib->value;
}
}
#parse remarks to check syntax for location of edits
while (my ($attrib,$remarks)= each %$remarks) {
foreach my $text (@{$remarks}) {
if ( ($attrib eq 'remark') && ($text=~/^$alabel(.*)/) ){
$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark\n");
$annot_stops=$1;
}
elsif ($text =~ /^$alabel2(.*)/) {
$annot_stops=$1;
}
}
}
#check the location of the annotated edits matches actual stops in the sequence
my @annotated_stops;
if ($annot_stops){
my $i = 0;
foreach my $offset (split(/\s+/, $annot_stops)) {
# not a number - ignore
if ($offset!~/^\d+$/){
}
#OK if it matches a known stop
elsif ($found_stops[$i]->[1] == $offset) {
push @annotated_stops, $offset;
}
# catch old annotations where number was in DNA not peptide coordinates
elsif (($found_stops[$i]->[1] * 3) == $offset) {
$support->log_warning("DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates\n");
}
# catch old annotations where number off by one
elsif (($found_stops[$i]->[1]) == $offset+1) {
$support->log_warning("PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one\n");
}
else {
$support->log_warning("Annotated stop for transcript $tsi ($tname) does not match a TGA codon\n");
push @annotated_stops, $offset;
}
$i++;
}
}
#check location of found stops matches annotated ones - any new ones are reported
foreach my $stop ( @found_stops ) {
my $pos = $stop->[1];
my $seq = $stop->[0];
unless ( grep { $pos == $_} @annotated_stops) {
if ($seen_transcripts->{$tsi}) {
$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}."\n");
}
else {
$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname) found at $pos\n");
}
}
}
}
}
}
#details of annotators comments
__DATA__
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment