# Skip this transcript unless it translates and its translation contains at
# least one stop.
$peptide = $trans->translate
    or next TRANS;
my $pseq     = $peptide->seq;
my $orig_seq = $pseq;
# (the translate method trims stops from the sequence end)
next TRANS if $pseq !~ /\*/;
$support->log("Stops found in $tsi ($tname)\n", 1);
# Find out where and how many stops there are.  Each pass strips the residues
# up to and including the next '*' from the front of $pseq and records the
# three bases found at the matching position of the translateable sequence.
my @found_stops;    # one [codon, peptide position] pair per stop
my $mrna = $trans->translateable_seq;
my $offset = 0;     # 0-based base offset into $mrna of the next unread codon
my $tstop;          # peptide positions of the stops, appended as they are found
# The pattern is anchored and allows an empty run of residues before the '*'.
# Requiring at least one residue would skip a stop that sits at the very start
# of the remaining sequence (a leading '*' or two adjacent stops) and leave
# $offset out of step with the peptide for every stop after it.
while ($pseq =~ /^([^\*]*)\*(.*)/) {
    my $residues_before = $1;
    $pseq = $2;
    # three bases per residue preceding the stop
    $offset += length($residues_before) * 3;
    my $stop = substr($mrna, $offset, 3);
    # 1-based peptide coordinate of the stop
    my $aaoffset = int($offset / 3) + 1;
    push(@found_stops, [$stop, $aaoffset]);
    $tstop .= "$aaoffset";
    # step over the stop codon itself
    $offset += 3;
}
# Are all stops TGA...?  Count the stops whose codon equals $scodon and build
# a "codon(position) " listing of every stop that was found.
my $num_stops = @found_stops;
my $num_tga   = 0;
my $positions;
for my $found (@found_stops) {
    my ($codon, $aa_pos) = @{$found};
    $positions .= "${codon}(${aa_pos}) ";
    $num_tga++ if $codon eq $scodon;
}
my $source = $gene->source;
#...no - an internal stop codon error in the database: 'havana' genes are
# reported through log_warning, every other source through log_verbose
if ($num_stops > $num_tga) {
    if ($source ne 'havana') {
        $support->log_verbose("INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) has non \'$scodon\' stop codons:\nSequence = $orig_seq\nStops at $positions\n\n");
    }
    else {
        $support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) has non \'$scodon\' stop codons:\nSequence = $orig_seq\nStops at $positions\n\n");
    }
}
#...yes - check remarks
else{
my $flag_remark  = 0;    # 1 if word seleno has been used
my $flag_remark2 = 0;    # 1 if existing remark has correct numbering
# Parse the remarks to check the syntax for the location of edits: whatever
# follows the $alabel / $alabel2 prefix of a remark is kept in $annot_stops.
while (my ($attrib, $texts) = each %{$remarks}) {
    my $is_plain_remark = ($attrib eq 'remark');
    for my $text (@{$texts}) {
        # the $alabel form is only looked for under the 'remark' attribute,
        # where finding it is reported as being stored in the wrong place
        if ($is_plain_remark && $text =~ /^$alabel(.*)/) {
            $support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark\n");
            $annot_stops = $1;
        }
        elsif ($text =~ /^$alabel2(.*)/) {
            $annot_stops = $1;
        }
    }
}
# Check that the location of each annotated edit matches an actual stop in the
# sequence.  The numbers in $annot_stops are compared, in order, with the
# peptide positions recorded in @found_stops.  Positions that match, and
# positions that match nothing at all, are collected in @annotated_stops.
my @annotated_stops;
if ($annot_stops) {
    my $i = 0;    # index of the found stop the next annotated number is compared with
    foreach my $annot_pos (split(/\s+/, $annot_stops)) {
        # not a number - ignore it, without using up one of the found stops
        next if $annot_pos !~ /^\d+$/;

        # Only dereference a found stop that really exists: reading
        # $found_stops[$i]->[1] past the end of the array would autovivify an
        # empty entry in @found_stops.
        my $found_pos = $i < @found_stops ? $found_stops[$i]->[1] : undef;
        my $has_stop  = defined $found_pos;
        $i++;

        # OK if it matches a known stop
        if ($has_stop && $found_pos == $annot_pos) {
            push @annotated_stops, $annot_pos;
        }
        # catch old annotations where number was in DNA not peptide coordinates
        elsif ($has_stop && ($found_pos * 3) == $annot_pos) {
            $support->log_warning("DNA: Annotated stop for transcript $tsi ($tname) is in DNA not peptide coordinates\n");
        }
        # catch old annotations where number off by one
        elsif ($has_stop && $found_pos == $annot_pos + 1) {
            $support->log_warning("PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one\n");
        }
        # no agreement with the stop at this index, or no stop left to compare with
        else {
            $support->log_warning("Annotated stop for transcript $tsi ($tname) does not match a TGA codon\n");
            push @annotated_stops, $annot_pos;
        }
    }
}
#check location of found stops matches annotated ones - any new ones are reported
foreachmy$stop(@found_stops){
my$pos=$stop->[1];
my$seq=$stop->[0];
unless(grep{$pos==$_}@annotated_stops){
if($seen_transcripts->{$tsi}){
$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}."\n");
}
else{
$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname) found at $pos\n");