Commit bb3a9456 authored by Steve Trevanion's avatar Steve Trevanion
Browse files

fixes when identifying other stops - 1) ribosomal frameshift ignored, 2) stop...

fixes when identifying other stops -  1) ribosomal frameshift ignored, 2) stop codon being automatically annotated as a seleno and 3) non numerical values in comments
parent 9c7ae961
......@@ -220,9 +220,9 @@ sub check_for_stops {
$transcripts=\@help;
}else{
$log_object=$support;
$transcripts=$gene->get_all_Transcripts;
$transcripts=$gene->get_all_Transcripts;
}
my $gname = $gene->get_all_Attributes('name')->[0]->value;
my $gsi = $gene->stable_id;
my $scodon = 'TGA';
......@@ -240,11 +240,20 @@ sub check_for_stops {
}
foreach my $rem (@$hidden_remak_ttributes) {
if ($rem->value =~ /not_for_Vega/) {
#$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
#$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Skipping transcript $tname ($tsi) since 'not_for_Vega'");
next TRANS;
next TRANS;
}
}
# go no further if there is a ribosomal framshift attribute
foreach my $attrib (@{$trans->get_all_Attributes('_rib_frameshift')}) {
if ($attrib->value) {
$log_object->_save_log('log', '', $gsi, '', $tsi, '', "Skipping $tsi ($tname) since it has a ribosomal frameshift attribute");
next TRANS;
}
}
#$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Studying transcript $tsi ($tname, $tID)");
my $peptide;
......@@ -257,7 +266,7 @@ sub check_for_stops {
next TRANS unless ($pseq =~ /\*/);
#$support->log_verbose("Stops found in $tsi ($tname)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Stops found in $tsi ($tname)");
# find out where and how many stops there are
my @found_stops;
my $mrna = $trans->translateable_seq;
......@@ -289,7 +298,7 @@ sub check_for_stops {
#...no - an internal stop codon error in the database...
if ($num_tga < $num_stops) {
if ($source eq 'havana') {
#$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
#$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
$log_object->_save_log('log_warning', '', $gsi, 'TRANSCRIPT', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]: Sequence = $orig_seq Stops at $positions)");
}
else {
......@@ -313,64 +322,72 @@ sub check_for_stops {
}else{
$att=$trans->get_all_Attributes($remark_type)
}
foreach my $attrib ( @$att) {
push @{$remarks->{$remark_type}}, $attrib->value;
}
foreach my $attrib ( @$att) {
push @{$remarks->{$remark_type}}, $attrib->value;
}
}
#parse remarks to check syntax for location of edits
while (my ($attrib,$remarks)= each %$remarks) {
foreach my $text (@{$remarks}) {
if ( ($attrib eq 'remark') && ($text=~/^$alabel(.*)/) ){
#$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
foreach my $text (@{$remarks}) {
if ( ($attrib eq 'remark') && ($text=~/^$alabel(.*)/) ){
#$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_wrong_selC_coord', "seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]");
$annot_stops=$1;
}
elsif ($text =~ /^$alabel2(.*)/) {
$annot_stops=$1;
}
}
$annot_stops=$1;
}
elsif ($text =~ /^$alabel2(.*)/) {
$annot_stops=$1;
}
}
}
#check the location of the annotated edits matches actual stops in the sequence
my @annotated_stops;
if ($annot_stops){
my $i = 0;
foreach my $offset (split(/\s+/, $annot_stops)) {
#OK if it matches a known stop
if (
defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && ($found_stops[$i]->[1] == $offset)) {
push @annotated_stops, $offset;
}
# catch old annotations where number was in DNA not peptide coordinates
elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1] * 3) == $offset)) {
$log_object->_save_log('log_warning', '', $gene->stable_id, 'DNA', $tsi, 'VQCT_wrong_selC_coord', "DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]");
}
# catch old annotations where number off by one
elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1]) == $offset+1)) {
$log_object->_save_log('log_warning', '', $gene->stable_id, 'PEPTIDE', $tsi, 'VQCT_wrong_selC_coord', "PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]");
}
my $i = 0;
foreach my $offset (split(/\s+/, $annot_stops)) {
if ($offset !~ /^\d+$/) {
$log_object->_save_log('log', '', $gene->stable_id, '', $tsi, '', "Annotated stop for transcript tsi ($tname) is not an integer \"$offset\" - might need to investigate [$mod_date]");
}
#OK if it matches a known stop
elsif (
defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && ($found_stops[$i]->[1] == $offset)) {
push @annotated_stops, $offset;
}
# catch old annotations where number was in DNA not peptide coordinates
elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1] * 3) == $offset)) {
$log_object->_save_log('log_warning', '', $gene->stable_id, 'DNA', $tsi, 'VQCT_wrong_selC_coord', "DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]");
}
# catch old annotations where number off by one
elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1]) == $offset+1)) {
$log_object->_save_log('log_warning', '', $gene->stable_id, 'PEPTIDE', $tsi, 'VQCT_wrong_selC_coord', "PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]");
}
elsif (defined($offset) && ($offset=~/^\d+$/)){
$log_object->_save_log('log_warning', '', $gene->stable_id, 'TRANSCRIPT', $tsi, 'VQCT_wrong_selC_coord', "Annotated stop for transcript $tsi ($tname) does not match a TGA codon) [$mod_date]");
push @annotated_stops, $offset;
}
$i++;
}
if ($offset == length($pseq)) {
$log_object->_save_log('log', '', $gene->stable_id, 'TRANSCRIPT', $tsi, '', "Annotated stop for transcript $tsi ($tname) \"$offset\" matches actual stop codon, sounds like an anacode bug to me [$mod_date]");
}
else {
$log_object->_save_log('log_warning', '', $gene->stable_id, 'TRANSCRIPT', $tsi, 'VQCT_wrong_selC_coord', "Annotated stop for transcript $tsi ($tname) \"$offset\" does not match a TGA codon) [$mod_date]");
push @annotated_stops, $offset;
}
}
$i++;
}
}
#check location of found stops matches annotated ones - any new ones are reported
foreach my $stop ( @found_stops ) {
my $pos = $stop->[1];
my $seq = $stop->[0];
unless ( grep { $pos == $_} @annotated_stops) {
if ($seen_transcripts->{$tsi}) {
#$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
my $pos = $stop->[1];
my $seq = $stop->[0];
unless ( grep { $pos == $_} @annotated_stops) {
if ($seen_transcripts->{$tsi}) {
#$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
$log_object->_save_log('log_verbose', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators: ".$seen_transcripts->{$tsi}.") [$mod_date]");
}
else {
#$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
}
else {
#$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
$log_object->_save_log('log', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]");
}
}
}
}
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment