Commit bb3a9456 authored by Steve Trevanion's avatar Steve Trevanion
Browse files

fixes when identifying other stops - 1) ribosomal frameshift ignored, 2) stop...

fixes when identifying other stops -  1) ribosomal frameshift ignored, 2) stop codon being automatically annotated as a seleno and 3) non numerical values in comments
parent 9c7ae961
...@@ -220,9 +220,9 @@ sub check_for_stops { ...@@ -220,9 +220,9 @@ sub check_for_stops {
$transcripts=\@help; $transcripts=\@help;
}else{ }else{
$log_object=$support; $log_object=$support;
$transcripts=$gene->get_all_Transcripts; $transcripts=$gene->get_all_Transcripts;
} }
my $gname = $gene->get_all_Attributes('name')->[0]->value; my $gname = $gene->get_all_Attributes('name')->[0]->value;
my $gsi = $gene->stable_id; my $gsi = $gene->stable_id;
my $scodon = 'TGA'; my $scodon = 'TGA';
...@@ -240,11 +240,20 @@ sub check_for_stops { ...@@ -240,11 +240,20 @@ sub check_for_stops {
} }
foreach my $rem (@$hidden_remak_ttributes) { foreach my $rem (@$hidden_remak_ttributes) {
if ($rem->value =~ /not_for_Vega/) { if ($rem->value =~ /not_for_Vega/) {
#$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1); #$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Skipping transcript $tname ($tsi) since 'not_for_Vega'"); $log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Skipping transcript $tname ($tsi) since 'not_for_Vega'");
next TRANS; next TRANS;
}
}
# go no further if there is a ribosomal framshift attribute
foreach my $attrib (@{$trans->get_all_Attributes('_rib_frameshift')}) {
if ($attrib->value) {
$log_object->_save_log('log', '', $gsi, '', $tsi, '', "Skipping $tsi ($tname) since it has a ribosomal frameshift attribute");
next TRANS;
} }
} }
#$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1); #$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Studying transcript $tsi ($tname, $tID)"); $log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Studying transcript $tsi ($tname, $tID)");
my $peptide; my $peptide;
...@@ -257,7 +266,7 @@ sub check_for_stops { ...@@ -257,7 +266,7 @@ sub check_for_stops {
next TRANS unless ($pseq =~ /\*/); next TRANS unless ($pseq =~ /\*/);
#$support->log_verbose("Stops found in $tsi ($tname)\n",1); #$support->log_verbose("Stops found in $tsi ($tname)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Stops found in $tsi ($tname)"); $log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Stops found in $tsi ($tname)");
# find out where and how many stops there are # find out where and how many stops there are
my @found_stops; my @found_stops;
my $mrna = $trans->translateable_seq; my $mrna = $trans->translateable_seq;
...@@ -289,7 +298,7 @@ sub check_for_stops { ...@@ -289,7 +298,7 @@ sub check_for_stops {
#...no - an internal stop codon error in the database... #...no - an internal stop codon error in the database...
if ($num_tga < $num_stops) { if ($num_tga < $num_stops) {
if ($source eq 'havana') { if ($source eq 'havana') {
#$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n"); #$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
$log_object->_save_log('log_warning', '', $gsi, 'TRANSCRIPT', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]: Sequence = $orig_seq Stops at $positions)"); $log_object->_save_log('log_warning', '', $gsi, 'TRANSCRIPT', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]: Sequence = $orig_seq Stops at $positions)");
} }
else { else {
...@@ -313,64 +322,72 @@ sub check_for_stops { ...@@ -313,64 +322,72 @@ sub check_for_stops {
}else{ }else{
$att=$trans->get_all_Attributes($remark_type) $att=$trans->get_all_Attributes($remark_type)
} }
foreach my $attrib ( @$att) { foreach my $attrib ( @$att) {
push @{$remarks->{$remark_type}}, $attrib->value; push @{$remarks->{$remark_type}}, $attrib->value;
} }
} }
#parse remarks to check syntax for location of edits #parse remarks to check syntax for location of edits
while (my ($attrib,$remarks)= each %$remarks) { while (my ($attrib,$remarks)= each %$remarks) {
foreach my $text (@{$remarks}) { foreach my $text (@{$remarks}) {
if ( ($attrib eq 'remark') && ($text=~/^$alabel(.*)/) ){ if ( ($attrib eq 'remark') && ($text=~/^$alabel(.*)/) ){
#$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n"); #$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_wrong_selC_coord', "seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]"); $log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_wrong_selC_coord', "seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]");
$annot_stops=$1; $annot_stops=$1;
} }
elsif ($text =~ /^$alabel2(.*)/) { elsif ($text =~ /^$alabel2(.*)/) {
$annot_stops=$1; $annot_stops=$1;
} }
} }
} }
#check the location of the annotated edits matches actual stops in the sequence #check the location of the annotated edits matches actual stops in the sequence
my @annotated_stops; my @annotated_stops;
if ($annot_stops){ if ($annot_stops){
my $i = 0; my $i = 0;
foreach my $offset (split(/\s+/, $annot_stops)) { foreach my $offset (split(/\s+/, $annot_stops)) {
#OK if it matches a known stop if ($offset !~ /^\d+$/) {
if ( $log_object->_save_log('log', '', $gene->stable_id, '', $tsi, '', "Annotated stop for transcript tsi ($tname) is not an integer \"$offset\" - might need to investigate [$mod_date]");
defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && ($found_stops[$i]->[1] == $offset)) { }
push @annotated_stops, $offset; #OK if it matches a known stop
} elsif (
# catch old annotations where number was in DNA not peptide coordinates defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && ($found_stops[$i]->[1] == $offset)) {
elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1] * 3) == $offset)) { push @annotated_stops, $offset;
$log_object->_save_log('log_warning', '', $gene->stable_id, 'DNA', $tsi, 'VQCT_wrong_selC_coord', "DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]"); }
} # catch old annotations where number was in DNA not peptide coordinates
# catch old annotations where number off by one elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1] * 3) == $offset)) {
elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1]) == $offset+1)) { $log_object->_save_log('log_warning', '', $gene->stable_id, 'DNA', $tsi, 'VQCT_wrong_selC_coord', "DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]");
$log_object->_save_log('log_warning', '', $gene->stable_id, 'PEPTIDE', $tsi, 'VQCT_wrong_selC_coord', "PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]"); }
} # catch old annotations where number off by one
elsif (defined($found_stops[$i]) && defined($found_stops[$i]->[1]) && (($found_stops[$i]->[1]) == $offset+1)) {
$log_object->_save_log('log_warning', '', $gene->stable_id, 'PEPTIDE', $tsi, 'VQCT_wrong_selC_coord', "PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]");
}
elsif (defined($offset) && ($offset=~/^\d+$/)){ elsif (defined($offset) && ($offset=~/^\d+$/)){
$log_object->_save_log('log_warning', '', $gene->stable_id, 'TRANSCRIPT', $tsi, 'VQCT_wrong_selC_coord', "Annotated stop for transcript $tsi ($tname) does not match a TGA codon) [$mod_date]"); if ($offset == length($pseq)) {
push @annotated_stops, $offset; $log_object->_save_log('log', '', $gene->stable_id, 'TRANSCRIPT', $tsi, '', "Annotated stop for transcript $tsi ($tname) \"$offset\" matches actual stop codon, sounds like an anacode bug to me [$mod_date]");
} }
$i++; else {
} $log_object->_save_log('log_warning', '', $gene->stable_id, 'TRANSCRIPT', $tsi, 'VQCT_wrong_selC_coord', "Annotated stop for transcript $tsi ($tname) \"$offset\" does not match a TGA codon) [$mod_date]");
push @annotated_stops, $offset;
}
}
$i++;
}
} }
#check location of found stops matches annotated ones - any new ones are reported #check location of found stops matches annotated ones - any new ones are reported
foreach my $stop ( @found_stops ) { foreach my $stop ( @found_stops ) {
my $pos = $stop->[1]; my $pos = $stop->[1];
my $seq = $stop->[0]; my $seq = $stop->[0];
unless ( grep { $pos == $_} @annotated_stops) { unless ( grep { $pos == $_} @annotated_stops) {
if ($seen_transcripts->{$tsi}) { if ($seen_transcripts->{$tsi}) {
#$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n"); #$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
$log_object->_save_log('log_verbose', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators: ".$seen_transcripts->{$tsi}.") [$mod_date]"); $log_object->_save_log('log_verbose', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators: ".$seen_transcripts->{$tsi}.") [$mod_date]");
} }
else { else {
#$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n"); #$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
$log_object->_save_log('log', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]"); $log_object->_save_log('log', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]");
} }
} }
} }
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment