Commit 9e1f3af8 authored by Maurice Hendrix's avatar Maurice Hendrix
Browse files

slight change to allow automated loutre QC.

sub check_for_stops can be given a 3rd parameter, an object with the sub _save_log, to save the logs to. This is optional; omitting it renders the sub functionally equivalent to the previous version
parent 15b051f9
......@@ -211,8 +211,13 @@ sub get_havana_seleno_comments {
sub check_for_stops {
my $support = shift;
my ($gene,$seen_transcripts) = @_;
my ($gene,$seen_transcripts,$log_object) = @_;
if(not defined $log_object){
$log_object=$support;
}
my $gname = $gene->get_all_Attributes('name')->[0]->value;
my $gsi = $gene->stable_id;
my $scodon = 'TGA';
my $mod_date = $support->date_format( $gene->modified_date,'%d/%m/%y' );
......@@ -224,12 +229,14 @@ sub check_for_stops {
foreach my $rem (@{$trans->get_all_Attributes('hidden_remark')}) {
if ($rem->value =~ /not_for_Vega/) {
$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
#$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Skipping transcript $tname ($tsi) since 'not_for_Vega'\n");
next TRANS;
}
}
$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);
#$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Studying transcript $tsi ($tname, $tID)\n");
my $peptide;
......@@ -241,7 +248,8 @@ sub check_for_stops {
# (translate method trims stops from sequence end)
next TRANS unless ($pseq =~ /\*/);
$support->log_verbose("Stops found in $tsi ($tname)\n",1);
#$support->log_verbose("Stops found in $tsi ($tname)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Stops found in $tsi ($tname)\n");
# find out where and how many stops there are
my @found_stops;
......@@ -275,10 +283,12 @@ sub check_for_stops {
#...no - an internal stop codon error in the database...
if ($num_tga < $num_stops) {
if ($source eq 'havana') {
$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
#$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
}
else {
$support->log_warning("INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
#$support->log_warning("INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
}
}
......@@ -302,7 +312,8 @@ sub check_for_stops {
while (my ($attrib,$remarks)= each %$remarks) {
foreach my $text (@{$remarks}) {
if ( ($attrib eq 'remark') && ($text=~/^$alabel(.*)/) ){
$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
#$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_wrong_selC_coord', "seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
$annot_stops=$1;
}
elsif ($text =~ /^$alabel2(.*)/) {
......@@ -325,14 +336,17 @@ sub check_for_stops {
}
# catch old annotations where number was in DNA not peptide coordinates
elsif (($found_stops[$i]->[1] * 3) == $offset) {
$support->log_warning("DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]\n");
#$support->log_warning("DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, '', "DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]\n");
}
# catch old annotations where number off by one
elsif (($found_stops[$i]->[1]) == $offset+1) {
$support->log_warning("PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]\n");
#$support->log_warning("PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, '', "PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]\n");
}
else {
$support->log_warning("Annotated stop for transcript $tsi ($tname) does not match a TGA codon) [$mod_date]\n");
#$support->log_warning("Annotated stop for transcript $tsi ($tname) does not match a TGA codon) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, '', "Annotated stop for transcript $tsi ($tname) does not match a TGA codon) [$mod_date]\n");
push @annotated_stops, $offset;
}
$i++;
......@@ -345,10 +359,12 @@ sub check_for_stops {
my $seq = $stop->[0];
unless ( grep { $pos == $_} @annotated_stops) {
if ($seen_transcripts->{$tsi}) {
$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
#$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, 'VQCT_pot_selC', "Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
}
else {
$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
#$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
$log_object->_save_log('log', '', $gsi, '', $tsi, 'VQCT_pot_selC', "POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
}
}
}
......@@ -356,7 +372,17 @@ sub check_for_stops {
}
}
# Forward one log entry to the logging method named by $log_type.
#
# Arguments (positional, after the invocant):
#   $log_type   - name of the method to invoke on $self (e.g. 'log_warning')
#   $chrom_name - accepted but not used by this implementation
#   $gsi        - accepted but not used by this implementation
#   $type       - accepted but not used by this implementation
#   $tsi        - accepted but not used by this implementation
#   $tag        - accepted but not used by this implementation
#   $txt        - message text; any false value is replaced by ''
#
# Returns whatever the invoked logging method returns.
# NOTE(review): the unused fields are presumably consumed by alternative
# log objects that supply their own _save_log - confirm against callers.
sub _save_log {
    my ($self, $log_type, @details) = @_;

    # Only the sixth detail field (the message text) is used here.
    my ($chrom_name, $gsi, $type, $tsi, $tag, $txt) = @details;
    my $message = $txt ? $txt : '';

    return $self->$log_type($message);
}
#details of annotators comments
__DATA__
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment