Skip to content
Snippets Groups Projects
Commit 9e1f3af8 authored by Maurice Hendrix's avatar Maurice Hendrix
Browse files

slight change to allow automated loutre QC.

sub check_for_stops can be given a 3rd parameter, an object with the sub _save_log, to save the logs to. This is optional; omitting it renders the sub functionally equivalent to the previous version
parent 15b051f9
No related branches found
No related tags found
No related merge requests found
......@@ -211,8 +211,13 @@ sub get_havana_seleno_comments {
sub check_for_stops {
my $support = shift;
my ($gene,$seen_transcripts) = @_;
my ($gene,$seen_transcripts,$log_object) = @_;
if(not defined $log_object){
$log_object=$support;
}
my $gname = $gene->get_all_Attributes('name')->[0]->value;
my $gsi = $gene->stable_id;
my $scodon = 'TGA';
my $mod_date = $support->date_format( $gene->modified_date,'%d/%m/%y' );
......@@ -224,12 +229,14 @@ sub check_for_stops {
foreach my $rem (@{$trans->get_all_Attributes('hidden_remark')}) {
if ($rem->value =~ /not_for_Vega/) {
$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
#$support->log_verbose("Skipping transcript $tname ($tsi) since 'not_for_Vega'\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Skipping transcript $tname ($tsi) since 'not_for_Vega'\n");
next TRANS;
}
}
$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);
#$support->log_verbose("Studying transcript $tsi ($tname, $tID)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Studying transcript $tsi ($tname, $tID)\n");
my $peptide;
......@@ -241,7 +248,8 @@ sub check_for_stops {
# (translate method trims stops from sequence end)
next TRANS unless ($pseq =~ /\*/);
$support->log_verbose("Stops found in $tsi ($tname)\n",1);
#$support->log_verbose("Stops found in $tsi ($tname)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Stops found in $tsi ($tname)\n");
# find out where and how many stops there are
my @found_stops;
......@@ -275,10 +283,12 @@ sub check_for_stops {
#...no - an internal stop codon error in the database...
if ($num_tga < $num_stops) {
if ($source eq 'havana') {
$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
#$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
}
else {
$support->log_warning("INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
#$support->log_warning("INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
}
}
......@@ -302,7 +312,8 @@ sub check_for_stops {
while (my ($attrib,$remarks)= each %$remarks) {
foreach my $text (@{$remarks}) {
if ( ($attrib eq 'remark') && ($text=~/^$alabel(.*)/) ){
$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
#$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_wrong_selC_coord', "seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
$annot_stops=$1;
}
elsif ($text =~ /^$alabel2(.*)/) {
......@@ -325,14 +336,17 @@ sub check_for_stops {
}
# catch old annotations where number was in DNA not peptide coordinates
elsif (($found_stops[$i]->[1] * 3) == $offset) {
$support->log_warning("DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]\n");
#$support->log_warning("DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, '', "DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]\n");
}
# catch old annotations where number off by one
elsif (($found_stops[$i]->[1]) == $offset+1) {
$support->log_warning("PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]\n");
#$support->log_warning("PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, '', "PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]\n");
}
else {
$support->log_warning("Annotated stop for transcript $tsi ($tname) does not match a TGA codon) [$mod_date]\n");
#$support->log_warning("Annotated stop for transcript $tsi ($tname) does not match a TGA codon) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, '', "Annotated stop for transcript $tsi ($tname) does not match a TGA codon) [$mod_date]\n");
push @annotated_stops, $offset;
}
$i++;
......@@ -345,10 +359,12 @@ sub check_for_stops {
my $seq = $stop->[0];
unless ( grep { $pos == $_} @annotated_stops) {
if ($seen_transcripts->{$tsi}) {
$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
#$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, 'VQCT_pot_selC', "Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
}
else {
$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
#$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
$log_object->_save_log('log', '', $gsi, '', $tsi, 'VQCT_pot_selC', "POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
}
}
}
......@@ -356,7 +372,17 @@ sub check_for_stops {
}
}
# _save_log - default log sink used by check_for_stops.
#
# check_for_stops calls $log_object->_save_log(...) and falls back to the
# support object itself when no separate log object is supplied, so this
# implementation simply relays the message to the ordinary logging method.
#
# Positional arguments (after the invocant):
#   $log_type   - name of the logging method to invoke on the invocant
#                 (callers in this file pass 'log', 'log_verbose' or
#                 'log_warning')
#   $chrom_name - accepted for interface compatibility, not used here
#   $gsi        - accepted for interface compatibility, not used here
#   $type       - accepted for interface compatibility, not used here
#   $tsi        - accepted for interface compatibility, not used here
#   $tag        - accepted for interface compatibility, not used here
#   $txt        - message text; any false value (including a missing
#                 argument) is replaced by the empty string
#
# Returns whatever the invoked logging method returns.
sub _save_log {
    my ($self, $log_type, $chrom_name, $gsi, $type, $tsi, $tag, $txt) = @_;

    # Only the message text is forwarded; the descriptive fields above are
    # deliberately dropped, and no further argument is handed to the log
    # method.
    $txt ||= '';

    return $self->$log_type($txt);
}
#details of annotators comments
__DATA__
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment