Skip to content
Snippets Groups Projects
Commit 9f0ab422 authored by Graham McVicker's avatar Graham McVicker
Browse files

cleaned up debugging output, made silencable; StatCodes can be logged to file...

cleaned up debugging output, made silencable; StatCodes can be logged to file for later analysis/retrieval
parent 4078368e
No related branches found
No related tags found
No related merge requests found
......@@ -37,11 +37,11 @@ sub is_fatal {
foreach my $msg (@{$self->get_all_StatMsgs}) {
foreach my $code (@FATAL) {
if(($msg->code() & $code) == $code) {
info("Code is Fatal: ". StatMsg::code2str($msg->code()));
#info("Code is Fatal: ". StatMsg::code2str($msg->code()));
return 1;
}
}
info("Code is NON fatal=". StatMsg::code2str($msg->code()));
#info("Code is NON fatal=". StatMsg::code2str($msg->code()));
}
return 0;
......@@ -146,7 +146,7 @@ sub fail {
if(@_) {
my $fail = shift;
warning("Setting ".$self->stable_id." to failed.\n") if($fail);
#warning("Setting ".$self->stable_id." to failed.\n") if($fail);
$self->{'fail'} = $fail;
}
......
......@@ -42,6 +42,10 @@ use constant MIDDLE => 0x00040000;
use constant CONFUSED => 0x00080000;
use constant ALL_INTRON => 0x00100000;
use constant TRANSLATES => 0x00200000;
use constant SPLIT => 0x00400000;
use constant NO_SEQUENCE_LEFT => 0x00800000;
use constant PARTIAL => 0x01000000;
use vars qw(@EXPORT_OK @ISA);
......@@ -50,6 +54,10 @@ use vars qw(@EXPORT_OK @ISA);
@EXPORT_OK = qw(&push_err &pop_err &ec2str);
our $CUR_ID = 0;
our $LOGGER = undef;
sub new {
my $class = shift;
my $code = shift;
......@@ -58,20 +66,53 @@ sub new {
die("Status Code Argument is required.\n");
}
return bless {'code' => $code}, $class;
my $sm = bless {'code' => $code, 'id' => $CUR_ID++}, $class;
if($LOGGER) {
$LOGGER->add_StatMsg($sm);
}
return $sm;
}
#
# Sets a logger for stat msg logging
# If set, every created StatMsg will be logged to a file
# if undef, nothing will be logged.
#
sub set_logger {
my $logger = shift;
$LOGGER = $logger;
}
#
# Unique id is given to each statmsg so that they can be distinguished.
# Sometimes the same statmsg will be given to multiple exons and it is handy
# to be able to tell the messages apart, for example.
#
sub id {
my $self = shift;
return $self->{'id'};
}
#
# The code is a bitvector - a combination of bitwise or'd constants.
#
sub code {
my $self = shift;
return $self->{'code'};
}
#
# Returns a human readible version of what the code of this statmsg means.
#
sub code_str {
my $self = shift;
return code2str($self->{'code'});
}
#
# converts a code to a string
#
......@@ -86,6 +127,9 @@ sub code2str {
if($code & ENTIRE) {
$str .= ' entire';
}
if($code & PARTIAL) {
$str .= ' partial';
}
if($code & LONG) {
$str .= ' long';
}
......@@ -146,6 +190,16 @@ sub code2str {
if($code & ALL_INTRON) {
$str .= " consumed by frameshift intron";
}
if($code & TRANSLATES) {
$str .= " translates";
}
if($code & SPLIT) {
$str .= " split";
}
if($code & NO_SEQUENCE_LEFT) {
$str .= " no sequence left";
}
return "$str\n";
}
......
......@@ -12,23 +12,24 @@ package Transcript;
use StatMsg;
use InterimTranscript;
use Utils qw(print_exon);
use Bio::EnsEMBL::Transcript;
use Bio::EnsEMBL::Exon;
use Bio::EnsEMBL::Utils::Exception qw(throw);
use Bio::EnsEMBL::Utils::Exception qw(throw info);
#
# Sets the phases of the interim exons in an interim transcript
# failed exons are skipped completely.
#
# Note that the phases of the original exon are ignored, so there is a
# possibility of bad translations when the original start exon had a non-zero
# possibility of bad translations when the original start exon had a non-zero
# start phase.
#
sub set_iexon_phases {
my $itranscript = shift;
debug("setting exon phases for : " . $itranscript->stable_id());
info("setting exon phases for : " . $itranscript->stable_id());
my @iexons = @{$itranscript->get_all_Exons()};
......@@ -119,7 +120,7 @@ sub check_iexons {
my $transcript_seq_region = undef;
my $transcript_strand = undef;
debug("checking exons for : " . $itranscript->stable_id());
info("checking exons for : " . $itranscript->stable_id());
my $first = 1;
......@@ -133,7 +134,7 @@ sub check_iexons {
# when one of them is fatal?
#
if ($iexon->fail() || $iexon->is_fatal()) {
debug(" failed/fatal exon, splitting transcript");
info(" failed/fatal exon, splitting transcript");
# split this transcript in two, and restart the processing
# at the beginning of the second transcript
......@@ -170,7 +171,7 @@ sub check_iexons {
$transcript_seq_region = $iexon->seq_region();
} elsif ($transcript_seq_region ne $iexon->seq_region()) {
debug(" scaffold span, splitting transcript");
info(" scaffold span, splitting transcript");
my $stat_msg = StatMsg->new(StatMsg::TRANSCRIPT|StatMsg::SCAFFOLD_SPAN);
$itranscript->add_StatMsg($stat_msg);
......@@ -193,7 +194,7 @@ sub check_iexons {
(defined($prev_start) && $iexon->strand() == -1 &&
$prev_start < $iexon->end())) {
debug(" inversion, splitting transcript");
info(" inversion, splitting transcript");
my $stat_msg = StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::INVERT);
$itranscript->add_StatMsg($stat_msg);
......@@ -214,7 +215,7 @@ sub check_iexons {
if (!defined($transcript_strand)) {
$transcript_strand = $iexon->strand();
} elsif ($transcript_strand != $iexon->strand()) {
debug(" strand flip, splitting transcript");
info(" strand flip, splitting transcript");
my $stat_msg = StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::STRAND_FLIP);
$itranscript->add_StatMsg($stat_msg);
......@@ -239,7 +240,7 @@ sub check_iexons {
if ($total_exons > 0) {
push @$itranscript_array, $itranscript;
} else {
debug(" no exons left in transcript");
info(" no exons left in transcript");
}
return $itranscript_array;
......@@ -272,9 +273,9 @@ sub split_itrans {
# all the exons until the 'bad exon' are loaded into the first transcript
debug("==FIRST TRANSCRIPT:\n");
info("==FIRST TRANSCRIPT:\n");
while($cur_exon && $cur_exon != $bad_exon) {
print_exon($cur_exon);
print_exon($cur_exon);
push @first_exons, $cur_exon;
$first_trans->add_Exon($cur_exon);
$cur_exon = shift(@remaining_exons);
......@@ -284,8 +285,8 @@ sub split_itrans {
throw("unexpected: could not find bad exon in transcript");
}
debug("==BAD EXON: ". (($keep_exon) ? 'keeping' : 'discarding'));
print_exon($bad_exon);
info("==BAD EXON: ". (($keep_exon) ? 'keeping' : 'discarding'));
print_exon($bad_exon);
# keep the 'bad exon' if the flag was set
if($keep_exon) {
......@@ -294,9 +295,9 @@ sub split_itrans {
# the remaining exons are reloaded into the second (original) transcript
$itrans->flush_Exons();
debug("==SECOND TRANSCRIPT:\n");
info("==SECOND TRANSCRIPT:\n");
foreach my $exon (@remaining_exons) {
print_exon($exon);
print_exon($exon);
$itrans->add_Exon($exon);
}
......@@ -337,7 +338,7 @@ sub split_itrans {
my $cdna_shift = 0;
foreach my $ex (@first_exons) {
$cdna_shift += $ex->length();
# debug("cdna_shift = $cdna_shift\n");
info("cdna_shift = $cdna_shift\n");
}
# if we threw away the 'bad' exon we want to take into account
......@@ -348,8 +349,7 @@ sub split_itrans {
$cdna_shift += $bad_exon->length();
}
# debug("Shifting CDNA of second transcript by $cdna_shift\n");
# debug("First exon (2nd trans) cdna_start = ".$remaining_exons[0]->cdna_start()."\n");
info("Shifting CDNA of second transcript by $cdna_shift\n");
if($cdna_shift) {
foreach my $ex (@remaining_exons) {
......@@ -386,7 +386,7 @@ sub make_Transcript {
my $transcript = Bio::EnsEMBL::Transcript->new();
$transcript->stable_id($itrans->stable_id);
# debug("making final transcript for ". $itrans->stable_id);
info("making final transcript for ". $itrans->stable_id);
my $translation;
......@@ -412,13 +412,9 @@ sub make_Transcript {
-STABLE_ID => $iexon->stable_id(),
-SLICE => $slice);
#print STDERR "Adding exon " . $exon->stable_id . " to final transcript\n";
$transcript->add_Exon($exon);
#
# see if this exon is the start or end exon of the translation
#
if ($translation) {
if ($iexon->cdna_start() <= $itrans->cdna_coding_start() &&
$iexon->cdna_end() >= $itrans->cdna_coding_start()) {
......@@ -430,13 +426,6 @@ sub make_Transcript {
if ($iexon->cdna_start() <= $itrans->cdna_coding_end() &&
$iexon->cdna_end() >= $itrans->cdna_coding_end()) {
#debug("end exon=".$exon->stable_id()."\n".
# " ex_coding_start=".$iexon->cdna_start()."\n".
# " ex_coding_end=".$iexon->cdna_end()."\n".
# " start=".$iexon->start()."\n".
# " end=".$iexon->end()."\n".
# " transl_end = ".$itrans->cdna_coding_end());
my $translation_end =
$itrans->cdna_coding_end() - $iexon->cdna_start() + 1;
$translation->end_Exon($exon);
......@@ -465,49 +454,6 @@ sub make_Transcript {
return $transcript;
}
sub print_exon {
my $exon = shift;
my $tr = shift;
if (!$exon) {
throw("Exon undefined");
}
print STDERR " ",$exon->stable_id, "\n";
print STDERR " cdna_start = ",$exon->cdna_start(),"\n"
if(defined($exon->cdna_start()));
print STDERR " cdna_end = ", $exon->cdna_end(), "\n"
if(defined($exon->cdna_end()));
print STDERR " start = ". $exon->start() . "\n"
if(defined($exon->start()));
print STDERR " end = ". $exon->end() . "\n"
if(defined($exon->end()));
print STDERR " strand = ". $exon->strand() . "\n"
if(defined($exon->strand()));
if($exon->fail) {
print STDERR " FAILED\n";
}
if($tr) {
print STDERR " TRANSCRIPT:\n";
print STDERR " cdna_coding_start = ". $tr->cdna_coding_start() . "\n";
print STDERR " cdna_coding_end = ". $tr->cdna_coding_end(). "\n\n";
}
return;
}
sub debug {
my $msg = shift;
print STDERR $msg, "\n";
}
1;
......@@ -11,6 +11,10 @@ use StatMsg;
use Deletion;
use Insertion;
use Transcript;
use StatLogger;
use StatMsg;
use Utils qw(print_exon print_coords print_translation);
use Bio::EnsEMBL::Utils::Exception qw(throw info verbose);
......@@ -20,7 +24,7 @@ use Bio::EnsEMBL::Utils::Exception qw(throw info verbose);
my ($hhost, $hdbname, $huser, $hpass, $hport, $hassembly, #human vars
$hchromosome, $hstart, $hend,
$chost, $cdbname, $cuser, $cpass, $cport, $cassembly, #chimp vars
$help, $verbose);
$help, $verbose, $logfile);
GetOptions('hhost=s' => \$hhost,
'hdbname=s' => \$hdbname,
......@@ -37,6 +41,7 @@ use Bio::EnsEMBL::Utils::Exception qw(throw info verbose);
'cpass=s' => \$cpass,
'cport=i' => \$cport,
'cassembly=s' => \$cassembly,
'logfile=s' => \$logfile,
'help' => \$help,
'verbose' => \$verbose);
......@@ -74,6 +79,8 @@ use Bio::EnsEMBL::Utils::Exception qw(throw info verbose);
-port => $cport);
StatMsg::set_logger(StatLogger->new($logfile));
$human_db->dnadb($chimp_db);
my $slice_adaptor = $human_db->get_SliceAdaptor();
......@@ -109,6 +116,8 @@ use Bio::EnsEMBL::Utils::Exception qw(throw info verbose);
my $mapper = $asmap_adaptor->fetch_by_CoordSystems($chimp_cs, $human_cs);
my $total_transcripts = 0;
foreach my $slice (@$slices) {
info("Chromosome: " . $slice->seq_region_name());
......@@ -123,69 +132,80 @@ use Bio::EnsEMBL::Utils::Exception qw(throw info verbose);
foreach my $transcript (@$transcripts) {
next if(!$transcript->translation); #skip pseudo genes
print ++$total_transcripts, "\n";
my $interim_transcript = transfer_transcript($transcript, $mapper,
$human_cs);
my $finished_transcripts =
create_transcripts($interim_transcript, $slice_adaptor);
my $transcript_count = @$finished_transcripts;
my $translation_count = 0;
my $stop_codons_count = 0;
if($transcript_count > 1) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::SPLIT);
}
elsif($transcript_count== 0) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::NO_SEQUENCE_LEFT);
}
foreach my $ftrans (@$finished_transcripts) {
if($ftrans->translation()) {
$translation_count++;
my $pep = $ftrans->translate->seq();
print STDERR "\n\n$pep\n\n";
if($pep =~ /\*/) {
$stop_codons_count++;
}
# sanity check, if translation is defined we expect a peptide
if(!$pep) {
dump_translation($ftrans->translation());
print_translation($ftrans->translation());
throw("Unexpected Translation but no peptide");
}
} else {
print STDERR "NO TRANSLATION LEFT\n";
}
}
}
}
}
}
sub dump_translation {
my $tl = shift;
print STDERR "TRANSLATION\n";
if(!$tl) {
print STDERR " undef\n";
return;
}
if($tl->start_Exon()) {
print STDERR " start exon = ", $tl->start_Exon->stable_id, "\n";
} else {
print STDERR " start exon = undef\n";
}
if($tl->end_Exon()) {
print STDERR " end exon = ", $tl->end_Exon->stable_id, "\n";
} else {
print STDERR " end exon = undef\n";
}
# If there were stop codons in one of the split transcripts
# report it. Report it as 'entire' if all split transcripts had
# stops.
if($stop_codons_count) {
my $code = StatMsg::TRANSCRIPT | StatMsg::DOESNT_TRANSLATE;
if($stop_codons_count == $translation_count) {
$code |= StatMsg::ENTIRE;
} else {
$code |= StatMsg::PARTIAL;
}
StatMsg->new($code);
}
if(defined($tl->start())) {
print STDERR " start = ", $tl->start(), "\n";
} else {
print STDERR " start = undef\n";
}
if(!$translation_count) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::NO_CDS_LEFT);
}
if(defined($tl->end())) {
print STDERR " end = ", $tl->end(), "\n";
} else {
print STDERR " end = undef\n";
if($translation_count) {
if($stop_codons_count) {
if($translation_count > $stop_codons_count) {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::TRANSLATES |
StatMsg::PARTIAL);
}
} else {
StatMsg->new(StatMsg::TRANSCRIPT | StatMsg::TRANSLATES |
StatMsg::ENTIRE);
}
}
}
}
}
return;
}
###############################################################################
#
# transfer_transcript
......@@ -372,6 +392,8 @@ sub get_coords_extent {
my $stat_code = StatMsg::EXON;
#print_coords($coords);
foreach my $c (@$coords) {
next if($c->isa('Bio::EnsEMBL::Mapper::Gap'));
......@@ -522,3 +544,5 @@ EOF
}
#!/bin/sh
/usr/local/ensembl/bin/perl human2chimp.pl -hdbname homo_sapiens_core_20_34b -hhost ecs4 -huser ensro -hport 3351 -cdbname mcvicker_chimp_human_merge -chost ecs4 -cport 3350 -cuser ensro -verbose -hassembly NCBI34 -cassembly BROAD1
/usr/local/ensembl/bin/perl human2chimp.pl -hdbname homo_sapiens_core_20_34b -hhost ecs4 -huser ensro -hport 3351 -cdbname mcvicker_chimp_human_merge -chost ecs4 -cport 3350 -cuser ensro -hassembly NCBI34 -cassembly BROAD1 -hchromosome 9 -logfile chimp.log
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment