Skip to content
Snippets Groups Projects
Commit 4603dc5b authored by Patrick Meidl's avatar Patrick Meidl
Browse files

latest state of the code (bugfixes and adaptations due to changes in ConfParser and Logger)

parent 649a4c36
No related branches found
No related tags found
No related merge requests found
......@@ -47,6 +47,10 @@ use Storable qw(nfreeze thaw nstore retrieve);
# define available cache names here
my @cache_names = qw(
exons_by_id
transcripts_by_id
transcripts_by_exon_id
genes_by_id
genes_by_transcript_id
);
......@@ -101,12 +105,17 @@ sub build_cache {
my $i = scalar(@$genes);
# find common coord_system
$self->find_common_coord_systems;
my $common_cs_found = $self->find_common_coord_systems;
# find out whether native and common coord_system are identical
# find out whether native coord_system is a common coord_system.
# if so, you don't need to project.
# also don't project if no common coord_system present
my $need_project = 1;
my $csid = join(':', $slice->coord_system_name, $slice->coord_system->version);
$need_project = 0 if ($self->is_common_cs($csid));
my $csid = join(':', $slice->coord_system_name,
$slice->coord_system->version);
if ($self->is_common_cs($csid) or !$self->highest_common_cs) {
$need_project = 0;
}
# build cache
my $type = "$dbtype.$slice_name";
......@@ -163,8 +172,8 @@ sub build_cache_from_genes {
]);
# build gene caches
#$self->add($type, 'genes_by_id', $gene->dbID, $lgene);
#$self->add($type, 'genes_by_stable_id', $gene->stable_id, $lgene);
$self->add('genes_by_id', $type, $gene->dbID, $lgene);
#$self->add('genes_by_stable_id', $type, $gene->stable_id, $lgene);
# transcripts
foreach my $tr (@{ $gene->get_all_Transcripts }) {
......@@ -180,9 +189,9 @@ sub build_cache_from_genes {
#$lgene->add_Transcript($ltr);
# build transcript caches
$self->add($type, 'transcripts_by_id', $tr->dbID, $ltr);
#$self->add($type, 'transcripts_by_stable_id', $tr->stable_id, $ltr);
$self->add($type, 'genes_by_transcript_id', $tr->dbID, $lgene);
$self->add('transcripts_by_id', $type, $tr->dbID, $ltr);
#$self->add('transcripts_by_stable_id', $type, $tr->stable_id, $ltr);
$self->add('genes_by_transcript_id', $type, $tr->dbID, $lgene);
# translation (if there is one)
if (my $tl = $tr->translation) {
......@@ -193,9 +202,9 @@ sub build_cache_from_genes {
#$ltr->add_Translation($ltl);
#$self->add($type, 'translations_by_id', $tl->dbID, $ltl);
#$self->add($type, 'translations_by_stable_id', $tl->stable_id, $ltl);
#$self->add($type, 'translations_by_transcript_id', $tr->dbID, $ltl);
#$self->add('translations_by_id', $type, $tl->dbID, $ltl);
#$self->add('translations_by_stable_id', $type, $tl->stable_id, $ltl);
#$self->add('translations_by_transcript_id', $type, $tr->dbID, $ltl);
undef $tl;
}
......@@ -233,8 +242,8 @@ sub build_cache_from_genes {
$ltr->add_Exon($lexon);
$self->add('exons_by_id', $type, $exon->dbID, $lexon);
#$self->add($type, 'genes_by_exon_id', $exon->dbID, $lgene);
$self->add_list($type, 'transcripts_by_exon_id', $exon->dbID, $ltr);
#$self->add('genes_by_exon_id', $type, $exon->dbID, $lgene);
$self->add_list('transcripts_by_exon_id', $type, $exon->dbID, $ltr);
undef $exon;
}
......@@ -295,7 +304,7 @@ sub get_by_key {
# transparently load cache from file unless already loaded
unless ($self->{'instance'}->{'loaded'}->{"$name:$type"}) {
$self->load_and_merge($name, $type);
$self->read_and_merge($name, $type);
}
return $self->{'cache'}->{$name}->{$type}->{$key};
......@@ -311,7 +320,7 @@ sub get_by_name {
# transparently load cache from file unless already loaded
unless ($self->{'instance'}->{'loaded'}->{"$name:$type"}) {
$self->load_and_merge($name, $type);
$self->read_and_merge($name, $type);
}
return $self->{'cache'}->{$name}->{$type} || {};
......@@ -328,7 +337,7 @@ sub get_count_by_name {
# transparently load cache from file unless already loaded
unless ($self->{'instance'}->{'loaded'}->{"$name:$type"}) {
$self->load_and_merge($name, $type);
$self->read_and_merge($name, $type);
}
return scalar(keys %{ $self->get_by_name($name, $type) });
......@@ -382,6 +391,7 @@ sub find_common_coord_systems {
}
}
return $found_highest;
}
......@@ -427,7 +437,7 @@ sub seq_regions_compatible {
if ($equal/$s_count > 0.5 and $equal/$t_count > 0.5) {
return(1);
} else {
$self->logger->log("Only $equal seq_regions identical for ".$cs->name." ".$cs->version."\n");
$self->logger->info("Only $equal seq_regions identical for ".$cs->name." ".$cs->version."\n");
return(0);
}
......@@ -491,10 +501,10 @@ sub cache_file_exists {
my $cache_file = $self->cache_file($name, $type);
if (-s $cache_file) {
$self->logger->log("Cache file found. Will read from $cache_file.\n", 3);
$self->logger->info("Cache file found. Will read from $cache_file.\n", 3);
return 1;
} else {
$self->logger->log("No cache file found. Will build cache from db.\n", 3);
$self->logger->info("No cache file found for $name/$type. Will build cache from db.\n", 3);
return 0;
}
}
......@@ -573,7 +583,7 @@ sub write_to_file {
throw("You must provide a cache type.") unless $type;
unless ($self->{'cache'}->{$name}->{$type}) {
$self->logger->log_warning("No features found in $name/$type. Won't write cache file.\n");
$self->logger->warning("No features found in $name/$type. Won't write cache file.\n");
return;
}
......@@ -639,13 +649,13 @@ sub read_from_file {
throw("No valid cache file found at $cache_file.");
}
#$self->logger->log_stamped("Reading cache from file...\n");
#$self->logger->log("Cache file $cache_file.\n", 1);
#$self->logger->info("Reading cache from file...\n", 0, 'stamped');
#$self->logger->info("Cache file $cache_file.\n", 1);
eval { $self->{'cache'}->{$name}->{$type} = retrieve($cache_file); };
if ($@) {
throw("Unable to retrieve cache: $@");
}
#$self->logger->log_stamped("Done.\n");
#$self->logger->info("Done.\n", 0, 'stamped');
return $self->{'cache'}->{$name}->{$type};
}
......@@ -662,7 +672,7 @@ sub merge {
foreach my $key (keys %{ $self->{'cache'}->{$name}->{$type} || {} }) {
if (defined $self->{'cache'}->{$name}->{$merged_type}->{$key}) {
warning("Duplicate key in cache: $name|$merged_type|$key. Skipping.\n");
# warning("Duplicate key in cache: $name|$merged_type|$key. Skipping.\n");
} else {
$self->{'cache'}->{$name}->{$merged_type}->{$key} =
$self->{'cache'}->{$name}->{$type}->{$key};
......@@ -715,7 +725,11 @@ sub slice_names {
} elsif ($self->conf->param('region')) {
# filter by region (specific slice)
my $slice = $sa->fetch_by_name($self->conf->param('region'));
# don't use SliceAdaptor->fetch_by_name() since this will fail if assembly
# versions are different for source and target db
my ($cs, $version, $name, $start, $end, $strand) =
split(/:/, $self->conf->param('region'));
my $slice = $sa->fetch_by_region($cs, $name, $start, $end);
push @slice_names, $slice->name;
} else {
......
......@@ -52,28 +52,28 @@ use Bio::EnsEMBL::IdMapping::ScoredMappingMatrix;
sub score_exons {
my $self = shift;
$self->logger->log_stamped("Starting exon scoring...\n\n");
$self->logger->info("Starting exon scoring...\n\n", 0, 'stamped');
# score using overlaps, then exonerate
my $matrix = $self->overlap_score;
my $exonerate_matrix = $self->exonerate_score($matrix);
# log stats before matrix merging
$self->logger->log("\nOverlap scoring matrix:\n");
$self->logger->info("\nOverlap scoring matrix:\n");
$self->log_matrix_stats($matrix);
$self->logger->log("\nExonerate scoring matrix:\n");
$self->logger->info("\nExonerate scoring matrix:\n");
$self->log_matrix_stats($exonerate_matrix);
# merge matrices
$self->logger->log_stamped("\nMerging scoring matrices...\n");
$self->logger->info("\nMerging scoring matrices...\n", 0, 'stamped');
$matrix->merge($exonerate_matrix);
$self->logger->log_stamped("Done.\n\n");
$self->logger->info("Done.\n\n", 0, 'stamped');
# log stats of combined matrix
$self->logger->log("Combined scoring matrix:\n");
$self->logger->info("Combined scoring matrix:\n");
$self->log_matrix_stats($matrix);
$self->logger->log_stamped("\nDone with exon scoring.\n\n");
$self->logger->info("\nDone with exon scoring.\n\n", 0, 'stamped');
return $matrix;
}
......@@ -95,20 +95,20 @@ sub overlap_score {
if (-s $overlap_cache) {
# read from file
$self->logger->log_stamped("Reading exon overlap scoring matrix from file...\n");
$self->logger->log("Cache file $overlap_cache.\n", 1);
$self->logger->info("Reading exon overlap scoring matrix from file...\n", 0, 'stamped');
$self->logger->info("Cache file $overlap_cache.\n", 1);
$matrix->read_from_file;
$self->logger->log_stamped("Done.\n");
$self->logger->info("Done.\n", 0, 'stamped');
} else {
# build scoring matrix
$self->logger->log("No exon overlap scoring matrix found. Will build new one.\n");
$self->logger->info("No exon overlap scoring matrix found. Will build new one.\n");
if ($self->cache->highest_common_cs) {
$self->logger->log_stamped("Overlap scoring...\n");
$self->logger->info("Overlap scoring...\n", 0, 'stamped');
$matrix = $self->build_overlap_scores($matrix);
$self->logger->log_stamped("Done.\n");
$self->logger->info("Done.\n", 0, 'stamped');
}
# write scoring matrix to file
......@@ -142,15 +142,15 @@ sub exonerate_score {
if (-s $exonerate_cache) {
# read from file
$self->logger->log_stamped("Reading exonerate matrix from file...\n");
$self->logger->log("Cache file $exonerate_cache.\n", 1);
$self->logger->info("Reading exonerate matrix from file...\n", 0, 'stamped');
$self->logger->info("Cache file $exonerate_cache.\n", 1);
$exonerate_matrix->read_from_file;
$self->logger->log_stamped("Done.\n");
$self->logger->info("Done.\n", 0, 'stamped');
} else {
# build scoring matrix
$self->logger->log("No exonerate matrix found. Will build new one.\n");
$self->logger->info("No exonerate matrix found. Will build new one.\n");
# dump exons to fasta files
my $dump_count = $self->dump_filtered_exons($matrix);
......@@ -164,7 +164,7 @@ sub exonerate_score {
} else {
$self->logger->log("No source and/or target exons dumped, so don't need to run exonerate.\n");
$self->logger->info("No source and/or target exons dumped, so don't need to run exonerate.\n");
}
......@@ -193,7 +193,7 @@ sub build_overlap_scores {
}
# get sorted list of exon containers
$self->logger->log_stamped("Reading sorted exons from cache...\n", 1);
$self->logger->info("Reading sorted exons from cache...\n", 1, 'stamped');
my @source_exons = $self->sort_exons(
[values %{ $self->cache->get_by_name('exons_by_id', 'source') }]
......@@ -202,7 +202,7 @@ sub build_overlap_scores {
[values %{ $self->cache->get_by_name('exons_by_id', 'target') }]
);
$self->logger->log_stamped("Done.\n", 1);
$self->logger->info("Done.\n", 1, 'stamped');
# get first source and target exon container
my $source_ec = shift(@source_exons);
......@@ -211,7 +211,7 @@ sub build_overlap_scores {
my %source_overlap = ();
my %target_overlap = ();
$self->logger->log_stamped("Scoring...\n", 1);
$self->logger->info("Scoring...\n", 1, 'stamped');
while ($source_ec or $target_ec) {
......@@ -243,7 +243,8 @@ sub build_overlap_scores {
next if (defined($matrix->get_score(
$source_ec->[0]->id, $target_exon->id)));
$self->overlap_score($source_ec->[0], $target_exon, $matrix);
$self->calc_overlap_score($source_ec->[0], $target_exon,
$matrix);
}
}
......@@ -265,7 +266,7 @@ sub build_overlap_scores {
next if (defined($matrix->get_score(
$source_exon->id, $target_ec->[0]->id)));
$self->overlap_score($source_exon, $target_ec->[0], $matrix);
$self->calc_overlap_score($source_exon, $target_ec->[0], $matrix);
}
}
......@@ -274,7 +275,7 @@ sub build_overlap_scores {
}
}
$self->logger->log_stamped("Done.\n", 1);
$self->logger->info("Done.\n", 1, 'stamped');
return $matrix;
}
......@@ -312,7 +313,7 @@ sub compare_exon_containers {
# region by exons sizes. 1.0 is full overlap on both exons. Score of at least
# 0,5 are added to the exon scoring matrix.
#
sub overlap_score {
sub calc_overlap_score {
my $self = shift;
my $source_exon = shift;
my $target_exon = shift;
......@@ -361,8 +362,8 @@ sub run_exonerate {
my $source_file = $self->exon_fasta_file('source');
my $target_file = $self->exon_fasta_file('target');
my $source_size = -s $source_size;
my $target_size = -s $target_size;
my $source_size = -s $source_file;
my $target_size = -s $target_file;
# check if fasta files exist and are not empty
unless ($source_size and $target_size) {
......@@ -373,21 +374,21 @@ sub run_exonerate {
my $logpath = ($self->conf->param('logpath')||$self->cache->dump_path).
'/lsf_exonerate';
system("rm -rf $logpath") == 0 or
$self->logger->log_error("Unable to delete lsf log dir $logpath: $!\n");
$self->logger->error("Unable to delete lsf log dir $logpath: $!\n");
system("mkdir -p $logpath") == 0 or
$self->logger->log_error("Can't create lsf log dir $logpath: $!\n");
$self->logger->error("Can't create lsf log dir $logpath: $!\n");
# delete exonerate output from previous runs
my $dumppath = $self->cache->dump_path;
opendir(DUMPDIR, $dumppath) or
$self->logger->log_error("Can't open $dumppath for reading: $!");
$self->logger->error("Can't open $dumppath for reading: $!");
while (defined(my $file = readdir(DUMPDIR))) {
next unless /exonerate_map\.\d+/;
unlink("$dumppath/$file") or
$self->logger->log_error("Can't delete $dumppath/$file: $!");
$self->logger->error("Can't delete $dumppath/$file: $!");
}
closedir(DUMPDIR);
......@@ -404,29 +405,37 @@ sub run_exonerate {
#
# run exonerate jobs using lsf
#
my $exonerate_job = "bsub -J $lsf_name[1-$num_jobs] " .
" -o $logpath/exonerate.%I.out -e $logpath/exonerate.%I.err" .
" -q normal -m bc_hosts " .
" $exonerate_path $source_file $target_file" .
" --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs" .
" --model affine:local -M 900 --showalignment FALSE --subopt no" .
" --percent $percent --ryo \"myinfo: \%qi \%ti \%et \%ql \%tl\\n\"" .
" | grep '^myinfo:' > $dumppath/exonerate_map.\$LSB_JOBINDEX";
$self->logger->log("Submitting $num_jobs exonerate jobs to lsf:\n\n");
$self->logger->log("$exonerate_job\n\n");
local *BSUB;
open BSUB, "|bsub -J$lsf_name\[1-$num_jobs\] -o $logpath/exonerate.\%I.out"
or $self->logger->error("Could not open open pipe to bsub: $!\n");
my $exonerate_job = qq{$exonerate_path } .
qq{--query $source_file --target $target_file } .
q{--querychunkid $LSB_JOBINDEX } .
qq{--querychunktotal $num_jobs } .
q{--model affine:local -M 900 --showalignment FALSE --subopt no } .
qq{--percent $percent } .
q{--ryo 'myinfo: %qi %ti %et %ql %tl\n' } .
qq{| grep '^myinfo:' > $dumppath/exonerate_map.\$LSB_JOBINDEX} . "\n";
$self->logger->info("Submitting $num_jobs exonerate jobs to lsf:\n\n");
$self->logger->info("$exonerate_job\n\n");
system("$exonerate_job") == 0
or $self->logger->log_error("Error submitting exonerate jobs: $!");
print BSUB $exonerate_job;
$self->logger->error("Error submitting exonerate jobs: $!\n")
unless ($? == 0);
close BSUB;
# submit depended job to monitor finishing of exonerate jobs
$self->logger->log_stamped("Waiting for exonerate jobs to finish...\n");
$self->logger->info("Waiting for exonerate jobs to finish...\n", 0, 'stamped');
my $depended_job = "bsub -K -w ended($lsf_name) -q small " .
" -o $logpath/exonerate_depend.out -e $logpath/exonerate_depend.err" .
" /bin/true";
my $dependent_job = qq{bsub -K -w "ended($lsf_name)" -q small } .
qq{-o $logpath/exonerate_depend.out /bin/true};
$self->logger->log_stamepd("All exonerate jobs finished.\n");
system($dependent_job) == 0 or
$self->logger->error("Error submitting dependent job: $!\n");
$self->logger->info("All exonerate jobs finished.\n", 0, 'stamped');
#
# check results
......@@ -446,18 +455,18 @@ sub run_exonerate {
}
if (@missing) {
$self->logger->log("Couldn't find all exonerate output files. These are missing:\n");
$self->logger->info("Couldn't find all exonerate output files. These are missing:\n");
foreach (@missing) {
$self->logger->log("$_\n", 1);
$self->logger->info("$_\n", 1);
}
exit(1);
}
if (@error) {
$self->logger->log("One or more exonerate jobs failed. Check these error files:\n");
$self->logger->info("One or more exonerate jobs failed. Check these error files:\n");
foreach (@error) {
$self->logger->log("$_\n", 1);
$self->logger->info("$_\n", 1);
}
exit(1);
......@@ -506,7 +515,7 @@ sub write_filtered_exons {
throw('You must provide a ScoredMappingMatrix.');
}
$self->logger->log_stamped("\nDumping $type exons to fasta file...\n");
$self->logger->info("\nDumping $type exons to fasta file...\n", 0, 'stamped');
# don't dump exons shorter than this
my $min_exon_length = $self->conf->param('min_exon_length') || 15;
......@@ -552,11 +561,11 @@ sub write_filtered_exons {
# log
my $fmt = "%-30s%10s\n";
my $size = -e $file;
$self->logger->log(sprintf($fmt, 'Total exons:', $total_exons), 1);
$self->logger->log(sprintf($fmt, 'Dumped exons:', $dumped_exons), 1);
$self->logger->log(sprintf($fmt, 'Dump file size:', parse_bytes($size)), 1);
$self->logger->log_stamped("Done.\n\n");
my $size = -s $file;
$self->logger->info(sprintf($fmt, 'Total exons:', $total_exons), 1);
$self->logger->info(sprintf($fmt, 'Dumped exons:', $dumped_exons), 1);
$self->logger->info(sprintf($fmt, 'Dump file size:', parse_bytes($size)), 1);
$self->logger->info("Done.\n\n", 0, 'stamped');
return $dumped_exons;
}
......@@ -571,21 +580,20 @@ sub parse_exonerate_results {
throw('You must provide a ScoredMappingMatrix.');
}
$self->logger->log_stamped("Parsing exonerate results...\n");
$self->logger->info("Parsing exonerate results...\n", 0, 'stamped');
# loop over all result files
my $dumppath = $self->cache->dump_path;
my $num_files = 0;
my $num_lines = 0;
opendir(DUMPDIR, $dumppath) or
$self->logger->log_error("Can't open $dumppath for reading: $!");
$self->logger->error("Can't open $dumppath for reading: $!");
while (defined(my $file = readdir(DUMPDIR))) {
next unless /exonerate_map\.\d+/;
next unless $file =~ /exonerate_map\.\d+/;
# counters
$num_files++;
my $num_lines = 0;
open(F, '<', "$dumppath/$file");
......@@ -601,7 +609,7 @@ sub parse_exonerate_results {
my $score = 0;
if ($source_length == 0 or $target_length == 0) {
$self->logger->log_warning("Alignment length is 0 for $source_id/$target_id.\n");
$self->logger->warning("Alignment length is 0 for $source_id/$target_id.\n");
} else {
$score = 2 * $match_length / ($source_length + $target_length);
}
......@@ -615,7 +623,7 @@ sub parse_exonerate_results {
closedir(DUMPDIR);
$self->logger->log_stamped("Done parsing $num_lines lines from $num_files result files.\n");
$self->logger->info("Done parsing $num_lines lines from $num_files result files.\n", 0, 'stamped');
return $exonerate_matrix;
}
......
......@@ -52,7 +52,7 @@ sub score_genes {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
$self->logger->log_stamped("Starting gene scoring...\n\n");
$self->logger->info("Starting gene scoring...\n\n", 0, 'stamped');
# build scores based on transcript scores
my $matrix = $self->scores_from_transcript_scores($transcript_matrix);
......@@ -60,23 +60,23 @@ sub score_genes {
# log stats of combined matrix
my $fmt = "%-40s%10.0f\n";
$self->logger->log("Scoring matrix:\n");
$self->logger->info("Scoring matrix:\n");
$self->logger->log(sprintf($fmt, "Total source genes:",
$self->cache->get_count_by_name('genes_by_id', 'source'), 1);
$self->logger->info(sprintf($fmt, "Total source genes:",
$self->cache->get_count_by_name('genes_by_id', 'source')), 1);
$self->logger->log(sprintf($fmt, "Scored source genes:",
$self->logger->info(sprintf($fmt, "Scored source genes:",
$matrix->get_source_count), 1);
$self->logger->log(sprintf($fmt, "Total target genes:",
$self->cache->get_count_by_name('genes_by_id', 'target'), 1);
$self->logger->info(sprintf($fmt, "Total target genes:",
$self->cache->get_count_by_name('genes_by_id', 'target')), 1);
$self->logger->log(sprintf($fmt, "Scored target genes:",
$self->logger->info(sprintf($fmt, "Scored target genes:",
$matrix->get_target_count), 1);
$self->log_matrix_stats($matrix);
$self->logger->log("\nDone with transcript scoring.\n\n");
$self->logger->info("\nDone with transcript scoring.\n\n");
return $matrix;
}
......@@ -101,19 +101,19 @@ sub scores_from_transcript_scores {
if (-s $gene_cache) {
# read from file
$self->logger->log_stamped("Reading gene scoring matrix from file...\n");
$self->logger->log("Cache file $gene_cache.\n", 1);
$self->logger->info("Reading gene scoring matrix from file...\n", 0, 'stamped');
$self->logger->info("Cache file $gene_cache.\n", 1);
$matrix->read_from_file;
$self->logger->log_stamped("Done.\n");
$self->logger->info("Done.\n", 0, 'stamped');
} else {
# build scoring matrix
$self->logger->log("No gene scoring matrix found. Will build new one.\n");
$self->logger->info("No gene scoring matrix found. Will build new one.\n");
$self->logger->log_stamped("Transcript scoring...\n");
$self->logger->info("Gene scoring...\n", 0, 'stamped');
$matrix = $self->build_scores($matrix, $transcript_matrix);
$self->logger->log_stamped("Done.\n");
$self->logger->info("Done.\n", 0, 'stamped');
# write scoring matrix to file
$matrix->write_to_file;
......@@ -144,7 +144,7 @@ sub build_scores {
$self->flag_matrix_from_transcript_scores($matrix, $transcript_matrix);
# now calculate the actual scores for the genes in the flag matrix
$final_matrix = $self->score_matrix_from_flag_matrix($matrix,
my $final_matrix = $self->score_matrix_from_flag_matrix($matrix,
$transcript_matrix);
return $final_matrix;
......@@ -161,7 +161,7 @@ sub flag_matrix_from_transcript_scores {
my $num_genes =
scalar(keys %{ $self->cache->get_by_name('genes_by_id', 'source') });
$self->logger->log("Creating flag matrix...\n", 1);
$self->logger->info("Creating flag matrix...\n", 1);
# for every transcript scoring matrix entry, make an entry in the gene flag
# matrix.
......@@ -178,7 +178,7 @@ sub flag_matrix_from_transcript_scores {
$matrix->add_score($source_gene->id, $target_gene->id, 1);
}
$self->logger->log("\n\n");
$self->logger->info("\n\n");
return $matrix;
}
......@@ -201,7 +201,7 @@ sub score_matrix_from_flag_matrix {
my $num_genes =
scalar(keys %{ $self->cache->get_by_name('genes_by_id', 'source') });
$self->logger->log("Creating score matrix from flag matrix...\n", 1);
$self->logger->info("Creating score matrix from flag matrix...\n", 1);
# loop over flag matrix and do proper scoring for each entry
foreach my $entry (@{ $flag_matrix->get_all_Entries }) {
......@@ -230,7 +230,7 @@ sub score_matrix_from_flag_matrix {
$matrix->add($entry->source, $entry->target, $score);
}
$self->logger->log("\n\n");
$self->logger->info("\n\n");
return $matrix;
}
......@@ -254,7 +254,7 @@ sub complex_gene_gene_score {
@{ $target_gene->get_all_Transcripts };
# loop over source transcripts
foreach my $source_transcript (@{ $source_gene->get_all_Transcripts ) {
foreach my $source_transcript (@{ $source_gene->get_all_Transcripts }) {
# now loop over target transcripts and find the highest scoring target
# transcript belonging to the target gene
......@@ -262,7 +262,7 @@ sub complex_gene_gene_score {
foreach my $target_transcript_id (@{ $transcript_matrix->get_targets_for_source($source_transcript->id) }) {
next unless (%target_transcripts{$target_transcript_id});
next unless ($target_transcripts{$target_transcript_id});
my $score = $transcript_matrix->get_score(
$source_transcript->id, $target_transcript_id);
......@@ -281,7 +281,7 @@ sub complex_gene_gene_score {
@{ $source_gene->get_all_Transcripts };
# loop over target transcripts
foreach my $target_transcript (@{ $target_gene->get_all_Transcripts ) {
foreach my $target_transcript (@{ $target_gene->get_all_Transcripts }) {
# now loop over source transcripts and find the highest scoring source
# transcript belonging to the source gene
......@@ -289,7 +289,7 @@ sub complex_gene_gene_score {
foreach my $source_transcript_id (@{ $transcript_matrix->get_sources_for_target($target_transcript->id) }) {
next unless (%source_transcripts{$source_transcript_id});
next unless ($source_transcripts{$source_transcript_id});
my $score = $transcript_matrix->get_score(
$source_transcript_id, $target_transcript->id);
......@@ -306,14 +306,14 @@ sub complex_gene_gene_score {
# calculate overall score for this gene
my $gene_score = 0;
if (($source_gene_length + $target_gene_length) > 0) {
if (($source_gene->length + $target_gene->length) > 0) {
$gene_score = ($source_gene_score + $target_gene_score) /
($source_gene_length + $target_gene_length);
($source_gene->length + $target_gene->length);
} else {
$self->logger->log_warning("Combined length of source (".$source_gene->id.") and target (".$target_gene->id.") gene is zero!\n", 1);
$self->logger->warning("Combined length of source (".$source_gene->id.") and target (".$target_gene->id.") gene is zero!\n", 1);
}
......@@ -326,7 +326,7 @@ sub complex_gene_gene_score {
# This is used when the more elaborate gene representing score does not
# distinguish very well.
#
sub complex_gene_gene_score {
sub simple_gene_gene_score {
my $self = shift;
my $source_gene = shift;
my $target_gene = shift;
......@@ -334,8 +334,8 @@ sub complex_gene_gene_score {
my $gene_score = 0;
foreach my $source_transcript (@{ $source_gene->get_all_Transcripts ) {
foreach my $target_transcript (@{ $target_gene->get_all_Transcripts ) {
foreach my $source_transcript (@{ $source_gene->get_all_Transcripts }) {
foreach my $target_transcript (@{ $target_gene->get_all_Transcripts }) {
my $score = $transcript_matrix->get_score($source_transcript->id,
$target_transcript->id);
......
......@@ -111,15 +111,15 @@ sub log_matrix_stats {
my $fmt1 = "%-40s%10.0f\n";
my $fmt2 = "%-40s%10.2f\n";
$self->logger->log(sprintf($fmt1, "Scoring matrix entries:",
$self->logger->info(sprintf($fmt1, "Scoring matrix entries:",
$matrix->get_entry_count), 1);
$self->logger->log(sprintf($fmt2, "Average score:",
$self->logger->info(sprintf($fmt2, "Average score:",
$matrix->get_average_score), 1);
my ($min, $max) = @{ $matrix->get_min_max_scores };
$self->logger->log(sprintf($fmt2, "Min. score:", $min), 1);
$self->logger->log(sprintf($fmt2, "Max. score:", $max), 1);
$self->logger->info(sprintf($fmt2, "Min. score:", $min), 1);
$self->logger->info(sprintf($fmt2, "Max. score:", $max), 1);
}
......
......@@ -156,27 +156,32 @@ sub get_all_Entries {
sub get_all_sources {
return [keys %{ $_->{'source_list'} }];
my $self = shift;
return [keys %{ $self->{'cache'}->{'source_list'} }];
}
sub get_all_targets {
return [keys %{ $_->{'target_list'} }];
my $self = shift;
return [keys %{ $self->{'cache'}->{'target_list'} }];
}
sub get_entry_count {
return scalar(keys %{ $_->{'matrix'} });
my $self = shift;
return scalar(keys %{ $self->{'cache'}->{'matrix'} });
}
sub get_source_count {
return scalar(keys %{ $_->{'source_list'} });
my $self = shift;
return scalar(keys %{ $self->{'cache'}->{'source_list'} });
}
sub get_target_count {
return scalar(keys %{ $_->{'target_list'} });
my $self = shift;
return scalar(keys %{ $self->{'cache'}->{'target_list'} });
}
......@@ -228,10 +233,10 @@ sub merge {
my $c = 0;
foreach my $key (keys %{ $matrix->{'matrix'} }) {
foreach my $key (keys %{ $matrix->{'cache'}->{'matrix'} }) {
if (!defined($self->{'cache'}->{'matrix'}->{$key}) or
$self->{'cache'}->{'matrix'}->{$key} < $matrix->{'matrix'}->{$key}) {
$self->{'cache'}->{'matrix'}->{$key} = $matrix->{'matrix'}->{$key};
$self->{'cache'}->{'matrix'}->{$key} < $matrix->{'cache'}->{'matrix'}->{$key}) {
$self->{'cache'}->{'matrix'}->{$key} = $matrix->{'cache'}->{'matrix'}->{$key};
$c++;
}
}
......
......@@ -52,7 +52,7 @@ sub score_transcripts {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
$self->logger->log_stamped("Starting transcript scoring...\n\n");
$self->logger->info("Starting transcript scoring...\n\n", 0, 'stamped');
# build scores based on exon scores
my $matrix = $self->scores_from_exon_scores($exon_matrix);
......@@ -60,23 +60,23 @@ sub score_transcripts {
# log stats of combined matrix
my $fmt = "%-40s%10.0f\n";
$self->logger->log("Scoring matrix:\n");
$self->logger->info("Scoring matrix:\n");
$self->logger->log(sprintf($fmt, "Total source transcripts:",
$self->cache->get_count_by_name('transcripts_by_id', 'source'), 1);
$self->logger->info(sprintf($fmt, "Total source transcripts:",
$self->cache->get_count_by_name('transcripts_by_id', 'source')), 1);
$self->logger->log(sprintf($fmt, "Scored source transcripts:",
$self->logger->info(sprintf($fmt, "Scored source transcripts:",
$matrix->get_source_count), 1);
$self->logger->log(sprintf($fmt, "Total target transcripts:",
$self->cache->get_count_by_name('transcripts_by_id', 'target'), 1);
$self->logger->info(sprintf($fmt, "Total target transcripts:",
$self->cache->get_count_by_name('transcripts_by_id', 'target')), 1);
$self->logger->log(sprintf($fmt, "Scored target transcripts:",
$self->logger->info(sprintf($fmt, "Scored target transcripts:",
$matrix->get_target_count), 1);
$self->log_matrix_stats($matrix);
$self->logger->log("\nDone with transcript scoring.\n\n");
$self->logger->info("\nDone with transcript scoring.\n\n");
return $matrix;
}
......@@ -101,19 +101,19 @@ sub scores_from_exon_scores {
if (-s $transcript_cache) {
# read from file
$self->logger->log_stamped("Reading transcript scoring matrix from file...\n");
$self->logger->log("Cache file $transcript_cache.\n", 1);
$self->logger->info("Reading transcript scoring matrix from file...\n", 0, 'stamped');
$self->logger->info("Cache file $transcript_cache.\n", 1);
$matrix->read_from_file;
$self->logger->log_stamped("Done.\n");
$self->logger->info("Done.\n", 0, 'stamped');
} else {
# build scoring matrix
$self->logger->log("No transcript scoring matrix found. Will build new one.\n");
$self->logger->info("No transcript scoring matrix found. Will build new one.\n");
$self->logger->log_stamped("Transcript scoring...\n");
$self->logger->info("Transcript scoring...\n", 0, 'stamped');
$matrix = $self->build_scores($matrix, $exon_matrix);
$self->logger->log_stamped("Done.\n");
$self->logger->info("Done.\n", 0, 'stamped');
# write scoring matrix to file
$matrix->write_to_file;
......@@ -144,7 +144,8 @@ sub build_scores {
$self->flag_matrix_from_exon_scores($matrix, $exon_matrix);
# now calculate the actual scores for the transcripts in the flag matrix
$final_matrix = $self->score_matrix_from_flag_matrix($matrix, $exon_matrix);
my $final_matrix =
$self->score_matrix_from_flag_matrix($matrix, $exon_matrix);
return $final_matrix;
}
......@@ -160,7 +161,7 @@ sub flag_matrix_from_exon_scores {
my $num_transcripts =
scalar(keys %{ $self->cache->get_by_name('transcripts_by_id', 'source') });
$self->logger->log("Creating flag matrix...\n", 1);
$self->logger->info("Creating flag matrix...\n", 1);
# loop over source transcripts
foreach my $source_transcript (values %{ $self->cache->get_by_name('transcripts_by_id', 'source') }) {
......@@ -169,7 +170,7 @@ sub flag_matrix_from_exon_scores {
$self->logger->log_progress($num_transcripts, ++$i, 20, 1, 0);
# get all exons for the source transcript
foreach my $source_exon (@{ $souce_transcript->get_all_Exons }) {
foreach my $source_exon (@{ $source_transcript->get_all_Exons }) {
# get target exons for this source exon from scoring matrix
foreach my $target_exon_id (@{ $exon_matrix->get_targets_for_source($source_exon->id) }) {
......@@ -178,14 +179,14 @@ sub flag_matrix_from_exon_scores {
foreach my $target_transcript (@{ $self->cache->get_by_key('transcripts_by_exon_id', 'target', $target_exon_id) }) {
# add scoring flag for these two transcripts
$matrix->add_score($source_transcript->id, $target_transcript->id, 1);
$matrix->add_score($source_transcript->id, $target_transcript->id, 1);
}
}
}
}
$self->logger->log("\n\n");
$self->logger->info("\n\n");
return $matrix;
}
......@@ -210,7 +211,7 @@ sub score_matrix_from_flag_matrix {
my $num_transcripts =
scalar(keys %{ $self->cache->get_by_name('transcripts_by_id', 'source') });
$self->logger->log("Creating score matrix from flag matrix...\n", 1);
$self->logger->info("Creating score matrix from flag matrix...\n", 1);
# loop over source transcripts
foreach my $source_transcript (values %{ $self->cache->get_by_name('transcripts_by_id', 'source') }) {
......@@ -247,7 +248,7 @@ sub score_matrix_from_flag_matrix {
foreach my $target_exon_id (@{ $exon_matrix->get_targets_for_source($source_exon->id) }) {
next unless (%target_exon{$target_exon_id});
next unless ($target_exons{$target_exon_id});
my $score = $exon_matrix->get_score(
$source_exon->id, $target_exon_id);
......@@ -266,7 +267,7 @@ sub score_matrix_from_flag_matrix {
foreach my $source_exon_id (@{ $exon_matrix->get_sources_for_target($target_exon->id) }) {
next unless (%source_exon{$source_exon_id});
next unless ($source_exons{$source_exon_id});
my $score = $exon_matrix->get_score(
$source_exon_id, $target_exon->id);
......@@ -287,7 +288,7 @@ sub score_matrix_from_flag_matrix {
if (($source_transcript_score > $source_transcript_length) or
($target_transcript_score > $target_transcript_length)) {
$self->logger->log_warning("Score > length for source ($source_target_score <> $source_transcript_length) or target ($target_transcript_score <> $target_transcript_length).\n", 1);
$self->logger->warning("Score > length for source ($source_transcript_score <> $source_transcript_length) or target ($target_transcript_score <> $target_transcript_length).\n", 1);
} else {
......@@ -305,14 +306,14 @@ sub score_matrix_from_flag_matrix {
} else {
$self->logger->log_warning("Combined length of source (".$source_transcript->id.") and target (".$target_transcript->id.") transcript is zero!\n", 1);
$self->logger->warning("Combined length of source (".$source_transcript->id.") and target (".$target_transcript->id.") transcript is zero!\n", 1);
}
}
}
$self->logger->log("\n\n");
$self->logger->info("\n\n");
return $matrix;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment