diff --git a/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm b/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm index c3908f6a74b100c15878a937f08f90c6c93b57c7..4101c280d9c0d5766ca5d212b2dbbb8ac5d2512d 100644 --- a/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm +++ b/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm @@ -678,6 +678,34 @@ sub non_mapped_transcript_rescore { my @target_transcripts = @{ $self->cache->get_by_key( 'transcripts_by_exon_id', 'target', $entry->target) }; + # EG reworking of logic to allow no source/target e.g. for new + # species in multispecies databases + my $st = + $self->cache() + ->get_by_key( 'transcripts_by_exon_id', 'source', + $entry->source() ); + my @source_transcripts; + if ( !defined($st) ) { + $self->logger->warning( + "Can't find source transcipts by exon_id for " + . $entry->source() ); + } else { + @source_transcripts = @{$st}; + } + + my $tt = + $self->cache() + ->get_by_key( 'transcripts_by_exon_id', 'target', + $entry->target() ); + my @target_transcripts = (); + if ( !defined($tt) ) { + $self->logger->warning( + "Can't find target transcipts by exon_id for " + . 
$entry->target() ); + } else { + @target_transcripts = @{$tt}; + } + my $found_mapped = 0; TR: diff --git a/modules/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm b/modules/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm index b6e7259c1e0f208a848f08946c9b6d3598338674..2c3476402967fd7a854720a003b475991e4d4222 100644 --- a/modules/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm +++ b/modules/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm @@ -733,28 +733,33 @@ sub create_summary_email { print $fh "\n"; - # - # clicklist of first 10 deleted genes - # - print $fh qq(\nFirst 10 deleted known genes:\n); - print $fh qq(=============================\n\n); + # EG genes_lost.txt file may not exist if species is new + if ( $self->file_exists( 'genes_lost.txt', 'debug' ) ) { + # + # clicklist of first 10 deleted genes + # - my $in_fh = $self->get_filehandle( 'genes_lost.txt', 'debug', '<' ); - my $prefix = $self->conf->param('urlprefix'); - my $i; + print $fh qq(\nFirst 10 deleted known genes:\n); + print $fh qq(=============================\n\n); - while (<$in_fh>) { - last if ( ++$i > 10 ); + my $in_fh = $self->get_filehandle( 'genes_lost.txt', 'debug', '<' ); + my $prefix = $self->conf->param('urlprefix'); + my $i; - chomp; - my ( $stable_id, $type ) = split(/\s+/); + while (<$in_fh>) { + last if ( ++$i > 10 ); - next unless ( $type eq 'known' ); + chomp; + my ( $stable_id, $type ) = split(/\s+/); - print $fh sprintf( $fmt2, $stable_id, "${prefix}$stable_id" ); - } + next unless ( $type eq 'known' ); + + print $fh sprintf( $fmt2, $stable_id, "${prefix}$stable_id" ); + } + + close($in_fh); + } ## end if ( $self->file_exists...) 
- close($in_fh); close($fh); } diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblGeneric.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblGeneric.pm index aa475697813a2813afc111edcd42afe08dbd6b86..2ba059869b0ce45c317ecd4d6cd2e525a23bca12 100644 --- a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblGeneric.pm +++ b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblGeneric.pm @@ -120,30 +120,41 @@ sub initial_stable_id { my $self = shift; my $type = shift; - my $init_stable_id; - - # use stable ID from configuration if set - if ($init_stable_id = $self->conf->param("starting_${type}_stable_id")) { - $self->logger->debug("Using pre-configured $init_stable_id as base for new $type stable IDs.\n"); - return $init_stable_id; - } - - my $s_dba = $self->cache->get_DBAdaptor('source'); - my $s_dbh = $s_dba->dbc->db_handle; - - # look in the ${type}_stable_id table first - my $sql = qq(SELECT MAX(stable_id) FROM ${type}_stable_id); - $init_stable_id = $self->fetch_value_from_db($s_dbh, $sql); + # EG modifications to permit the current stable ID to persist through + # different invocations + my $init_stable_id = $self->{stable_id_list}{$type}; + + if ( !defined($init_stable_id) ) { + # use stable ID from configuration if set + if ( $init_stable_id = + $self->conf->param("starting_${type}_stable_id") ) + { + $self->logger->debug( "Using pre-configured $init_stable_id " + . 
"as base for new $type stable IDs.\n" ); + return $init_stable_id; + } - # also look in gene_archive to make sure there are no larger Ids there - unless ($type eq 'exon') { - $sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive); - my $archived_stable_id = $self->fetch_value_from_db($s_dbh, $sql); - if ($archived_stable_id and $self->is_valid($archived_stable_id) and - ($archived_stable_id gt $init_stable_id)) { - $init_stable_id = $archived_stable_id; + my $s_dba = $self->cache->get_DBAdaptor('source'); + my $s_dbh = $s_dba->dbc->db_handle; + + # look in the ${type}_stable_id table first + my $sql = qq(SELECT MAX(stable_id) FROM ${type}_stable_id); + $init_stable_id = $self->fetch_value_from_db( $s_dbh, $sql ); + + # also look in gene_archive to make sure there are no larger Ids + # there + unless ( $type eq 'exon' ) { + $sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive); + my $archived_stable_id = + $self->fetch_value_from_db( $s_dbh, $sql ); + if ( $archived_stable_id + and $self->is_valid($archived_stable_id) + and ( $archived_stable_id gt $init_stable_id ) ) + { + $init_stable_id = $archived_stable_id; + } } - } + } ## end if ( !defined($init_stable_id...)) if ($init_stable_id) { # since $init_stable_id now is the highest existing stable Id for this diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdMapper.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdMapper.pm index 234e6bf7521220ac74a7d9bc55202397f64035c1..5d9fda6926da490e24da7b3c1aa16d0b99d06975 100644 --- a/modules/Bio/EnsEMBL/IdMapping/StableIdMapper.pm +++ b/modules/Bio/EnsEMBL/IdMapping/StableIdMapper.pm @@ -187,9 +187,10 @@ sub map_stable_ids { # check if there are any objects of this type at all my %all_sources = %{ $self->cache->get_by_name("${type}s_by_id", 'source') }; my %all_targets = %{ $self->cache->get_by_name("${type}s_by_id", 'target') }; - unless (scalar(keys %all_sources)) { - $self->logger->info("No cached ${type}s found.\n\n"); - return; + if ( scalar( keys(%all_sources) ) == 0 
) { + # EG may be possible to have no sources for new species + $self->logger->warning("No cached ${type}s found.\n\n"); + %all_sources = (); } my %stats = map { $_ => 0 } @@ -637,24 +638,31 @@ sub generate_mapping_stats { my $novel_total = $stats->{'mapped_novel'} + $stats->{'lost_novel'}; # no split into known and novel for exons - unless ( $type eq 'exon' ) { - $result .= sprintf( $fmt2, - 'known', - $stats->{'mapped_known'}, - $stats->{'lost_known'}, - ($known_total ? $stats->{'mapped_known'}/$known_total*100 : 0) - ); + if ( $type ne 'exon' ) { + $result .= sprintf( $fmt2, 'known', + $stats->{'mapped_known'}, + $stats->{'lost_known'}, ( + $known_total + ? $stats->{'mapped_known'}/$known_total*100 + : 0 ) ); + + $result .= sprintf( $fmt2, 'novel', + $stats->{'mapped_novel'}, + $stats->{'lost_novel'}, ( + $novel_total + ? $stats->{'mapped_novel'}/$novel_total*100 + : 0 ) ); + } + if ( $mapped_total == 0 ) { + # EG different calculation needed when no mappings found for new + # species + $result .= sprintf( $fmt2, 'total', $mapped_total, $lost_total, 0 ); + } else { $result .= sprintf( $fmt2, - 'novel', - $stats->{'mapped_novel'}, - $stats->{'lost_novel'}, - ($novel_total ? 
$stats->{'mapped_novel'}/$novel_total*100 : 0) - ); - } ## end unless ( $type eq 'exon' ) - - $result .= sprintf($fmt2, 'total', $mapped_total, $lost_total, - $mapped_total/($known_total + $novel_total)*100); + 'total', $mapped_total, $lost_total, + $mapped_total/( $known_total + $novel_total )*100 ); + } # log result $self->logger->info($result."\n"); diff --git a/modules/Bio/EnsEMBL/IdMapping/SyntenyFramework.pm b/modules/Bio/EnsEMBL/IdMapping/SyntenyFramework.pm index dff799eea755adf3017836e6c67779acfe6f92f6..a3d7fcf12617167a075f3c1383c2cab34432e422 100644 --- a/modules/Bio/EnsEMBL/IdMapping/SyntenyFramework.pm +++ b/modules/Bio/EnsEMBL/IdMapping/SyntenyFramework.pm @@ -376,12 +376,15 @@ sub rescore_gene_matrix_lsf { is_component => 1, ); - my $cmd = qq{$Bin/synteny_rescore.pl $options --index \$LSB_JOBINDEX}; - - my $pipe = qq{|bsub -J$lsf_name\[1-$num_jobs\] } . - qq{-o $logpath/synteny_rescore.\%I.out } . - qq{-e $logpath/synteny_rescore.\%I.err } . - $self->conf->param('lsf_opt_synteny_rescore'); + my $cmd = qq{perl -I./modules $Bin/synteny_rescore.pl } + . qq{$options --index \$LSB_JOBINDEX}; + + my $pipe = + qq{|bsub -J$lsf_name\[1-$num_jobs\] } + . qq{-o $logpath/synteny_rescore.\%I.out } + . qq{-e $logpath/synteny_rescore.\%I.err } + . $self->conf()->param('lsf_opt_run') + . $self->conf()->param('lsf_opt_synteny_rescore'); # run lsf job array $self->logger->info("Submitting $num_jobs jobs to lsf.\n");