Skip to content
Snippets Groups Projects
Commit cb5a62c7 authored by Andreas Kusalananda Kähäri's avatar Andreas Kusalananda Kähäri
Browse files

Add patches from Dan S. to allow the stable ID mapping pipeline to run

on new species (i.e. species without previous stable IDs).

Untested at the moment.
parent 888f38d8
No related branches found
No related tags found
No related merge requests found
...@@ -678,6 +678,34 @@ sub non_mapped_transcript_rescore { ...@@ -678,6 +678,34 @@ sub non_mapped_transcript_rescore {
my @target_transcripts = @{ $self->cache->get_by_key( my @target_transcripts = @{ $self->cache->get_by_key(
'transcripts_by_exon_id', 'target', $entry->target) }; 'transcripts_by_exon_id', 'target', $entry->target) };
# EG reworking of logic to allow no source/target e.g. for new
# species in multispecies databases
my $st =
$self->cache()
->get_by_key( 'transcripts_by_exon_id', 'source',
$entry->source() );
my @source_transcripts;
if ( !defined($st) ) {
$self->logger->warning(
"Can't find source transcipts by exon_id for "
. $entry->source() );
} else {
@source_transcripts = @{$st};
}
my $tt =
$self->cache()
->get_by_key( 'transcripts_by_exon_id', 'target',
$entry->target() );
my @target_transcripts = ();
if ( !defined($tt) ) {
$self->logger->warning(
"Can't find target transcipts by exon_id for "
. $entry->target() );
} else {
@target_transcripts = @{$tt};
}
my $found_mapped = 0; my $found_mapped = 0;
TR: TR:
......
...@@ -733,28 +733,33 @@ sub create_summary_email { ...@@ -733,28 +733,33 @@ sub create_summary_email {
print $fh "\n"; print $fh "\n";
# # EG genes_lost.txt file may not exist if species is new
# clicklist of first 10 deleted genes if ( $self->file_exists( 'genes_lost.txt', 'debug' ) ) {
# #
print $fh qq(\nFirst 10 deleted known genes:\n); # clicklist of first 10 deleted genes
print $fh qq(=============================\n\n); #
my $in_fh = $self->get_filehandle( 'genes_lost.txt', 'debug', '<' ); print $fh qq(\nFirst 10 deleted known genes:\n);
my $prefix = $self->conf->param('urlprefix'); print $fh qq(=============================\n\n);
my $i;
while (<$in_fh>) { my $in_fh = $self->get_filehandle( 'genes_lost.txt', 'debug', '<' );
last if ( ++$i > 10 ); my $prefix = $self->conf->param('urlprefix');
my $i;
chomp; while (<$in_fh>) {
my ( $stable_id, $type ) = split(/\s+/); last if ( ++$i > 10 );
next unless ( $type eq 'known' ); chomp;
my ( $stable_id, $type ) = split(/\s+/);
print $fh sprintf( $fmt2, $stable_id, "${prefix}$stable_id" ); next unless ( $type eq 'known' );
}
print $fh sprintf( $fmt2, $stable_id, "${prefix}$stable_id" );
}
close($in_fh);
} ## end if ( $self->file_exists...)
close($in_fh);
close($fh); close($fh);
} }
......
...@@ -120,30 +120,41 @@ sub initial_stable_id { ...@@ -120,30 +120,41 @@ sub initial_stable_id {
my $self = shift; my $self = shift;
my $type = shift; my $type = shift;
my $init_stable_id; # EG modifications to permit the current stable ID to persist trohough
# different invocations
# use stable ID from configuration if set my $init_stable_id = $self->{stable_id_list}{$type};
if ($init_stable_id = $self->conf->param("starting_${type}_stable_id")) {
$self->logger->debug("Using pre-configured $init_stable_id as base for new $type stable IDs.\n"); if ( !defined($init_stable_id) ) {
return $init_stable_id; # use stable ID from configuration if set
} if ( $init_stable_id =
$self->conf->param("starting_${type}_stable_id") )
my $s_dba = $self->cache->get_DBAdaptor('source'); {
my $s_dbh = $s_dba->dbc->db_handle; $self->logger->debug( "Using pre-configured $init_stable_id "
. "as base for new $type stable IDs.\n" );
# look in the ${type}_stable_id table first return $init_stable_id;
my $sql = qq(SELECT MAX(stable_id) FROM ${type}_stable_id); }
$init_stable_id = $self->fetch_value_from_db($s_dbh, $sql);
# also look in gene_archive to make sure there are no larger Ids there my $s_dba = $self->cache->get_DBAdaptor('source');
unless ($type eq 'exon') { my $s_dbh = $s_dba->dbc->db_handle;
$sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive);
my $archived_stable_id = $self->fetch_value_from_db($s_dbh, $sql); # look in the ${type}_stable_id table first
if ($archived_stable_id and $self->is_valid($archived_stable_id) and my $sql = qq(SELECT MAX(stable_id) FROM ${type}_stable_id);
($archived_stable_id gt $init_stable_id)) { $init_stable_id = $self->fetch_value_from_db( $s_dbh, $sql );
$init_stable_id = $archived_stable_id;
# also look in gene_archive to make sure there are no larger Ids
# there
unless ( $type eq 'exon' ) {
$sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive);
my $archived_stable_id =
$self->fetch_value_from_db( $s_dbh, $sql );
if ( $archived_stable_id
and $self->is_valid($archived_stable_id)
and ( $archived_stable_id gt $init_stable_id ) )
{
$init_stable_id = $archived_stable_id;
}
} }
} } ## end if ( !defined($init_stable_id...))
if ($init_stable_id) { if ($init_stable_id) {
# since $init_stable_id now is the highest existing stable Id for this # since $init_stable_id now is the highest existing stable Id for this
......
...@@ -187,9 +187,10 @@ sub map_stable_ids { ...@@ -187,9 +187,10 @@ sub map_stable_ids {
# check if there are any objects of this type at all # check if there are any objects of this type at all
my %all_sources = %{ $self->cache->get_by_name("${type}s_by_id", 'source') }; my %all_sources = %{ $self->cache->get_by_name("${type}s_by_id", 'source') };
my %all_targets = %{ $self->cache->get_by_name("${type}s_by_id", 'target') }; my %all_targets = %{ $self->cache->get_by_name("${type}s_by_id", 'target') };
unless (scalar(keys %all_sources)) { if ( scalar( keys(%all_sources) ) == 0 ) {
$self->logger->info("No cached ${type}s found.\n\n"); # EG may be possible to have no sources for new species
return; $self->logger->warning("No cached ${type}s found.\n\n");
%all_sources = ();
} }
my %stats = map { $_ => 0 } my %stats = map { $_ => 0 }
...@@ -637,24 +638,31 @@ sub generate_mapping_stats { ...@@ -637,24 +638,31 @@ sub generate_mapping_stats {
my $novel_total = $stats->{'mapped_novel'} + $stats->{'lost_novel'}; my $novel_total = $stats->{'mapped_novel'} + $stats->{'lost_novel'};
# no split into known and novel for exons # no split into known and novel for exons
unless ( $type eq 'exon' ) { if ( $type ne 'exon' ) {
$result .= sprintf( $fmt2, $result .= sprintf( $fmt2, 'known',
'known', $stats->{'mapped_known'},
$stats->{'mapped_known'}, $stats->{'lost_known'}, (
$stats->{'lost_known'}, $known_total
($known_total ? $stats->{'mapped_known'}/$known_total*100 : 0) ? $stats->{'mapped_known'}/$known_total*100
); : 0 ) );
$result .= sprintf( $fmt2, 'novel',
$stats->{'mapped_novel'},
$stats->{'lost_novel'}, (
$novel_total
? $stats->{'mapped_novel'}/$novel_total*100
: 0 ) );
}
if ( $mapped_total == 0 ) {
# EG different calculation needed when no mappings found for new
# species
$result .= sprintf( $fmt2, 'total', $mapped_total, $lost_total, 0 );
} else {
$result .= sprintf( $fmt2, $result .= sprintf( $fmt2,
'novel', 'total', $mapped_total, $lost_total,
$stats->{'mapped_novel'}, $mapped_total/( $known_total + $novel_total )*100 );
$stats->{'lost_novel'}, }
($novel_total ? $stats->{'mapped_novel'}/$novel_total*100 : 0)
);
} ## end unless ( $type eq 'exon' )
$result .= sprintf($fmt2, 'total', $mapped_total, $lost_total,
$mapped_total/($known_total + $novel_total)*100);
# log result # log result
$self->logger->info($result."\n"); $self->logger->info($result."\n");
......
...@@ -376,12 +376,15 @@ sub rescore_gene_matrix_lsf { ...@@ -376,12 +376,15 @@ sub rescore_gene_matrix_lsf {
is_component => 1, is_component => 1,
); );
my $cmd = qq{$Bin/synteny_rescore.pl $options --index \$LSB_JOBINDEX}; my $cmd = qq{perl -I./modules $Bin/synteny_rescore.pl }
. qq{$options --index \$LSB_JOBINDEX};
my $pipe = qq{|bsub -J$lsf_name\[1-$num_jobs\] } .
qq{-o $logpath/synteny_rescore.\%I.out } . my $pipe =
qq{-e $logpath/synteny_rescore.\%I.err } . qq{|bsub -J$lsf_name\[1-$num_jobs\] }
$self->conf->param('lsf_opt_synteny_rescore'); . qq{-o $logpath/synteny_rescore.\%I.out }
. qq{-e $logpath/synteny_rescore.\%I.err }
. $self->conf()->param('lsf_opt_run')
. $self->conf()->param('lsf_opt_synteny_rescore');
# run lsf job array # run lsf job array
$self->logger->info("Submitting $num_jobs jobs to lsf.\n"); $self->logger->info("Submitting $num_jobs jobs to lsf.\n");
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment