diff --git a/modules/Bio/EnsEMBL/IdMapping/Cache.pm b/modules/Bio/EnsEMBL/IdMapping/Cache.pm index 44f0ea5efbe1106c739a755e30a7e05db20e9405..32316da4b7a3e620e1e59b28e9dee93c27fd3db5 100644 --- a/modules/Bio/EnsEMBL/IdMapping/Cache.pm +++ b/modules/Bio/EnsEMBL/IdMapping/Cache.pm @@ -322,11 +322,6 @@ sub build_cache_from_genes { ($tr->is_known ? 1 : 0), ]); - # EG to support improved discrimination, include the name - if ( defined( $gene->display_xref() ) ) { - $lgene->gene_name( $gene->display_xref()->display_id() ); - } - $lgene->add_Transcript($ltr); # build transcript caches @@ -372,10 +367,6 @@ sub build_cache_from_genes { $need_project, ]); - # EG to support improved discrimination, include the name of the - # gene tp which exon belongs - $lexon->gene_name( $lgene->gene_name() ); - # get coordinates in common coordinate system if needed if ($need_project) { my @seg = @{ $exon->project($self->highest_common_cs, @@ -389,7 +380,7 @@ sub build_cache_from_genes { $lexon->common_sr_name($sl->seq_region_name); } } - + $ltr->add_Exon($lexon); $self->add('exons_by_id', $type, $exon->dbID, $lexon); @@ -922,28 +913,24 @@ sub fetch_value_from_db { return $c; } + sub get_DBAdaptor { - my ( $self, $prefix ) = @_; + my $self = shift; + my $prefix = shift; - unless ( $self->{'_dba'}->{$prefix} ) { + unless ($self->{'_dba'}->{$prefix}) { # connect to database - my %args = ( -host => $self->conf->param("${prefix}host"), - -port => $self->conf->param("${prefix}port"), - -user => $self->conf->param("${prefix}user"), - -pass => $self->conf->param("${prefix}pass"), - -dbname => $self->conf->param("${prefix}dbname"), - -group => $prefix ); - - if ( defined( $self->conf()->param('species_id') ) ) { # EG - $args{-species_id} = $self->conf->param('species_id'); - $args{-species} = $self->conf->param('species_name'); - $args{-multispecies_db} = 1; - } - - my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor(%args); - - # explicitely set the dnadb to itself - by default the Registry - # assumes a group 'core' for this now + my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor( + -host => $self->conf->param("${prefix}host"), + -port => $self->conf->param("${prefix}port"), + -user => $self->conf->param("${prefix}user"), + -pass => $self->conf->param("${prefix}pass"), + -dbname => $self->conf->param("${prefix}dbname"), + -group => $prefix, + ); + + # explicitely set the dnadb to itself - by default the Registry assumes + # a group 'core' for this now $dba->dnadb($dba); $self->{'_dba'}->{$prefix} = $dba; diff --git a/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm b/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm index 23de8009ad0c25c423a3bff20fe652673417f272..a569d2b2dcc332e29bc6ee5befd7017d254ff334 100644 --- a/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm +++ b/modules/Bio/EnsEMBL/IdMapping/ExonScoreBuilder.pm @@ -430,10 +430,7 @@ sub run_exonerate { $self->logger->debug("$exonerate_job\n\n"); local *BSUB; - open BSUB, - "|bsub " - . $self->conf()->param('lsf_opt_run') - . " -J$lsf_name\[1-$num_jobs\] -o $logpath/exonerate.\%I.out" + open BSUB, "|bsub -J$lsf_name\[1-$num_jobs\] -o $logpath/exonerate.\%I.out" or $self->logger->error("Could not open open pipe to bsub: $!\n"); print BSUB $exonerate_job; @@ -444,10 +441,8 @@ sub run_exonerate { # submit dependent job to monitor finishing of exonerate jobs $self->logger->info("Waiting for exonerate jobs to finish...\n", 0, 'stamped'); - my $dependent_job = - qq{bsub -K -w "ended($lsf_name)" } - . $self->conf()->param('lsf_opt_run_small') - . qq{ -o $logpath/exonerate_depend.out /bin/true}; + my $dependent_job = qq{bsub -K -w "ended($lsf_name)" -q small } . + qq{-o $logpath/exonerate_depend.out /bin/true}; system($dependent_job) == 0 or $self->logger->error("Error submitting dependent job: $!\n"); @@ -673,35 +668,10 @@ sub non_mapped_transcript_rescore { foreach my $entry (@{ $matrix->get_all_Entries }) { - # EG reworking of logic to allow no source/target e.g. for new - # species in multispecies databases - my $st = - $self->cache() - ->get_by_key( 'transcripts_by_exon_id', 'source', - $entry->source() ); - - my @source_transcripts = (); - if ( !defined($st) ) { - $self->logger->warning( - "Can't find source transcipts by exon_id for " - . $entry->source() ); - } else { - @source_transcripts = @{$st}; - } - - my $tt = - $self->cache() - ->get_by_key( 'transcripts_by_exon_id', 'target', - $entry->target() ); - - my @target_transcripts = (); - if ( !defined($tt) ) { - $self->logger->warning( - "Can't find target transcipts by exon_id for " - . $entry->target() ); - } else { - @target_transcripts = @{$tt}; - } + my @source_transcripts = @{ $self->cache->get_by_key( + 'transcripts_by_exon_id', 'source', $entry->source) }; + my @target_transcripts = @{ $self->cache->get_by_key( + 'transcripts_by_exon_id', 'target', $entry->target) }; my $found_mapped = 0; @@ -727,84 +697,5 @@ sub non_mapped_transcript_rescore { } -sub name_exon_rescore { - - # EG name_exon_rescore is additional method for rescoring exons based - # on name matches - - my ( $self, $matrix ) = @_; - - if ( !( defined($matrix) - && ref($matrix) - && $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') - ) ) - { - throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.'); - } - - my $i = 0; - - foreach my $entry ( @{ $matrix->get_all_Entries() } ) { - my $source_exon = - $self->cache->get_by_key( 'exons_by_id', 'source', - $entry->source() ); - my $target_exon = - $self->cache->get_by_key( 'exons_by_id', 'target', - $entry->target() ); - - if ( defined($source_exon) - && defined($target_exon) - && $source_exon->gene_name() ne $target_exon->gene_name() ) - { - $matrix->set_score( $entry->source(), $entry->target(), - ( $entry->score()*0.75 ) ); - $i++; - } - } - - $self->logger->debug( "Scored exons with name mismatch: $i\n", 1 ); -} ## end sub name_exon_rescore - -sub bounds_exon_rescore { - - # EG additional method for rescoring exons based on bounds matching - - my ( $self, $matrix ) = @_; - - if ( !( defined($matrix) - && ref($matrix) - && $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') - ) ) - { - throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.'); - } - - my $i = 0; - - foreach my $entry ( @{ $matrix->get_all_Entries() } ) { - my $source_exon = - $self->cache->get_by_key( 'exons_by_id', 'source', - $entry->source() ); - my $target_exon = - $self->cache->get_by_key( 'exons_by_id', 'target', - $entry->target() ); - - if ( defined($target_exon) - && defined($source_exon) - && ( $source_exon->strand() != $target_exon->strand() - || $source_exon->start() != $target_exon->start() - || $source_exon->end() != $target_exon->end() ) ) - { - my $new_score = ( $entry->score()*0.5 ); - $matrix->set_score( $entry->source(), $entry->target(), - $new_score ); - $i++; - } - } - - $self->logger->debug( "Scored exons with bounds mismatch: $i\n", 1 ); -} ## end sub bounds_exon_rescore - - 1; diff --git a/modules/Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm b/modules/Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm index 7e2ee41057273bd447dea0cd40758c65737b7ed1..af65c6da0d27124c8937c4be5c116771913eabbc 100644 --- a/modules/Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm +++ b/modules/Bio/EnsEMBL/IdMapping/GeneScoreBuilder.pm @@ -424,40 +424,5 @@ sub biotype_gene_rescore { } ## end sub biotype_gene_rescore -sub name_gene_rescore { - - # EG name_gene_rescore is supplementary method to use to discriminate - # near identical genes using existing names - - my ( $self, $matrix ) = @_; - - if ( !( defined($matrix) - && ref($matrix) - && $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') - ) ) - { - throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.'); - } - - my $i = 0; - - foreach my $entry ( @{ $matrix->get_all_Entries() } ) { - my $source_gene = - $self->cache->get_by_key( 'genes_by_id', 'source', - $entry->source() ); - my $target_gene = - $self->cache->get_by_key( 'genes_by_id', 'target', - $entry->target() ); - - if ( $source_gene->gene_name() ne $target_gene->gene_name() ) { - $matrix->set_score( $entry->source(), $entry->target(), - ( $entry->score()*0.8 ) ); - $i++; - } - } - - $self->logger->debug( "Scored genes with name mismatch: $i\n", 1 ); -} ## end sub name_gene_rescore - 1; diff --git a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblExonGeneric.pm b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblExonGeneric.pm index bdb8abb3c7b28d524a9ae2944b38cd9ef4deba8c..61ae2a3968d06669f2920d89a1e99caf01c7f0c9 100644 --- a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblExonGeneric.pm +++ b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblExonGeneric.pm @@ -86,60 +86,7 @@ sub mapped_transcript { return ($new_scores, $mappings); } - -sub gene_name { - - # EG gene_name is new supplementary method to support discrimination - # based on name - - my ( $self, $num, $gsb, $mappings, $exon_scores ) = @_; - - $self->logger->info( "Retry with exon name disambiguation...\n", - 0, 'stamped' ); - - if ( !$exon_scores->loaded() ) { - $gsb->name_exon_rescore($exon_scores); - $exon_scores->write_to_file(); - } - - my $new_mappings = - $self->basic_mapping( $exon_scores, "exon_mappings$num" ); - - $num++; - - my $new_scores = - $gsb->create_shrinked_matrix( $exon_scores, $new_mappings, - "exon_matrix$num" ); - - return ( $new_scores, $new_mappings ); -} ## end sub gene_name - -sub bounds { - - # EG new supplementary method to distuinguish based on bounds of exon - - my ( $self, $num, $gsb, $mappings, $exon_scores ) = @_; - - $self->logger->info( "Retry with exon bounds disambiguation...\n", - 0, 'stamped' ); - - if ( !$exon_scores->loaded() ) { - $gsb->bounds_exon_rescore($exon_scores); - $exon_scores->write_to_file(); - } - - my $new_mappings = - $self->basic_mapping( $exon_scores, "exon_mappings$num" ); - - $num++; - - my $new_scores = - $gsb->create_shrinked_matrix( $exon_scores, $new_mappings, - "exon_matrix$num" ); - - return ( $new_scores, $new_mappings ); -} ## end sub bounds - + 1; diff --git a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm index bfd8c8d45139cb0c055860d532820789ecd315e1..bf921652366b61b6863aed85b50e2aef5bbcdcdc 100644 --- a/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm +++ b/modules/Bio/EnsEMBL/IdMapping/InternalIdMapper/EnsemblGeneGeneric.pm @@ -184,34 +184,5 @@ sub internal_id { } -sub gene_name { - - # EG gene_name supplementary method for very similar genes (e.g. - # transposons in bacteria) to penalise where gene name is different - - my ( $self, $num, $gsb, $mappings, $gene_scores ) = @_; - - $self->logger->info( "Retry with gene name disambiguation...\n", - 0, 'stamped' ); - - if ( !$gene_scores->loaded() ) { - $gsb->name_gene_rescore($gene_scores); - $gene_scores->write_to_file(); - } - - my $new_mappings = - $self->basic_mapping( $gene_scores, "gene_mappings$num" ); - - $num++; - - my $new_scores = - $gsb->create_shrinked_matrix( $gene_scores, $new_mappings, - "gene_matrix$num" ); - - return ( $new_scores, $new_mappings ); -} ## end sub gene_name - - - 1; diff --git a/modules/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm b/modules/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm index 2c3476402967fd7a854720a003b475991e4d4222..26ff16f92d148b6a21c3fa06919fb275b8efe811 100644 --- a/modules/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm +++ b/modules/Bio/EnsEMBL/IdMapping/ResultAnalyser.pm @@ -733,33 +733,28 @@ sub create_summary_email { print $fh "\n"; - # EG genes_lost.txt file may not exist if species is new - if ( $self->file_exists( 'genes_lost.txt', 'debug' ) ) { - # - # clicklist of first 10 deleted genes - # - - print $fh qq(\nFirst 10 deleted known genes:\n); - print $fh qq(=============================\n\n); - - my $in_fh = $self->get_filehandle( 'genes_lost.txt', 'debug', '<' ); - my $prefix = $self->conf->param('urlprefix'); - my $i; - - while (<$in_fh>) { - last if ( ++$i > 10 ); - - chomp; - my ( $stable_id, $type ) = split(/\s+/); - - next unless ( $type eq 'known' ); + # + # clicklist of first 10 deleted genes + # + print $fh qq(\nFirst 10 deleted known genes:\n); + print $fh qq(=============================\n\n); - print $fh sprintf( $fmt2, $stable_id, "${prefix}$stable_id" ); - } + my $in_fh = $self->get_filehandle('genes_lost.txt', 'debug', '<'); + my $prefix = $self->conf->param('urlprefix'); + my $i; + + while (<$in_fh>) { + last if (++$i > 10); + + chomp; + my ($stable_id, $type) = split(/\s+/); + + next unless ($type eq 'known'); - close($in_fh); - } ## end if ( $self->file_exists...) + print $fh sprintf($fmt2, $stable_id, "${prefix}$stable_id"); + } + close($in_fh); close($fh); } diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblBacteria.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblBacteria.pm deleted file mode 100644 index a46e9d6f4562012218044174f2c668872a54cff0..0000000000000000000000000000000000000000 --- a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblBacteria.pm +++ /dev/null @@ -1,144 +0,0 @@ -=head1 LICENSE - - Copyright (c) 1999-2010 The European Bioinformatics Institute and - Genome Research Limited. All rights reserved. - - This software is distributed under a modified Apache license. - For license details, please see - - http://www.ensembl.org/info/about/code_licence.html - -=head1 CONTACT - - Please email comments or questions to the public Ensembl - developers list at <ensembl-dev@ebi.ac.uk>. - - Questions may also be sent to the Ensembl help desk at - <helpdesk@ensembl.org>. - -=cut - -package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblBacteria; -use strict; -use warnings; -no warnings 'uninitialized'; -use Bio::EnsEMBL::Utils::Exception qw(throw warning); -use base qw(Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric); - -# -# new generator for EnsemblBacteria -# 1. generates EB style IDs -# 2. updates base ID to allow for multi-species DBs to be correctly incremented -# -sub initial_stable_id { - my $self = shift; - my $type = shift; - my $base = $self->get_base(); - # retrieve last good stable ID from hash - my $init_stable_id = $self->{stable_id_list}{$type}; - if ( !$init_stable_id ) { - $self->logger->debug( - "Finding new init_stable_id as base for new $type stable IDs.\n"); - - # use stable ID from configuration if set - if ( $init_stable_id = - $self->conf->param("starting_${type}_stable_id") ) - { - $self->logger->debug( -"Using pre-configured $init_stable_id as base for new $type stable IDs.\n" - ); - return $init_stable_id; - } - my $s_dba = $self->cache->get_DBAdaptor('source'); - my $s_dbh = $s_dba->dbc->db_handle; - - # look in the ${type}_stable_id table first - my $sql = -qq(SELECT MAX(stable_id) FROM ${type}_stable_id where stable_id like '${base}%'); - print $sql; - $init_stable_id = $self->fetch_value_from_db( $s_dbh, $sql ); - - # also look in gene_archive to make sure there are no larger Ids there - unless ( $type eq 'exon' ) { - $sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive); - my $archived_stable_id = $self->fetch_value_from_db( $s_dbh, $sql ); - if ( $archived_stable_id - and $self->is_valid($archived_stable_id) - and ( $archived_stable_id gt $init_stable_id ) ) - { - $init_stable_id = $archived_stable_id; - } - } - $self->{stable_id_list}{$type} = $init_stable_id; - } else { - $self->logger->debug( -"Using preexisting initial $init_stable_id as base for new $type stable IDs.\n" - ); - } - if ($init_stable_id) { - - # since $init_stable_id now is the highest existing stable Id for this - # object type, we need to increment it to find the first one we want to use - # for new assignments - $init_stable_id = $self->increment_stable_id( $init_stable_id, $type ); - $self->logger->debug( - "Using $init_stable_id as base for new $type stable IDs.\n"); - } else { - $self->logger->warning( - "Can't find highest ${type}_stable_id in source db.\n"); - my $pref = - $self->cache->get_DBAdaptor('target')->get_MetaContainer() - ->list_value_by_key('species.stable_id_prefix'); - if ($pref) { - $init_stable_id = - $pref->[0] . substr( uc($type), 0, 1 ) . '00000000000'; - $self->logger->debug( - "Using $init_stable_id as base for new $type stable IDs.\n"); - } - } - return $init_stable_id; -} - -sub increment_stable_id { - my $self = shift; - my $stable_id = shift; - my $type = shift; - unless ( $self->is_valid($stable_id) ) { - throw("Unknown or missing stable ID: $stable_id."); - } - my $base = $self->get_base(); - $stable_id =~ /$base([A-Z]{1,4})(\d{11})/; - my $number = $2; - my $new_stable_id = $base . $1 . ( ++$number ); - $self->{stable_id_list}{$type} = $new_stable_id; - return $new_stable_id; -} - -=head2 is_valid - - Arg[1] : String $stable_id - the stable Id to check - Example : unless ($generator->is_valid($stable_id)) { - die "Invalid stable Id: $stable_id.\n"; - } - Description : Tests a stable Id to be valid (according to the Ensembl stable - Id format definition). - Return type : Boolean - TRUE if valid, FALSE otherwise - Exceptions : none - Caller : general - Status : At Risk - : under development - -=cut - -sub is_valid { - my ( $self, $stable_id ) = @_; - - my $base = $self->get_base(); - - return ( $stable_id - and ( $stable_id =~ /$base([A-z]{1,4})(\d{11})/ ) ); -} - -sub get_base { return 'EB' } - -1; diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblFungi.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblFungi.pm deleted file mode 100644 index d1cedea771ea0063a6001616e07376a43ff732b3..0000000000000000000000000000000000000000 --- a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblFungi.pm +++ /dev/null @@ -1,44 +0,0 @@ -=head1 LICENSE - - Copyright (c) 1999-2010 The European Bioinformatics Institute and - Genome Research Limited. All rights reserved. - - This software is distributed under a modified Apache license. - For license details, please see - - http://www.ensembl.org/info/about/code_licence.html - -=head1 CONTACT - - Please email comments or questions to the public Ensembl - developers list at <ensembl-dev@ebi.ac.uk>. - - Questions may also be sent to the Ensembl help desk at - <helpdesk@ensembl.org>. - -=cut - -package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblFungi; - -use strict; -use warnings; -no warnings 'uninitialized'; -use Bio::EnsEMBL::Utils::Exception qw(throw warning); -use base qw(Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblBacteria); - -# new generator providing EF IDs - -sub is_valid { - my ( $self, $stable_id ) = @_; - - my $base = $self->get_base(); - - return ( $stable_id and ( $stable_id =~ /$base([A-z]{1,4})(\d{11})/ - or $stable_id =~ /SP.*/ ) ); -} - - -sub get_base { return 'EF' } - -1; - diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblGeneric.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblGeneric.pm index 2ba059869b0ce45c317ecd4d6cd2e525a23bca12..aa475697813a2813afc111edcd42afe08dbd6b86 100644 --- a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblGeneric.pm +++ b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblGeneric.pm @@ -120,41 +120,30 @@ sub initial_stable_id { my $self = shift; my $type = shift; - # EG modifications to permit the current stable ID to persist trohough - # different invocations - my $init_stable_id = $self->{stable_id_list}{$type}; - - if ( !defined($init_stable_id) ) { - # use stable ID from configuration if set - if ( $init_stable_id = - $self->conf->param("starting_${type}_stable_id") ) - { - $self->logger->debug( "Using pre-configured $init_stable_id " - . "as base for new $type stable IDs.\n" ); - return $init_stable_id; - } + my $init_stable_id; + + # use stable ID from configuration if set + if ($init_stable_id = $self->conf->param("starting_${type}_stable_id")) { + $self->logger->debug("Using pre-configured $init_stable_id as base for new $type stable IDs.\n"); + return $init_stable_id; + } - my $s_dba = $self->cache->get_DBAdaptor('source'); - my $s_dbh = $s_dba->dbc->db_handle; - - # look in the ${type}_stable_id table first - my $sql = qq(SELECT MAX(stable_id) FROM ${type}_stable_id); - $init_stable_id = $self->fetch_value_from_db( $s_dbh, $sql ); - - # also look in gene_archive to make sure there are no larger Ids - # there - unless ( $type eq 'exon' ) { - $sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive); - my $archived_stable_id = - $self->fetch_value_from_db( $s_dbh, $sql ); - if ( $archived_stable_id - and $self->is_valid($archived_stable_id) - and ( $archived_stable_id gt $init_stable_id ) ) - { - $init_stable_id = $archived_stable_id; - } + my $s_dba = $self->cache->get_DBAdaptor('source'); + my $s_dbh = $s_dba->dbc->db_handle; + + # look in the ${type}_stable_id table first + my $sql = qq(SELECT MAX(stable_id) FROM ${type}_stable_id); + $init_stable_id = $self->fetch_value_from_db($s_dbh, $sql); + + # also look in gene_archive to make sure there are no larger Ids there + unless ($type eq 'exon') { + $sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive); + my $archived_stable_id = $self->fetch_value_from_db($s_dbh, $sql); + if ($archived_stable_id and $self->is_valid($archived_stable_id) and + ($archived_stable_id gt $init_stable_id)) { + $init_stable_id = $archived_stable_id; } - } ## end if ( !defined($init_stable_id...)) + } if ($init_stable_id) { # since $init_stable_id now is the highest existing stable Id for this diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblProtists.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblProtists.pm deleted file mode 100644 index fe0e1265f3d60cd40acf960b81fe3f33c4067d4a..0000000000000000000000000000000000000000 --- a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblProtists.pm +++ /dev/null @@ -1,41 +0,0 @@ -=head1 LICENSE - - Copyright (c) 1999-2010 The European Bioinformatics Institute and - Genome Research Limited. All rights reserved. - - This software is distributed under a modified Apache license. - For license details, please see - - http://www.ensembl.org/info/about/code_licence.html - -=head1 CONTACT - - Please email comments or questions to the public Ensembl - developers list at <ensembl-dev@ebi.ac.uk>. - - Questions may also be sent to the Ensembl help desk at - <helpdesk@ensembl.org>. - -=cut - -package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblProtists; -use strict; -use warnings; -no warnings 'uninitialized'; -use Bio::EnsEMBL::Utils::Exception qw(throw warning); -use base qw(Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblBacteria); - -# new generator to create new protist IDs (and also deal with existing plasmodial IDs) - -sub is_valid { - my ( $self, $stable_id ) = @_; - my $base = $self->get_base(); - return ( $stable_id and ( $stable_id =~ /$base([A-z]{1,4})(\d{11})/ - or $stable_id =~ /PVX.*/ - or $stable_id =~ /PKH.*/ ) ); - -} - -sub get_base { return 'EPr' } - -1; diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdMapper.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdMapper.pm index 5d9fda6926da490e24da7b3c1aa16d0b99d06975..9b78008d44a535fad5b55b37c4451236211a3d94 100644 --- a/modules/Bio/EnsEMBL/IdMapping/StableIdMapper.pm +++ b/modules/Bio/EnsEMBL/IdMapping/StableIdMapper.pm @@ -150,17 +150,15 @@ sub generate_mapping_session { $self->logger->info("old_assembly: $old_assembly, new_assembly $new_assembly\n", 2); } - print $fh join( "\t", - $mapping_session_id, - $self->conf->param('sourcedbname'), - $self->conf->param('targetdbname'), - $old_release, - $new_release, - $old_assembly, - $new_assembly, - $self->mapping_session_date_fmt(), - $self->conf->param('species_id') # EG - ); + print $fh join("\t", + $mapping_session_id, + $self->conf->param('sourcedbname'), + $self->conf->param('targetdbname'), + $old_release, + $new_release, + $old_assembly, + $new_assembly, + $self->mapping_session_date_fmt); print $fh "\n"; close($fh); @@ -187,10 +185,9 @@ sub map_stable_ids { # check if there are any objects of this type at all my %all_sources = %{ $self->cache->get_by_name("${type}s_by_id", 'source') }; my %all_targets = %{ $self->cache->get_by_name("${type}s_by_id", 'target') }; - if ( scalar( keys(%all_sources) ) == 0 ) { - # EG may be possible to have no sources for new species - $self->logger->warning("No cached ${type}s found.\n\n"); - %all_sources = (); + unless (scalar(keys %all_sources)) { + $self->logger->info("No cached ${type}s found.\n\n"); + return; } my %stats = map { $_ => 0 } @@ -284,9 +281,8 @@ sub map_stable_ids { } # increment the stable Id (to be assigned to the next unmapped object) - $new_stable_id = - $self->stable_id_generator->increment_stable_id( $new_stable_id, - $type ); + $new_stable_id = $self->stable_id_generator->increment_stable_id( + $new_stable_id); # stats $stats{'new'}++; @@ -638,31 +634,24 @@ sub generate_mapping_stats { my $novel_total = $stats->{'mapped_novel'} + $stats->{'lost_novel'}; # no split into known and novel for exons - if ( $type ne 'exon' ) { - $result .= sprintf( $fmt2, 'known', - $stats->{'mapped_known'}, - $stats->{'lost_known'}, ( - $known_total - ? $stats->{'mapped_known'}/$known_total*100 - : 0 ) ); - - $result .= sprintf( $fmt2, 'novel', - $stats->{'mapped_novel'}, - $stats->{'lost_novel'}, ( - $novel_total - ? $stats->{'mapped_novel'}/$novel_total*100 - : 0 ) ); - } + unless ( $type eq 'exon' ) { + $result .= sprintf( $fmt2, + 'known', + $stats->{'mapped_known'}, + $stats->{'lost_known'}, + ($known_total ? $stats->{'mapped_known'}/$known_total*100 : 0) + ); - if ( $mapped_total == 0 ) { - # EG different calculation needed when no mappings found for new - # species - $result .= sprintf( $fmt2, 'total', $mapped_total, $lost_total, 0 ); - } else { $result .= sprintf( $fmt2, - 'total', $mapped_total, $lost_total, - $mapped_total/( $known_total + $novel_total )*100 ); - } + 'novel', + $stats->{'mapped_novel'}, + $stats->{'lost_novel'}, + ($novel_total ? $stats->{'mapped_novel'}/$novel_total*100 : 0) + ); + } ## end unless ( $type eq 'exon' ) + + $result .= sprintf($fmt2, 'total', $mapped_total, $lost_total, + $mapped_total/($known_total + $novel_total)*100); # log result $self->logger->info($result."\n"); diff --git a/modules/Bio/EnsEMBL/IdMapping/SyntenyFramework.pm b/modules/Bio/EnsEMBL/IdMapping/SyntenyFramework.pm index a3d7fcf12617167a075f3c1383c2cab34432e422..0b2c09339f8a31595cee8c541418ffcffeb2b923 100644 --- a/modules/Bio/EnsEMBL/IdMapping/SyntenyFramework.pm +++ b/modules/Bio/EnsEMBL/IdMapping/SyntenyFramework.pm @@ -376,15 +376,12 @@ sub rescore_gene_matrix_lsf { is_component => 1, ); - my $cmd = qq{perl -I./modules $Bin/synteny_rescore.pl } - . qq{$options --index \$LSB_JOBINDEX}; + my $cmd = qq{$Bin/synteny_rescore.pl $options --index \$LSB_JOBINDEX}; - my $pipe = - qq{|bsub -J$lsf_name\[1-$num_jobs\] } - . qq{-o $logpath/synteny_rescore.\%I.out } - . qq{-e $logpath/synteny_rescore.\%I.err } - . $self->conf()->param('lsf_opt_run') - . $self->conf()->param('lsf_opt_synteny_rescore'); + my $pipe = qq{|bsub -J$lsf_name\[1-$num_jobs\] } . + qq{-o $logpath/synteny_rescore.\%I.out } . + qq{-e $logpath/synteny_rescore.\%I.err } . + $self->conf->param('lsf_opt_synteny_rescore'); # run lsf job array $self->logger->info("Submitting $num_jobs jobs to lsf.\n"); @@ -402,10 +399,8 @@ sub rescore_gene_matrix_lsf { # submit dependent job to monitor finishing of jobs $self->logger->info("Waiting for jobs to finish...\n", 0, 'stamped'); - my $dependent_job = - qq{bsub -K -w "ended($lsf_name)" } - . $self->conf()->param('lsf_opt_run_small') - . qq{ -o $logpath/synteny_rescore_depend.out /bin/true}; + my $dependent_job = qq{bsub -K -w "ended($lsf_name)" -q small } . + qq{-o $logpath/synteny_rescore_depend.out /bin/true}; system($dependent_job) == 0 or $self->logger->error("Error submitting dependent job: $!\n"); diff --git a/modules/Bio/EnsEMBL/IdMapping/TinyExon.pm b/modules/Bio/EnsEMBL/IdMapping/TinyExon.pm index e32ede6f4fddd93f36401154a66ead473d5c5d79..e3173c3e93fb9d4aa8589368dbadbaf861bf44fd 100644 --- a/modules/Bio/EnsEMBL/IdMapping/TinyExon.pm +++ b/modules/Bio/EnsEMBL/IdMapping/TinyExon.pm @@ -461,28 +461,6 @@ sub is_known { return 1; } -=head2 gene_name - - Description : optional name of gene to which exon belongs - Return type : String + Exceptions : none - Caller : general - Status : At Risk - : under development - -=cut - -sub gene_name { - my ($self, $value) = @_; - - # EG optional gene name for exon for improved discrimination - - if ( defined($value) ) { - $self->[18] = $value; - } - - return $self->[18]; -} - 1; diff --git a/modules/Bio/EnsEMBL/IdMapping/TinyGene.pm b/modules/Bio/EnsEMBL/IdMapping/TinyGene.pm index 93e6c183fbbf501c06d359cb000a50087a5fb506..c79a5a5ad0d7767c377ff0a2a4f0c92107125c44 100644 --- a/modules/Bio/EnsEMBL/IdMapping/TinyGene.pm +++ b/modules/Bio/EnsEMBL/IdMapping/TinyGene.pm @@ -71,8 +71,7 @@ package Bio::EnsEMBL::IdMapping::TinyGene; # 10 status # 11 logic_name # 12 is_known -# 13 gene_name -# 14 [transcripts] +# 13 [transcripts] use strict; @@ -260,7 +259,7 @@ sub add_Transcript { throw('Need a Bio::EnsEMBL::IdMapping::TinyTranscript.'); } - push @{ $self->[14] }, $tr; + push @{ $self->[13] }, $tr; } @@ -279,7 +278,7 @@ sub add_Transcript { =cut sub get_all_Transcripts { - return $_[0]->[14] || []; + return $_[0]->[13] || []; } @@ -300,29 +299,5 @@ sub length { } -=head2 gene_name - - Description : optional name of gene - Return type : String - Exceptions : none - Caller : general - Status : At Risk - : under development - -=cut - -sub gene_name { - my ( $self, $value ) = @_; - - # EG optional gene name for improved discrimination - - if ( defined($value) ) { - $self->[13] = $value; - } - - return $self->[13]; -} - - 1;