diff --git a/modules/Bio/EnsEMBL/Registry.pm b/modules/Bio/EnsEMBL/Registry.pm index 0bc06a90a33e3b30f58ed1d86f31a4ce9a1abff2..a61bead72d818380ddc12900b26d4b254d9ed83e 100644 --- a/modules/Bio/EnsEMBL/Registry.pm +++ b/modules/Bio/EnsEMBL/Registry.pm @@ -150,6 +150,7 @@ my %group2adaptor = ( 'ontology' => 'Bio::EnsEMBL::DBSQL::OntologyDBAdaptor', 'otherfeatures' => 'Bio::EnsEMBL::DBSQL::DBAdaptor', 'pipeline' => 'Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor', + 'production' => 'Bio::EnsEMBL::Production::DBSQL::DBAdaptor', 'snp' => 'Bio::EnsEMBL::ExternalData::SNPSQL::DBAdaptor', 'stable_ids' => 'Bio::EnsEMBL::DBSQL::DBAdaptor', 'variation' => 'Bio::EnsEMBL::Variation::DBSQL::DBAdaptor', @@ -1587,6 +1588,9 @@ sub load_registry_from_db { my $ontology_db; my $ontology_version; + my $production_db; + my $production_version; + my $stable_ids_db; my $stable_ids_version; @@ -1652,6 +1656,19 @@ sub load_registry_from_db { $ontology_db = $db; $ontology_version = $1; } + } elsif ( $db =~ /^ensembl(?:genomes)?_production(_\d+)?/x ) { + # production db can come with no version (i.e. that on ens-staging1), + # but it's backed up with a release number + my $version = $1; + if ($version) { + $version =~ s/_//; + if ($software_version and $version eq $software_version) { + $production_db = $db; + $production_version = $version; + } + } else { # default choice: used only when no version-matched db was selected + $production_db = $db if !defined($production_version) && $db =~ /^ensembl(?:genomes)?_production$/; + } } elsif ( $db =~ /^ensembl(?:genomes)?_stable_ids_(?:\d+_)?(\d+)/x ) { if ( $1 eq $software_version ) { $stable_ids_db = $db; @@ -2127,6 +2144,30 @@ sub load_registry_from_db { print("No ontology database found\n"); } + # Production + + if ( defined($production_db) ) { + require Bio::EnsEMBL::Production::DBSQL::DBAdaptor; + + my $dba = + Bio::EnsEMBL::Production::DBSQL::DBAdaptor->new( + '-species' => 'multi' . 
$species_suffix, + '-group' => 'production', + '-host' => $host, + '-port' => $port, + '-user' => $user, + '-pass' => $pass, + '-dbname' => $production_db, ); + + if ($verbose) { + printf( "%s loaded\n", $production_db ); + } + } + elsif ($verbose) { + print("No production database found\n"); + } + + # Stable IDs if ( defined($stable_ids_db) && $stable_ids_version != 0 ) { @@ -2155,6 +2196,9 @@ sub load_registry_from_db { -species => 'multi'.$species_suffix, -alias => ['ontology'.$species_suffix] ); + Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( + -species => 'multi'.$species_suffix, + -alias => ['production'.$species_suffix] ); Bio::EnsEMBL::Utils::ConfigRegistry->add_alias( -species => 'multi'.$species_suffix, diff --git a/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm b/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm index 101e9675d0060f746da6f5b01447095af3bce7ff..f6936c576722c0827083b8e481415e506824ac50 100644 --- a/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm +++ b/modules/Bio/EnsEMBL/Utils/BiotypeMapper.pm @@ -142,7 +142,9 @@ my %grouping_of_biotypes = ( 'short_noncoding'=> [qw( miRNA misc_RNA Mt_tRNA rRNA snoRNA snRNA )], + # practical Ensembl core categories for fasta dumping + # protein coding/pseudogene 'cdna' => [qw( protein_coding polymorphic_pseudogene IG_V_gene TR_V_gene IG_J_gene TR_J_gene IG_D_gene IG_C_gene TR_C_gene pseudogene retrotransposed IG_V_pseudogene TR_V_pseudogene @@ -150,10 +152,15 @@ my %grouping_of_biotypes = ( antisense ambiguous_orf transcribed_processed_pseudogene disrupted_domain processed_pseudogene )], + + # protein coding transcript + # protein coding 'peptide_producing' => [qw( protein_coding polymorphic_pseudogene IG_V_gene TR_V_gene IG_J_gene TR_J_gene IG_D_gene IG_C_gene TR_C_gene IG_LV_gene nonsense_mediated_decay )], + + # short/long noncoding 'ncrna' => [qw( ncRNA miRNA miRNA_pseudogene misc_RNA misc_RNA_pseudogene Mt_tRNA Mt_tRNA_pseudogene Mt_rRNA rRNA rRNA_pseudogene scRNA_pseudogene snoRNA snoRNA_pseudogene snRNA 
snRNA_pseudogene tRNA_pseudogene diff --git a/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm b/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm index 898b8eeb0231c8714dd80ab92619a29be8a0b4a9..4f3dab17a52d52f418c7b8e890622a2c3f25c453 100644 --- a/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm +++ b/modules/Bio/EnsEMBL/Utils/SequenceOntologyMapper.pm @@ -18,15 +18,15 @@ SequenceOntologyMapper - Translates EnsEMBL objects into Sequence Ontology terms use Bio::EnsEMBL::Utils::SequenceOntologyMapper -# get an Ensembl feature somehow in $feature +# get an Ensembl feature somehow in scalar $feature ... ... my $ontology_adaptor = $registry->get_adaptor( 'Multi', 'Ontology', 'OntologyTerm' ); my $mapper = SequenceOntologyMapper->new($ontology_adaptor); -print $mapper->to_SO_accession($feature), "\n"; -print $mapper->to_SO_name($feature), "\n"; +print $mapper->to_accession($feature), "\n"; +print $mapper->to_name($feature), "\n"; =head1 DESCRIPTION @@ -90,7 +90,6 @@ my %transcript_so_mapping = '3prime_overlapping_ncrna' => 'SO:0000483', 'TR_V_gene' => 'SO:0000466', 'TR_V_pseudogene' => 'SO:0000336', - 'TR_J_gene' => 'SO:0000470', 'IG_C_gene' => 'SO:0000478', 'IG_C_pseudogene' => 'SO:0000336',