From 5d0819811cca58d96f8366ab08d540d03ccf9d27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20Kusalananda=20K=C3=A4h=C3=A4ri?=
 <ak4@sanger.ac.uk>
Date: Tue, 13 Jul 2010 09:21:47 +0000
Subject: [PATCH] Stable ID generators from Dan S. for Ensembl Genomes.

---
 .../StableIdGenerator/EnsemblBacteria.pm      | 144 ++++++++++++++++++
 .../StableIdGenerator/EnsemblFungi.pm         |  44 ++++++
 .../StableIdGenerator/EnsemblProtists.pm      |  41 +++++
 3 files changed, 229 insertions(+)
 create mode 100644 modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblBacteria.pm
 create mode 100644 modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblFungi.pm
 create mode 100644 modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblProtists.pm

diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblBacteria.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblBacteria.pm
new file mode 100644
index 0000000000..a46e9d6f45
--- /dev/null
+++ b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblBacteria.pm
@@ -0,0 +1,144 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2010 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblBacteria;
+use strict;
+use warnings;
+no warnings 'uninitialized';
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use base qw(Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric);
+
+#
+# new generator for EnsemblBacteria
+# 1. generates EB style IDs
+# 2. updates base ID to allow for multi-species DBs to be correctly incremented
+#
+# Return the first stable ID to assign for $type, taken from (in order) the
+# cache in $self->{stable_id_list}, the "starting_${type}_stable_id" setting
+# (returned as is), or the highest ID in the source db; cached and db values
+# are incremented first.  Last resort: species.stable_id_prefix, else false.
+sub initial_stable_id {
+	my ( $self, $type ) = @_;
+	my $base = $self->get_base();
+	# retrieve last good stable ID from hash
+	my $init_stable_id = $self->{stable_id_list}{$type};
+	if ($init_stable_id) {
+		$self->logger->debug(
+"Using preexisting initial $init_stable_id as base for new $type stable IDs.\n"
+		);
+	} else {
+		$self->logger->debug(
+			"Finding new init_stable_id as base for new $type stable IDs.\n");
+
+		# use stable ID from configuration if set (not cached, not incremented)
+		$init_stable_id = $self->conf->param("starting_${type}_stable_id");
+		if ($init_stable_id) {
+			$self->logger->debug(
+"Using pre-configured $init_stable_id as base for new $type stable IDs.\n"
+			);
+			return $init_stable_id;
+		}
+		my $s_dbh = $self->cache->get_DBAdaptor('source')->dbc->db_handle;
+
+		# look in the ${type}_stable_id table first, only at IDs starting with $base
+		my $sql =
+qq(SELECT MAX(stable_id) FROM ${type}_stable_id where stable_id like '${base}%');
+		$self->logger->debug("$sql\n");    # log the query instead of printing it
+		$init_stable_id = $self->fetch_value_from_db( $s_dbh, $sql );
+
+		# also look in gene_archive to make sure there are no larger Ids there
+		if ( $type ne 'exon' ) {
+			$sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive);
+			my $archived_stable_id = $self->fetch_value_from_db( $s_dbh, $sql );
+			if (    $archived_stable_id
+				and $self->is_valid($archived_stable_id)
+				and ( $archived_stable_id gt $init_stable_id ) )
+			{
+				$init_stable_id = $archived_stable_id;
+			}
+		}
+		$self->{stable_id_list}{$type} = $init_stable_id;
+	}
+	if ($init_stable_id) {
+		# $init_stable_id is the highest existing stable Id for this object
+		# type; increment it to get the first one to use for new assignments
+		$init_stable_id = $self->increment_stable_id( $init_stable_id, $type );
+		$self->logger->debug(
+			"Using $init_stable_id as base for new $type stable IDs.\n");
+	} else {
+		$self->logger->warning(
+			"Can't find highest ${type}_stable_id in source db.\n");
+		my $pref =
+		  $self->cache->get_DBAdaptor('target')->get_MetaContainer()
+		  ->list_value_by_key('species.stable_id_prefix');
+		# an empty list would otherwise yield an ID without any prefix
+		if ( $pref and @{$pref} ) {
+			$init_stable_id =
+			  $pref->[0] . substr( uc($type), 0, 1 ) . '00000000000';
+			$self->logger->debug(
+				"Using $init_stable_id as base for new $type stable IDs.\n");
+		}
+	}
+	return $init_stable_id;
+}
+
+# Return the ID following $stable_id and record it in $self->{stable_id_list}
+# for $type.  Throws if the ID is invalid or lacks the letters + 11 digits part.
+sub increment_stable_id {
+	my ( $self, $stable_id, $type ) = @_;
+	my $base = $self->get_base();
+	# capture straight into lexicals: after a failed match $1/$2 would be stale
+	my ( $letters, $number ) =
+	  $stable_id =~ /\Q$base\E([A-Za-z]{1,4})(\d{11})/;
+	unless ( $self->is_valid($stable_id) and defined $number ) {
+		throw("Unknown or missing stable ID: $stable_id.");
+	}
+	# ++ on a digit-only string increments it as a string, keeping zero padding
+	return $self->{stable_id_list}{$type} = $base . $letters . ( ++$number );
+}
+
+=head2 is_valid
+
+  Arg[1]      : String $stable_id - the stable Id to check
+  Example     : unless ($generator->is_valid($stable_id)) {
+                  die "Invalid stable Id: $stable_id.\n";
+                }
+  Description : Tests a stable Id to be valid: it must contain the value of
+                get_base() followed by 1-4 letters and 11 digits.
+  Return type : Boolean - 1 if valid, 0 otherwise (in any calling context)
+  Exceptions  : none
+  Caller      : general
+  Status      : At Risk
+              : under development
+
+=cut
+
+sub is_valid {
+  my ( $self, $stable_id ) = @_;
+
+  my $base = $self->get_base();
+
+  # [A-Za-z] rather than [A-z], which also matches the characters [ \ ] ^ _ `
+  return ( $stable_id and $stable_id =~ /\Q$base\E[A-Za-z]{1,4}\d{11}/ ) ? 1 : 0;
+}
+
+sub get_base { return q{EB}; }    # literal that is_valid() and the SQL LIKE filter look for
+
+1;
diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblFungi.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblFungi.pm
new file mode 100644
index 0000000000..d1cedea771
--- /dev/null
+++ b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblFungi.pm
@@ -0,0 +1,44 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2010 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblFungi;
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use base qw(Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblBacteria);
+
+# new generator providing EF IDs
+
+# 1 if $stable_id has get_base() + 1-4 letters + 11 digits, or contains "SP"; else 0.
+sub is_valid {
+  my ( $self, $stable_id ) = @_;
+  my $base = $self->get_base();
+  # [A-Za-z], not [A-z]: that range also matches the punctuation [ \ ] ^ _ `
+  return ( $stable_id and (    $stable_id =~ /\Q$base\E[A-Za-z]{1,4}\d{11}/
+                            or $stable_id =~ /SP.*/ ) ) ? 1 : 0;
+}
+
+
+sub get_base { return q{EF}; }    # literal that is_valid() looks for in stable IDs
+
+1;
+
diff --git a/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblProtists.pm b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblProtists.pm
new file mode 100644
index 0000000000..fe0e1265f3
--- /dev/null
+++ b/modules/Bio/EnsEMBL/IdMapping/StableIdGenerator/EnsemblProtists.pm
@@ -0,0 +1,41 @@
+=head1 LICENSE
+
+  Copyright (c) 1999-2010 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=cut
+
+package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblProtists;
+use strict;
+use warnings;
+no warnings 'uninitialized';
+use Bio::EnsEMBL::Utils::Exception qw(throw warning);
+use base qw(Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblBacteria);
+
+# new generator to create new protist IDs (and also deal with existing plasmodial IDs)
+
+# 1 if $stable_id has get_base() + 1-4 letters + 11 digits, or "PVX"/"PKH"; else 0.
+sub is_valid {
+  my ( $self, $stable_id ) = @_;
+  my $base = $self->get_base();
+  # [A-Za-z], not [A-z]: that range also matches the punctuation [ \ ] ^ _ `
+  return ( $stable_id and (    $stable_id =~ /\Q$base\E[A-Za-z]{1,4}\d{11}/
+            or $stable_id =~ /PVX.*/ or $stable_id =~ /PKH.*/ ) ) ? 1 : 0;
+}
+
+sub get_base { return q{EPr}; }    # literal that is_valid() looks for in stable IDs
+
+1;
-- 
GitLab