From cc1793300c1a53bc56b65efe6c6b0e2d658f0dea Mon Sep 17 00:00:00 2001
From: Glenn Proctor <gp1@sanger.ac.uk>
Date: Mon, 29 Jan 2007 10:07:56 +0000
Subject: [PATCH] Added dumping of variation IDs (on by default, can be
 switched off with -no_variation option)

---
 misc-scripts/ebi_search_dump/dump_ebi.pl | 82 ++++++++++++++++++++----
 1 file changed, 69 insertions(+), 13 deletions(-)

diff --git a/misc-scripts/ebi_search_dump/dump_ebi.pl b/misc-scripts/ebi_search_dump/dump_ebi.pl
index 5a826f67d9..e3879eafc6 100644
--- a/misc-scripts/ebi_search_dump/dump_ebi.pl
+++ b/misc-scripts/ebi_search_dump/dump_ebi.pl
@@ -10,11 +10,13 @@ use DBI;
 use Getopt::Long;
 use IO::Zlib;
 
+use Bio::EnsEMBL::Registry;
 use Bio::EnsEMBL::DBSQL::DBAdaptor;
+use Bio::EnsEMBL::Variation::DBSQL::DBAdaptor;
 
 use HTML::Entities;
 
-my ( $host, $user, $pass, $port, $dbpattern, $max_genes, $gzip );
+my ($host, $user, $pass, $port, $dbpattern, $max_genes, $gzip, $no_variation);
 
 GetOptions( "host=s",              \$host,
 	    "user=s",              \$user,
@@ -23,6 +25,7 @@ GetOptions( "host=s",              \$host,
 	    "dbpattern|pattern=s", \$dbpattern,
 	    "gzip!",               \$gzip,
             "max_genes=i",         \$max_genes,
+	    "no_variation",        \$no_variation,
 	    "help" ,               \&usage
 	  );
 
@@ -38,6 +41,11 @@ run();
 
 sub run() {
 
+  Bio::EnsEMBL::Registry->load_registry_from_db(-host => $host,
+						-port => $port,
+						-user => $user,
+						-pass => $pass);
+
   # loop over databases
 
   my $dsn = "DBI:mysql:host=$host";
@@ -130,6 +138,13 @@ sub content {
   my $meta_container = $dba->get_MetaContainer();
   my $species = $meta_container->get_Species()->common_name();
 
+  my $db_variation = variation_attach($dba) unless $no_variation;
+
+  my $trv_adaptor;
+  if ($db_variation) { # not all species have variation databases
+    $trv_adaptor = $db_variation->get_TranscriptVariationAdaptor();
+  }
+
   foreach my $gene (@{$gene_adaptor->fetch_all()}) {
 
     last if ($max_genes && $entry_count >= $max_genes);
@@ -175,7 +190,8 @@ sub content {
     # additional fields - transcript, translation, species etc
     p ("<additional_fields>");
 
-    foreach my $transcript (@{$gene->get_all_Transcripts()}) {
+    my $transcripts = $gene->get_all_Transcripts();
+    foreach my $transcript (@{$transcripts}) {
 
       p ("<field name=\"transcript\">" . $transcript->stable_id() . "</field>");
 
@@ -190,6 +206,13 @@ sub content {
 
     p ("<field name=\"species\">" . $species . "</field>");
 
+    # SNP IDs
+    if ($db_variation) {
+      foreach my $tv (@{$trv_adaptor->fetch_all_by_Transcripts($transcripts)}){
+	p ("<field name=\"variation_id\">" . $tv->variation_feature()->variation_name() . "</field>");
+      }
+    }
+
     p ("</additional_fields>");
 
     # close tag
@@ -199,8 +222,6 @@ sub content {
 
   }
 
-
-
 }
 
 # -------------------------------------------------------------------------------
@@ -223,7 +244,7 @@ sub footer {
   } else {
     close(FILE);
   }
-  
+
 }
 
 
@@ -309,26 +330,61 @@ sub print_time {
 
 # -------------------------------------------------------------------------------
 
+#
+# Create a DBAdaptor for the variation database matching a core database, if any
+#
+
+sub variation_attach {
+
+  # Returns a variation DBAdaptor on the same server as the core adaptor
+  # $db, or nothing if the core database has no variation counterpart.
+  my $db = shift;
+
+  my $dbc = $db->dbc();
+  my $core_db_name = $dbc->dbname();
+  return if ($core_db_name !~ /_core_/);
+
+  # the variation database shares the core name, with _core_ swapped out
+  (my $variation_db_name = $core_db_name) =~ s/_core_/_variation_/;
+
+  # not all species have a variation database, so check the server's list
+  my $sth = $dbc->prepare("show databases");
+  $sth->execute();
+  my %all_db_names = map { ( $_->[0], 1 ) } @{ $sth->fetchall_arrayref() };
+  return if (!exists $all_db_names{$variation_db_name});
+
+  return Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(-host => $dbc->host(),
+							-user => $dbc->username(),
+							-pass => $dbc->password(),
+							-port => $dbc->port(),
+							-dbname => $variation_db_name);
+
+}
+
+# -------------------------------------------------------------------------------
+
 sub usage {
   print <<EOF; exit(0);
 
 Usage: perl $0 <options>
 
-  -host       Database host to connect to.
+  -host         Database host to connect to.
+
+  -port         Database port to connect to.
 
-  -port       Database port to connect to.
+  -dbpattern    Database name regexp
 
-  -dbpattern  Database name regexp
+  -user         Database username.
 
-  -user       Database username.
+  -pass         Password for user.
 
-  -pass       Password for user.
+  -gzip         Compress output as it's written.
 
-  -gzip       Compress output as it's written.
+  -max_genes    Only dump this many genes for testing.
 
-  -max_genes  Only dump this many genes for testing.
+  -no_variation Don't dump variation IDs.
 
-  -help       This message.
+  -help         This message.
 
 EOF
 
-- 
GitLab