From cc1793300c1a53bc56b65efe6c6b0e2d658f0dea Mon Sep 17 00:00:00 2001 From: Glenn Proctor <gp1@sanger.ac.uk> Date: Mon, 29 Jan 2007 10:07:56 +0000 Subject: [PATCH] Added dumping of variation IDs (on by default, can be switched off with -no_variation option) --- misc-scripts/ebi_search_dump/dump_ebi.pl | 82 ++++++++++++++++++++---- 1 file changed, 69 insertions(+), 13 deletions(-) diff --git a/misc-scripts/ebi_search_dump/dump_ebi.pl b/misc-scripts/ebi_search_dump/dump_ebi.pl index 5a826f67d9..e3879eafc6 100644 --- a/misc-scripts/ebi_search_dump/dump_ebi.pl +++ b/misc-scripts/ebi_search_dump/dump_ebi.pl @@ -10,11 +10,13 @@ use DBI; use Getopt::Long; use IO::Zlib; +use Bio::EnsEMBL::Registry; use Bio::EnsEMBL::DBSQL::DBAdaptor; +use Bio::EnsEMBL::Variation::DBSQL::DBAdaptor; use HTML::Entities; -my ( $host, $user, $pass, $port, $dbpattern, $max_genes, $gzip ); +my ($host, $user, $pass, $port, $dbpattern, $max_genes, $gzip, $no_variation); GetOptions( "host=s", \$host, "user=s", \$user, @@ -23,6 +25,7 @@ GetOptions( "host=s", \$host, "dbpattern|pattern=s", \$dbpattern, "gzip!", \$gzip, "max_genes=i", \$max_genes, + "no_variation", \$no_variation, "help" , \&usage ); @@ -38,6 +41,11 @@ run(); sub run() { + Bio::EnsEMBL::Registry->load_registry_from_db(-host => $host, + -port => $port, + -user => $user, + -pass => $pass); + # loop over databases my $dsn = "DBI:mysql:host=$host"; @@ -130,6 +138,13 @@ sub content { my $meta_container = $dba->get_MetaContainer(); my $species = $meta_container->get_Species()->common_name(); + my $db_variation = variation_attach($dba) unless $no_variation; + + my $trv_adaptor; + if ($db_variation) { # not all species have variation databases + $trv_adaptor = $db_variation->get_TranscriptVariationAdaptor(); + } + foreach my $gene (@{$gene_adaptor->fetch_all()}) { last if ($max_genes && $entry_count >= $max_genes); @@ -175,7 +190,8 @@ sub content { # additional fields - transcript, translation, species etc p ("<additional_fields>"); - foreach my $transcript (@{$gene->get_all_Transcripts()}) { + my $transcripts = $gene->get_all_Transcripts(); + foreach my $transcript (@{$transcripts}) { p ("<field name=\"transcript\">" . $transcript->stable_id() . "</field>"); @@ -190,6 +206,13 @@ sub content { p ("<field name=\"species\">" . $species . "</field>"); + # SNP IDs + if ($db_variation) { + foreach my $tv (@{$trv_adaptor->fetch_all_by_Transcripts($transcripts)}){ + p ("<field name=\"variation_id\">" . $tv->variation_feature()->variation_name() . "</field>"); + } + } + p ("</additional_fields>"); # close tag @@ -199,8 +222,6 @@ sub content { } - - } # ------------------------------------------------------------------------------- @@ -223,7 +244,7 @@ sub footer { } else { close(FILE); } - + } @@ -309,26 +330,61 @@ sub print_time { # ------------------------------------------------------------------------------- +# +# Figure out the name of a variation database from the core database name +# + +sub variation_attach { + + my $db = shift; + + my $core_db_name; + $core_db_name = $db->dbc->dbname(); + return undef if ($core_db_name !~ /_core_/); + + my $dbc = $db->dbc(); + my $sth = $dbc->prepare("show databases"); + $sth->execute(); + my $all_db_names = $sth->fetchall_arrayref(); + my %all_db_names = map {( $_->[0] , 1)} @$all_db_names; + my $variation_db_name = $core_db_name; + $variation_db_name =~ s/_core_/_variation_/; + + return undef if (! exists $all_db_names{$variation_db_name}); + + # register the dbadaptor with the Registry + return Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(-host => $dbc->host(), + -user => $dbc->username(), + -pass => $dbc->password(), + -port => $dbc->port(), + -dbname => $variation_db_name); + +} + +# ------------------------------------------------------------------------------- + sub usage { print <<EOF; exit(0); Usage: perl $0 <options> - -host Database host to connect to. + -host Database host to connect to. + + -port Database port to connect to. - -port Database port to connect to. + -dbpattern Database name regexp - -dbpattern Database name regexp + -user Database username. - -user Database username. + -pass Password for user. - -pass Password for user. + -gzip Compress output as it's written. - -gzip Compress output as it's written. + -max_genes Only dump this many genes for testing. - -max_genes Only dump this many genes for testing. + -no_variation Don't dump variation IDs. - -help This message. + -help This message. EOF -- GitLab