From 4409c9b9f6d3334cb36b75ecf4c1e5d9e18a1fcc Mon Sep 17 00:00:00 2001 From: Glenn Proctor <gp1@sanger.ac.uk> Date: Thu, 26 Oct 2006 13:02:25 +0000 Subject: [PATCH] Script to insert genebuild.level (etc) keys into meta table if all genes are on toplevel seq regions. Also checks transcripts, exons and repeat features. The API can use these keys to optimise fetching significantly. --- misc-scripts/meta_levels.pl | 149 ++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 misc-scripts/meta_levels.pl diff --git a/misc-scripts/meta_levels.pl b/misc-scripts/meta_levels.pl new file mode 100644 index 0000000000..9690b48a7a --- /dev/null +++ b/misc-scripts/meta_levels.pl @@ -0,0 +1,149 @@ +# Populate meta table with (e.g.) genebuild.level = toplevel if all genes are +# top level. Using v41 API code this can speed fetching & dumping greatly. +# + +use strict; +use DBI; + +use Getopt::Long; + +use Bio::EnsEMBL::DBSQL::DBAdaptor; + +my ( $host, $user, $pass, $port, $dbpattern, $print); + +GetOptions( "host=s", \$host, + "user=s", \$user, + "pass=s", \$pass, + "port=i", \$port, + "dbpattern|pattern=s", \$dbpattern, + "print", \$print, + "help" , \&usage + ); + +if( !$host || !$dbpattern ) { + usage(); +} + +my @feature_types = qw[gene transcript exon repeat_feature]; + +run(); + +sub run() { + + # loop over databases + + my $dsn = "DBI:mysql:host=$host"; + $dsn .= ";port=$port" if ($port); + + my $db = DBI->connect( $dsn, $user, $pass ); + + my @dbnames = map {$_->[0] } @{$db->selectall_arrayref("show databases")}; + + for my $dbname (@dbnames) { + + next if ($dbname !~ /$dbpattern/); + + my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor('-host' => $host, + '-port' => $port, + '-user' => $user, + '-pass' => $pass, + '-dbname' => $dbname, + '-species' => $dbname); + + my $ma = $dba->get_MetaContainer(); + + my @inserted; + my @not_inserted; + + foreach my $type (@feature_types) { + + delete_existing($ma, $type) if (!$print); + + if (can_use_key($dba, $type)) { + + insert_key($ma, $type) if (!$print); + push @inserted, $type; + + } else { + + push @not_inserted, $type; + + } + + } + + print "$dbname inserted keys for " . join(", ", @inserted) . ".\n" if (@inserted); + print "$dbname did not insert keys for " . join(", ", @not_inserted) . ".\n" if (@not_inserted); + + } + +} + +# ------------------------------------------------------------------------------- + +sub delete_existing { + + my ($ma, $type) = @_; + + $ma->delete_key($type . "build.level"); + +} + +# ------------------------------------------------------------------------------- + +sub can_use_key { + + my ($dba, $type) = @_; + + # compare total count of typewith number of toplevel type, if they're the same, + # then we can use the key + + my $sth = $dba->dbc()->prepare("SELECT COUNT(*) FROM $type"); + $sth->execute(); + my $total = ($sth->fetchrow_array())[0]; + + $sth = $dba->dbc()->prepare("SELECT COUNT(*) FROM $type t, seq_region_attrib sra, attrib_type at WHERE t.seq_region_id=sra.seq_region_id AND sra.attrib_type_id=at.attrib_type_id AND at.code='toplevel'"); + $sth->execute(); + my $toplevel = ($sth->fetchrow_array())[0]; + + return $total == $toplevel; + +} + +# ------------------------------------------------------------------------------- + +sub insert_key { + + my ($ma, $type) = @_; + + $ma->store_key_value($type . "build.level", "toplevel"); + +} + +# ------------------------------------------------------------------------------- + +sub usage { + print <<EOF; exit(0); + +Populate meta table with (e.g.) genebuild.level = toplevel if all genes are +top level. Using v41 API code this can speed fetching & dumping greatly. + +Usage: perl $0 <options> + + -host Database host to connect to. + + -port Database port to connect to. + + -dbpattern Database name regexp + + -user Database username. + + -pass Password for user. + + -print Just print, don't insert or delete keys. + + -help This message. + +EOF + +} -- GitLab