Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
ebi_search_dump_generic.pl 61.96 KiB
#!/usr/local/bin/perl

# Dump variation information to an XML file for indexing by the EBI's search engine.
#
# To copy files to the EBI so that they can be picked up:
# scp *.xml.gz glenn@puffin.ebi.ac.uk:xml/
#
# Email eb-eye@ebi.ac.uk after copying so the files can be indexed.
package ebi_search_dump;

use strict;
use DBI;
use Carp;
use File::Basename qw( dirname );    
use File::Find;
use FindBin qw($Bin); 
use Getopt::Long;
use IO::Zlib;
use Data::Dumper;

                            
BEGIN {
    # Put two directories, located relative to this script via $Bin, at the
    # front of the module search path. The directory three levels up is
    # listed first so that it precedes its conf/ subdirectory in @INC.
    unshift @INC, "$Bin/../../..", "$Bin/../../../conf";

    # utils::Tool is treated as optional: a failed load only produces a
    # warning and the script carries on.
    eval { require utils::Tool; 1 }
      or warn "Can't use utils::Tool (required for ensemblgenomes)\n";
}

# Run-time settings. They are file-scoped lexicals, so the inifile code that
# is string-eval'd below runs with them in scope (presumably the inifile
# assigns to them -- confirm against a real inifile). GetOptions fills them
# in afterwards from the command line.
my (
    $host,    $user,        $pass,   $port,     $species, $ind,
    $release, $max_entries, $nogzip, $parallel, $dir,     $inifile,
    $site_type
);

# Naive "key => value" pairing of the raw command line. It is only correct
# when every option takes exactly one value, so it is no longer used to find
# the inifile. NOTE(review): retained unchanged in case code further down the
# file still reads %rHash -- confirm before removing.
my %rHash = map { $_ } @ARGV;

# Return the file named by the last "-inifile FILE" / "--inifile FILE" /
# "-inifile=FILE" / "--inifile=FILE" occurrence in the argument list, or
# undef when the option is absent. Unlike a positional hash lookup this works
# regardless of how many valueless flags precede the option.
sub _inifile_from_args {
    my @args = @_;

    my $found;
    for my $i ( 0 .. $#args ) {
        if ( $args[$i] =~ /\A--?inifile=(.+)\z/s ) {
            $found = $1;
        }
        elsif ( $args[$i] =~ /\A--?inifile\z/ && defined $args[ $i + 1 ] ) {
            $found = $args[ $i + 1 ];
        }
    }
    return $found;
}

if ( $inifile = _inifile_from_args(@ARGV) ) {

    # Read the file directly instead of shelling out to `cat`, so the file
    # name is never interpreted by a shell.
    if ( open my $ini_fh, '<', $inifile ) {
        my $icontent = do { local $/; <$ini_fh> };
        close $ini_fh;

        # The file content is echoed to STDERR before it is evaluated.
        warn $icontent;

        # SECURITY: the inifile is executed as Perl code. Only point
        # -inifile at files you trust.
        eval $icontent;
        warn "Error while evaluating inifile '$inifile': $@" if $@;
    }
    else {
        # Report the problem but keep going; the required-option check after
        # GetOptions decides whether enough settings are available.
        warn "Can't read inifile '$inifile': $!\n";
    }
}

# Parse the command line. Each option specification is paired with the
# variable it fills; "help" invokes usage() instead of storing a value.
GetOptions(
    'dir=s'         => \$dir,
    'help'          => \&usage,
    'host=s'        => \$host,
    'index=s'       => \$ind,
    'inifile=s'     => \$inifile,
    'max_entries=i' => \$max_entries,
    'nogzip!'       => \$nogzip,
    'parallel'      => \$parallel,
    'pass=s'        => \$pass,
    'port=i'        => \$port,
    'release=s'     => \$release,
    'site_type=s'   => \$site_type,
    'species=s'     => \$species,
    'user=s'        => \$user,
);

# Fall back to defaults for anything left unset (or set to a false value).
$ind     = 'ALL'    unless $ind;
$dir     = "."      unless $dir;
$release = 'LATEST' unless $release;

# host, port, user and site_type are mandatory: show the usage text and stop
# when any of them is missing. The exit only happens if usage() returns a
# true value.
if ( !( $host && $port && $user && $site_type ) ) {
    usage() and exit;
}

print "Site type: $site_type\n";

## HACK 1 - when the index option is 'ALL', derive the index list from this
## package's symbol table: every symbol whose name contains "dump" followed
## by word characters contributes those trailing characters. Otherwise the
## option is treated as a comma-separated list of index names.
my @indexes =
  $ind eq 'ALL'
  ? ( map { /dump(\w+)/ ? $1 : () } keys %ebi_search_dump:: )
  : split( ',', $ind );
#warn Dumper \@indexes;

# Look up the available databases and say so when nothing came back.
my $dbHash = get_databases();
#warn Dumper $dbHash;
if ( !%{$dbHash} ) {
    print "*** No databases found ***\n";
}

# The species option is a comma-separated list.
my @species_list = split( ',', $species );