Skip to content
Snippets Groups Projects
Commit 805432c6 authored by Magali Ruffier
Browse files

ENSCORESW-2850: optimised for single species run

parent 170c0c41
No related branches found
No related tags found
4 merge requests: !342 Feature/schema update 96, !295 ENSCORESW-2850: optimised for single species run, !342 Feature/schema update 96, !295 ENSCORESW-2850: optimised for single species run
...@@ -55,7 +55,7 @@ sub run { ...@@ -55,7 +55,7 @@ sub run {
my $unzip = $ref_arg->{unzip}; my $unzip = $ref_arg->{unzip};
my $stats = $ref_arg->{stats}; my $stats = $ref_arg->{stats};
my $cleanup = $ref_arg->{cleanup}; my $cleanup = $ref_arg->{cleanup};
my $rspecies = $ref_arg->{speciesr}; my $species = $ref_arg->{species};
my $taxon_id = $ref_arg->{taxon}; my $taxon_id = $ref_arg->{taxon};
my $division = $ref_arg->{division}; my $division = $ref_arg->{division};
my $sources = $ref_arg->{sourcesr}; my $sources = $ref_arg->{sourcesr};
...@@ -101,21 +101,23 @@ DSS ...@@ -101,21 +101,23 @@ DSS
my $dep_sth = $dbi->prepare($sql); my $dep_sth = $dbi->prepare($sql);
# validate species names # validate species names
if (defined $division) { push @$rspecies, $division; } my $species_id = $self->validate_species($species, $verbose);
my @species_ids = $self->validate_species($rspecies, $verbose); my $division_id = $self->validate_species($division, $verbose);
if (defined $taxon_id) { push @species_ids, $taxon_id; } my @species_sources = ($species_id);
push @species_sources, $division_id if defined $division_id;
push @species_sources, $taxon_id if defined $taxon_id;
# validate source names # validate source names
exit(1) if ( !$self->validate_sources(\@species_ids,$sources, $verbose) ); exit(1) if ( !$self->validate_sources(\@species_sources,$sources, $verbose) );
exit(1) if ( !$self->validate_sources(\@species_ids,$notsources, $verbose) ); exit(1) if ( !$self->validate_sources(\@species_sources,$notsources, $verbose) );
# build SQL # build SQL
my $species_sql = ""; my $species_sql = "";
if (@species_ids) { if (@species_sources) {
$species_sql .= " AND su.species_id IN ("; $species_sql .= " AND su.species_id IN (";
for ( my $i = 0 ; $i < @species_ids ; $i++ ) { for ( my $i = 0 ; $i < @species_sources; $i++ ) {
$species_sql .= "," if ( $i != 0 ); $species_sql .= "," if ( $i != 0 );
$species_sql .= $species_ids[$i]; $species_sql .= $species_sources[$i];
} }
$species_sql .= ") "; $species_sql .= ") ";
} }
...@@ -156,12 +158,12 @@ DSS ...@@ -156,12 +158,12 @@ DSS
$sth->execute(); $sth->execute();
my ( $source_id, $source_url_id, $name, $url, $release_url, my ( $source_id, $source_url_id, $name, $url, $release_url,
$checksum, $parser, $species_id ); $checksum, $parser, $species_source_id );
$sth->bind_columns( \$source_id, \$source_url_id, $sth->bind_columns( \$source_id, \$source_url_id,
\$name, \$url, \$name, \$url,
\$release_url, \$checksum, \$release_url, \$checksum,
\$parser, \$species_id ); \$parser, \$species_source_id );
my $dir; my $dir;
my %summary = (); my %summary = ();
...@@ -241,7 +243,7 @@ DSS ...@@ -241,7 +243,7 @@ DSS
foreach my $file (@files) { foreach my $file (@files) {
# check dependencies are loaded all ready # check dependencies are loaded all ready
if(!($self->all_dependencies_loaded($source_id, $species_id, $name, $dep_sth))){ if(!($self->all_dependencies_loaded($source_id, $species_source_id, $name, $dep_sth))){
++$summary{$name}->{$parser}; ++$summary{$name}->{$parser};
next; next;
} }
...@@ -682,26 +684,21 @@ sub dbi { ...@@ -682,26 +684,21 @@ sub dbi {
########################################################### ###########################################################
sub validate_species { sub validate_species {
my ($self, $species, $verbose) = @_; my ($self, $species, $verbose) = @_;
my @species_ids;
my $dbi = $self->dbi(); my $dbi = $self->dbi();
my $sth = $dbi->prepare("SELECT species_id, name FROM species WHERE LOWER(name)=? OR LOWER(aliases) REGEXP ?"); my $sth = $dbi->prepare("SELECT species_id, name FROM species WHERE LOWER(name)=? OR LOWER(aliases) REGEXP ?");
my ($species_id, $species_name); my ($species_id, $species_name);
foreach my $sp (@$species) { my $bind_arg = "^".lc($species).",|^".lc($species)."\$|,[ ]{0,1}".lc($species)."[ ]{0,1},|,[ ]{0,1}".lc($species)."\$";
$sth->execute(lc($species), $bind_arg );
my $bind_arg = "^".lc($sp).",|^".lc($sp)."\$|,[ ]{0,1}".lc($sp)."[ ]{0,1},|,[ ]{0,1}".lc($sp)."\$"; $sth->bind_columns(\$species_id, \$species_name);
$sth->execute(lc($sp), $bind_arg ); if (my @row = $sth->fetchrow_array()) {
$sth->bind_columns(\$species_id, \$species_name); print "Species $species is valid (name = " . $species_name . ", ID = " . $species_id . ")\n" if($verbose);
if (my @row = $sth->fetchrow_array()) { } else {
print "Species $sp is valid (name = " . $species_name . ", ID = " . $species_id . ")\n" if($verbose); print STDERR "Species $species is not valid; valid species are:\n";
push @species_ids, $species_id; $self->show_valid_species();
} else {
print STDERR "Species $sp is not valid; valid species are:\n";
$self->show_valid_species();
}
} }
return @species_ids; return $species_id;
} }
############################################################ ############################################################
......
...@@ -72,7 +72,6 @@ if($ARGV[0]){ ...@@ -72,7 +72,6 @@ if($ARGV[0]){
exit(1); exit(1);
} }
my @species = split(/,/,join(',',$species)) if $species;
my @sources = split(/,/,join(',',$sources)) if $sources; my @sources = split(/,/,join(',',$sources)) if $sources;
my @notsource = split(/,/,join(',',$notsource)) if $notsource; my @notsource = split(/,/,join(',',$notsource)) if $notsource;
...@@ -103,7 +102,7 @@ $process->run({ host => $host, ...@@ -103,7 +102,7 @@ $process->run({ host => $host,
dbname => $dbname, dbname => $dbname,
user => $user, user => $user,
pass => $pass, pass => $pass,
speciesr => \@species, species => $species,
taxon => $taxon, taxon => $taxon,
division => $division, division => $division,
sourcesr => \@sources, sourcesr => \@sources,
...@@ -147,7 +146,7 @@ sub usage { ...@@ -147,7 +146,7 @@ sub usage {
print << "EOF"; print << "EOF";
xref_parser.pl -user {user} -pass {password} -host {host} \\ xref_parser.pl -user {user} -pass {password} -host {host} \\
-port {port} -dbname {database} -species {species1,species2} \\ -port {port} -dbname {database} -species {species} \\
-source {source1,source2} -notsource {source1,source2} \\ -source {source1,source2} -notsource {source1,source2} \\
-create -setrelease -deletedownloaded -checkdownload -stats -verbose \\ -create -setrelease -deletedownloaded -checkdownload -stats -verbose \\
-cleanup -drop_db -download_path -unzip -cleanup -drop_db -download_path -unzip
...@@ -162,14 +161,11 @@ sub usage { ...@@ -162,14 +161,11 @@ sub usage {
-dbname Name of xref database to use/create. -dbname Name of xref database to use/create.
-species Which species to import. Multiple -species arguments -species Which species to import.
and/or comma, separated lists of species are Species may be referred to by genus/species
allowed. Species may be referred to by genus/species
(e.g. homo_sapiens) or common aliases (e.g. human). (e.g. homo_sapiens) or common aliases (e.g. human).
Specifying an unknown species will cause a list Specifying an unknown species will cause a list
of valid species to be printed. Not specifying a of valid species to be printed.
-species argument will result in all species being
used.
-source Which sources to import. Multiple -source arguments -source Which sources to import. Multiple -source arguments
and/or comma, separated lists of sources are and/or comma, separated lists of sources are
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment