Skip to content
Snippets Groups Projects
Commit 805432c6 authored by Magali Ruffier
Browse files

ENSCORESW-2850: optimised for single species run

parent 170c0c41
No related branches found
No related tags found
4 merge requests: !342 Feature/schema update 96, !295 ENSCORESW-2850: optimised for single species run, !342 Feature/schema update 96, !295 ENSCORESW-2850: optimised for single species run
......@@ -55,7 +55,7 @@ sub run {
my $unzip = $ref_arg->{unzip};
my $stats = $ref_arg->{stats};
my $cleanup = $ref_arg->{cleanup};
my $rspecies = $ref_arg->{speciesr};
my $species = $ref_arg->{species};
my $taxon_id = $ref_arg->{taxon};
my $division = $ref_arg->{division};
my $sources = $ref_arg->{sourcesr};
......@@ -101,21 +101,23 @@ DSS
my $dep_sth = $dbi->prepare($sql);
# validate species names
if (defined $division) { push @$rspecies, $division; }
my @species_ids = $self->validate_species($rspecies, $verbose);
if (defined $taxon_id) { push @species_ids, $taxon_id; }
my $species_id = $self->validate_species($species, $verbose);
my $division_id = $self->validate_species($division, $verbose);
my @species_sources = ($species_id);
push @species_sources, $division_id if defined $division_id;
push @species_sources, $taxon_id if defined $taxon_id;
# validate source names
exit(1) if ( !$self->validate_sources(\@species_ids,$sources, $verbose) );
exit(1) if ( !$self->validate_sources(\@species_ids,$notsources, $verbose) );
exit(1) if ( !$self->validate_sources(\@species_sources,$sources, $verbose) );
exit(1) if ( !$self->validate_sources(\@species_sources,$notsources, $verbose) );
# build SQL
my $species_sql = "";
if (@species_ids) {
if (@species_sources) {
$species_sql .= " AND su.species_id IN (";
for ( my $i = 0 ; $i < @species_ids ; $i++ ) {
for ( my $i = 0 ; $i < @species_sources; $i++ ) {
$species_sql .= "," if ( $i != 0 );
$species_sql .= $species_ids[$i];
$species_sql .= $species_sources[$i];
}
$species_sql .= ") ";
}
......@@ -156,12 +158,12 @@ DSS
$sth->execute();
my ( $source_id, $source_url_id, $name, $url, $release_url,
$checksum, $parser, $species_id );
$checksum, $parser, $species_source_id );
$sth->bind_columns( \$source_id, \$source_url_id,
\$name, \$url,
\$release_url, \$checksum,
\$parser, \$species_id );
\$parser, \$species_source_id );
my $dir;
my %summary = ();
......@@ -241,7 +243,7 @@ DSS
foreach my $file (@files) {
# check dependencies are loaded all ready
if(!($self->all_dependencies_loaded($source_id, $species_id, $name, $dep_sth))){
if(!($self->all_dependencies_loaded($source_id, $species_source_id, $name, $dep_sth))){
++$summary{$name}->{$parser};
next;
}
......@@ -682,26 +684,21 @@ sub dbi {
###########################################################
# validate_species($species, $verbose)
#
# NOTE(review): this span is a rendered diff hunk. Lines from the version
# before the commit (loops over @$species, collects @species_ids) and the
# version after it (treats $species as a single name, returns $species_id)
# appear interleaved without +/- markers, so it is not runnable as shown.
#
# Both versions look the name up in the `species` table: a row matches when
# LOWER(name) equals the lower-cased input, or when LOWER(aliases) matches a
# pattern built from the input.
#
# Output: when $verbose is true, a "is valid" line is printed for a match.
# When no row is fetched, a message goes to STDERR and show_valid_species()
# is called.
#
# Returns: the list-based form returns @species_ids; the single-name form
# returns $species_id, which is still undefined when no row was fetched.
#
# NOTE(review): the single-name form calls lc($species) unconditionally, and
# the caller in this same diff passes $division without a defined-check --
# confirm how an undefined argument should be handled.
sub validate_species {
my ($self, $species, $verbose) = @_;
# Accumulator used only by the list-based form (see `push` below).
my @species_ids;
my $dbi = $self->dbi();
# Two placeholders: exact lower-cased name, and a pattern for the aliases column.
my $sth = $dbi->prepare("SELECT species_id, name FROM species WHERE LOWER(name)=? OR LOWER(aliases) REGEXP ?");
my ($species_id, $species_name);
# --- list-based form: validate each entry of the array reference in turn ---
foreach my $sp (@$species) {
# Pattern alternatives: name followed by a comma at the start, name as the
# whole string, name between commas (optional single space either side),
# or name at the end after a comma.
# NOTE(review): the name is interpolated into the pattern without escaping.
my $bind_arg = "^".lc($sp).",|^".lc($sp)."\$|,[ ]{0,1}".lc($sp)."[ ]{0,1},|,[ ]{0,1}".lc($sp)."\$";
$sth->execute(lc($sp), $bind_arg );
$sth->bind_columns(\$species_id, \$species_name);
# Only the first fetched row is used; the bound variables carry its values.
if (my @row = $sth->fetchrow_array()) {
print "Species $sp is valid (name = " . $species_name . ", ID = " . $species_id . ")\n" if($verbose);
push @species_ids, $species_id;
} else {
print STDERR "Species $sp is not valid; valid species are:\n";
$self->show_valid_species();
}
# --- single-name form: same lookup, applied directly to $species ---
# Same four pattern alternatives as above, built from $species instead of $sp.
my $bind_arg = "^".lc($species).",|^".lc($species)."\$|,[ ]{0,1}".lc($species)."[ ]{0,1},|,[ ]{0,1}".lc($species)."\$";
$sth->execute(lc($species), $bind_arg );
$sth->bind_columns(\$species_id, \$species_name);
# Only the first fetched row is used; the bound variables carry its values.
if (my @row = $sth->fetchrow_array()) {
print "Species $species is valid (name = " . $species_name . ", ID = " . $species_id . ")\n" if($verbose);
} else {
print STDERR "Species $species is not valid; valid species are:\n";
$self->show_valid_species();
}
# Return of the list-based form.
return @species_ids;
# Return of the single-name form; undefined when no row matched.
return $species_id;
}
############################################################
......
......@@ -72,7 +72,6 @@ if($ARGV[0]){
exit(1);
}
my @species = split(/,/,join(',',$species)) if $species;
my @sources = split(/,/,join(',',$sources)) if $sources;
my @notsource = split(/,/,join(',',$notsource)) if $notsource;
......@@ -103,7 +102,7 @@ $process->run({ host => $host,
dbname => $dbname,
user => $user,
pass => $pass,
speciesr => \@species,
species => $species,
taxon => $taxon,
division => $division,
sourcesr => \@sources,
......@@ -147,7 +146,7 @@ sub usage {
print << "EOF";
xref_parser.pl -user {user} -pass {password} -host {host} \\
-port {port} -dbname {database} -species {species1,species2} \\
-port {port} -dbname {database} -species {species} \\
-source {source1,source2} -notsource {source1,source2} \\
-create -setrelease -deletedownloaded -checkdownload -stats -verbose \\
-cleanup -drop_db -download_path -unzip
......@@ -162,14 +161,11 @@ sub usage {
-dbname Name of xref database to use/create.
-species Which species to import. Multiple -species arguments
and/or comma, separated lists of species are
allowed. Species may be referred to by genus/species
-species Which species to import.
Species may be referred to by genus/species
(e.g. homo_sapiens) or common aliases (e.g. human).
Specifying an unknown species will cause a list
of valid species to be printed. Not specifying a
-species argument will result in all species being
used.
of valid species to be printed.
-source Which sources to import. Multiple -source arguments
and/or comma, separated lists of sources are
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment