Commit 7d39293d authored by Leo Gordon's avatar Leo Gordon
Browse files

replaced rc_id by resource_class_id throughout the schema and added the...

replaced rc_id by resource_class_id throughout the schema and added the foreign keys on resource_class_id
parent 15aaf506
No preview for this file type
docs/hive_schema.png

238 KB | W: | H:

docs/hive_schema.png

253 KB | W: | H:

docs/hive_schema.png
docs/hive_schema.png
docs/hive_schema.png
docs/hive_schema.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -262,11 +262,11 @@ sub sync_lock {
return $self->{'_sync_lock'};
}
sub rc_id {
sub resource_class_id {
my $self = shift;
$self->{'_rc_id'} = shift if(@_);
return $self->{'_rc_id'};
$self->{'_resource_class_id'} = shift if(@_);
return $self->{'_resource_class_id'};
}
sub can_be_empty {
......
......@@ -47,6 +47,17 @@ use Bio::EnsEMBL::Utils::Exception;
use base ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
# Inserts the analysis_stats row for an analysis together with its resource class.
#
#   Arg [1]    : int $analysis_id
#   Arg [2]    : int $resource_class_id
#   Returntype : whatever $sth->finish returns
#   Exceptions : dies if either id is undefined
#
# The insert uses the driver's "ignore" flavour ('INSERT OR IGNORE' on sqlite,
# 'INSERT IGNORE' otherwise), so calling it again for an analysis_id that is
# already present is not an error.
sub create_new_for_analysis_id_resource_class_id {
    my ($self, $analysis_id, $resource_class_id) = @_;

    # Fail loudly here: a bound undef would be sent to the database as NULL
    # instead of producing an obviously broken statement.
    throw("both analysis_id and resource_class_id must be defined to create an analysis_stats row")
        unless defined($analysis_id) && defined($resource_class_id);

    my $insertion_method = ($self->dbc->driver eq 'sqlite') ? 'INSERT OR IGNORE' : 'INSERT IGNORE';

    # The values are bound as placeholders rather than interpolated into the SQL text.
    my $sql = "$insertion_method INTO analysis_stats (analysis_id, resource_class_id) VALUES (?, ?)";

    my $sth = $self->prepare($sql);
    $sth->execute($analysis_id, $resource_class_id);
    $sth->finish;
}
=head2 fetch_by_analysis_id
Arg [1] : int $id
......@@ -71,15 +82,10 @@ sub fetch_by_analysis_id {
#return first element of _generic_fetch list
my ($obj) = @{$self->_generic_fetch($constraint)};
unless(defined($obj)) {
$self->_create_new_for_analysis_id($id);
($obj) = @{$self->_generic_fetch($constraint)};
}
if(!defined($obj)) {
throw("unable to fetch analysis_stats for analysis_id = $id\n");
}
return $obj;
}
......@@ -91,10 +97,10 @@ sub fetch_all {
sub fetch_by_needed_workers {
my ($self, $limit, $rc_id) = @_;
my ($self, $limit, $resource_class_id) = @_;
my $constraint = "ast.num_required_workers>0 AND ast.status in ('READY','WORKING')"
.(defined($rc_id) ? " AND ast.rc_id = $rc_id" : '');
.(defined($resource_class_id) ? " AND ast.resource_class_id = $resource_class_id" : '');
my $final_clause = 'ORDER BY priority DESC, '
.( ($self->dbc->driver eq 'sqlite') ? 'RANDOM()' : 'RAND()' )
......@@ -109,10 +115,10 @@ sub fetch_by_needed_workers {
sub fetch_by_statuses {
my ($self, $statuses, $rc_id) = @_;
my ($self, $statuses, $resource_class_id) = @_;
my $constraint = 'ast.status in ('.join(', ', map { "'$_'" } @$statuses).')'
.(defined($rc_id) ? " AND ast.rc_id = $rc_id" : '');
.(defined($resource_class_id) ? " AND ast.resource_class_id = $resource_class_id" : '');
$self->_final_clause('ORDER BY last_update');
my $results = $self->_generic_fetch($constraint);
......@@ -209,7 +215,7 @@ sub update {
$sql .= ",num_required_workers=" . $stats->num_required_workers();
$sql .= ",last_update=CURRENT_TIMESTAMP";
$sql .= ",sync_lock='0'";
$sql .= ",rc_id=". $stats->rc_id();
$sql .= ",resource_class_id=". $stats->resource_class_id();
$sql .= ",can_be_empty=". $stats->can_be_empty();
$sql .= ",priority=". $stats->priority();
$sql .= " WHERE analysis_id='".$stats->analysis_id."' ";
......@@ -434,7 +440,7 @@ sub _columns {
ast.num_required_workers
ast.last_update
ast.sync_lock
ast.rc_id
ast.resource_class_id
ast.can_be_empty
ast.priority
);
......@@ -459,7 +465,7 @@ sub _objs_from_sth {
$analStats->analysis_id($column{'analysis_id'});
$analStats->status($column{'status'});
$analStats->sync_lock($column{'sync_lock'});
$analStats->rc_id($column{'rc_id'});
$analStats->resource_class_id($column{'resource_class_id'});
$analStats->can_be_empty($column{'can_be_empty'});
$analStats->priority($column{'priority'});
$analStats->batch_size($column{'batch_size'});
......@@ -502,19 +508,5 @@ sub _final_clause {
}
# Inserts a bare analysis_stats row for the given analysis.
#
#   Arg [1]    : int $analysis_id
#   Returntype : whatever $sth->finish returns
#   Exceptions : dies if $analysis_id is undefined
#
# Only analysis_id is supplied; every other column is left to the table
# definition. The insert uses the driver's "ignore" flavour ('INSERT OR IGNORE'
# on sqlite, 'INSERT IGNORE' otherwise), so an already present analysis_id is
# not an error.
sub _create_new_for_analysis_id {
    my ($self, $analysis_id) = @_;

    # Fail loudly here: a bound undef would be sent to the database as NULL
    # instead of producing an obviously broken statement.
    throw("analysis_id must be defined to create an analysis_stats row")
        unless defined($analysis_id);

    my $insertion_method = ($self->dbc->driver eq 'sqlite') ? 'INSERT OR IGNORE' : 'INSERT IGNORE';

    # The value is bound as a placeholder rather than interpolated into the SQL text.
    my $sql = "$insertion_method INTO analysis_stats (analysis_id) VALUES (?)";

    my $sth = $self->prepare($sql);
    $sth->execute($analysis_id);
    $sth->finish;
}
1;
......@@ -346,18 +346,19 @@ sub run {
die "Every resource has to have a unique description, please fix the PipeConfig file";
}
warn "Creating resource_class '$rc_name'.\n";
my $rc = $resource_class_adaptor->create_new(
defined($rc_id) ? (-DBID => $rc_id) : (),
-NAME => $rc_name,
);
$rc_id = $rc->dbID();
warn "Creating resource_class $rc_name($rc_id).\n";
while( my($meadow_type, $xparams) = each %$mt2param ) {
$resource_description_adaptor->create_new(
-RC_ID => $rc->dbID,
-MEADOW_TYPE => $meadow_type,
-PARAMETERS => $xparams,
-RESOURCE_CLASS_ID => $rc_id,
-MEADOW_TYPE => $meadow_type,
-PARAMETERS => $xparams,
);
}
}
......@@ -369,6 +370,7 @@ sub run {
}
my $analysis_adaptor = $hive_dba->get_AnalysisAdaptor;
my $analysis_stats_adaptor = $hive_dba->get_AnalysisStatsAdaptor;
my %seen_logic_name = ();
......@@ -414,15 +416,15 @@ sub run {
-parameters => stringify($parameters_hash || {}), # have to stringify it here, because Analysis code is external wrt Hive code
-program_file => $program_file,
);
$analysis_adaptor->store($analysis);
$analysis_stats_adaptor->create_new_for_analysis_id_resource_class_id($analysis->dbID, $rc_id);
my $stats = $analysis->stats();
$stats->batch_size( $batch_size ) if(defined($batch_size));
$stats->hive_capacity( $hive_capacity ) if(defined($hive_capacity));
$stats->failed_job_tolerance( $failed_job_tolerance ) if(defined($failed_job_tolerance));
$stats->max_retry_count( $max_retry_count ) if(defined($max_retry_count));
$stats->rc_id( $rc_id ) if(defined($rc_id));
$stats->can_be_empty( $can_be_empty ) if(defined($can_be_empty));
$stats->priority( $priority ) if(defined($priority));
$stats->status($blocked ? 'BLOCKED' : 'READY'); # be careful, as this "soft" way of blocking may be accidentally unblocked by deep sync
......
......@@ -737,7 +737,7 @@ sub schedule_workers {
$available_load -= 1.0*$workers_this_analysis/$hive_capacity;
}
my $curr_rc_id = $analysis_stats->rc_id;
my $curr_rc_id = $analysis_stats->resource_class_id;
if($pending_by_rc_id{ $curr_rc_id }) { # per-rc_id capping by pending processes, if available
my $pending_this_analysis = ($pending_by_rc_id{ $curr_rc_id } < $workers_this_analysis) ? $pending_by_rc_id{ $curr_rc_id } : $workers_this_analysis;
......@@ -750,7 +750,7 @@ sub schedule_workers {
$total_workers_to_run += $workers_this_analysis;
$workers_to_run_by_rc_id{ $curr_rc_id } += $workers_this_analysis;
$analysis_stats->print_stats();
printf("Scheduler suggests adding %d more workers of rc_id=%d for analysis_id=%d [%1.3f hive_load remaining]\n", $workers_this_analysis, $curr_rc_id, $analysis_stats->analysis_id, $available_load);
printf("Scheduler suggests adding %d more workers of resource_class_id=%d for analysis_id=%d [%1.3f hive_load remaining]\n", $workers_this_analysis, $curr_rc_id, $analysis_stats->analysis_id, $available_load);
}
printf("Scheduler suggests adding a total of %d workers [%1.5f hive_load remaining]\n", $total_workers_to_run, $available_load);
......@@ -907,7 +907,7 @@ sub _pick_best_analysis_for_new_worker {
if($stats) {
#synchronize and double check that it can be run
$self->safe_synchronize_AnalysisStats($stats);
return $stats if(($stats->status ne 'BLOCKED') and ($stats->num_required_workers > 0) and (!defined($rc_id) or ($stats->rc_id == $rc_id)));
return $stats if(($stats->status ne 'BLOCKED') and ($stats->num_required_workers > 0) and (!defined($rc_id) or ($stats->resource_class_id == $rc_id)));
}
# ok so no analyses 'need' workers with the given $rc_id.
......@@ -922,7 +922,7 @@ sub _pick_best_analysis_for_new_worker {
foreach $stats (@$stats_list) {
$self->safe_synchronize_AnalysisStats($stats);
return $stats if(($stats->status ne 'BLOCKED') and ($stats->num_required_workers > 0) and (!defined($rc_id) or ($stats->rc_id == $rc_id)));
return $stats if(($stats->status ne 'BLOCKED') and ($stats->num_required_workers > 0) and (!defined($rc_id) or ($stats->resource_class_id == $rc_id)));
}
# does the following really ever help?
......
......@@ -11,10 +11,10 @@
A data container object (the only methods are getters/setters) that corresponds to a row in 'resource_description' table:
CREATE TABLE resource_description (
rc_id int(10) unsigned DEFAULT 0 NOT NULL,
resource_class_id int(10) unsigned NOT NULL,
meadow_type varchar(40) NOT NULL,
parameters varchar(255) DEFAULT '' NOT NULL,
PRIMARY KEY(rc_id, meadow_type)
PRIMARY KEY(resource_class_id, meadow_type)
) ENGINE=InnoDB;
=head1 CONTACT
......@@ -35,11 +35,11 @@ sub new {
my $self = bless {}, $class;
my ($adaptor, $rc_id, $meadow_type, $parameters) =
rearrange([qw(adaptor rc_id meadow_type parameters) ], @_);
my ($adaptor, $resource_class_id, $meadow_type, $parameters) =
rearrange([qw(adaptor resource_class_id meadow_type parameters) ], @_);
$self->adaptor($adaptor) if(defined($adaptor));
$self->rc_id($rc_id);
$self->resource_class_id($resource_class_id);
$self->meadow_type($meadow_type);
$self->parameters($parameters);
......@@ -59,13 +59,13 @@ sub adaptor {
}
sub rc_id {
sub resource_class_id {
my $self = shift @_;
if(@_) {
$self->{'_rc_id'} = shift @_;
$self->{'_resource_class_id'} = shift @_;
}
return $self->{'_rc_id'};
return $self->{'_resource_class_id'};
}
sub meadow_type {
......@@ -89,7 +89,7 @@ sub parameters {
sub to_string {
my $self = shift @_;
return (ref($self).': '.join(', ', map { $_.'="'.$self->$_().'"' } qw(rc_id meadow_type parameters) ));
return (ref($self).': '.join(', ', map { $_.'="'.$self->$_().'"' } qw(resource_class_id meadow_type parameters) ));
}
1;
......
......@@ -316,7 +316,7 @@ sub run_autonomously {
my $worker_cmd = generate_worker_cmd($self, $run_job_id);
# pre-hash the resource_class xparams for future use:
my $rc_xparams = $self->{'dba'}->get_ResourceDescriptionAdaptor->fetch_by_meadow_type_HASHED_FROM_rc_id_TO_parameters($current_meadow->type());
my $rc_xparams = $self->{'dba'}->get_ResourceDescriptionAdaptor->fetch_by_meadow_type_HASHED_FROM_resource_class_id_TO_parameters($current_meadow->type());
my $iteration=0;
my $num_of_remaining_jobs=0;
......
# introducing the FOREIGN KEY constraints as a separate file (so that they could be optionally switched on or off):
ALTER TABLE analysis_description ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE worker ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE dataflow_rule ADD FOREIGN KEY (from_analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE analysis_ctrl_rule ADD FOREIGN KEY (ctrled_analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE job ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE analysis_stats ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE analysis_stats_monitor ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE analysis_description ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE worker ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE dataflow_rule ADD FOREIGN KEY (from_analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE analysis_ctrl_rule ADD FOREIGN KEY (ctrled_analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE job ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE analysis_stats ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE analysis_stats_monitor ADD FOREIGN KEY (analysis_id) REFERENCES analysis(analysis_id);
ALTER TABLE job ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id);
ALTER TABLE job_message ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id);
ALTER TABLE job_file ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id);
ALTER TABLE job ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id);
ALTER TABLE job_message ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id);
ALTER TABLE job_file ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id);
ALTER TABLE job ADD FOREIGN KEY (prev_job_id) REFERENCES job(job_id);
ALTER TABLE job ADD FOREIGN KEY (semaphored_job_id) REFERENCES job(job_id);
ALTER TABLE job_message ADD FOREIGN KEY (job_id) REFERENCES job(job_id);
ALTER TABLE job_file ADD FOREIGN KEY (job_id) REFERENCES job(job_id);
ALTER TABLE job ADD FOREIGN KEY (prev_job_id) REFERENCES job(job_id);
ALTER TABLE job ADD FOREIGN KEY (semaphored_job_id) REFERENCES job(job_id);
ALTER TABLE job_message ADD FOREIGN KEY (job_id) REFERENCES job(job_id);
ALTER TABLE job_file ADD FOREIGN KEY (job_id) REFERENCES job(job_id);
## The following are not unique keys in the original table, so cannot be used as foreign keys.
# ALTER TABLE analysis_stats ADD FOREIGN KEY (rc_id) REFERENCES resource_description(rc_id);
# ALTER TABLE analysis_stats_monitor ADD FOREIGN KEY (rc_id) REFERENCES resource_description(rc_id);
ALTER TABLE resource_description ADD FOREIGN KEY (resource_class_id) REFERENCES resource_class(resource_class_id);
ALTER TABLE analysis_stats ADD FOREIGN KEY (resource_class_id) REFERENCES resource_class(resource_class_id);
ALTER TABLE analysis_stats_monitor ADD FOREIGN KEY (resource_class_id) REFERENCES resource_class(resource_class_id);
# renaming rc_id into resource_class_id throughout the schema:
ALTER TABLE resource_description CHANGE COLUMN rc_id resource_class_id int(10) unsigned NOT NULL;
ALTER TABLE analysis_stats CHANGE COLUMN rc_id resource_class_id int(10) unsigned NOT NULL;
ALTER TABLE analysis_stats_monitor CHANGE COLUMN rc_id resource_class_id int(10) unsigned NOT NULL;
......@@ -328,15 +328,6 @@ CREATE TABLE analysis_data (
) COLLATE=latin1_swedish_ci ENGINE=InnoDB;
-- resource_description: per-meadow parameters of a resource class.
-- The primary key allows one row per (rc_id, meadow_type) pair.
--   rc_id        - resource class id (defaults to 0)
--   meadow_type  - type of the meadow the parameters apply to
--   parameters   - parameter string for that meadow type (empty string by default)
CREATE TABLE resource_description (
rc_id int(10) unsigned DEFAULT 0 NOT NULL,
meadow_type varchar(40) NOT NULL,
parameters varchar(255) DEFAULT '' NOT NULL,
PRIMARY KEY(rc_id, meadow_type)
) COLLATE=latin1_swedish_ci ENGINE=InnoDB;
CREATE TABLE resource_class (
resource_class_id int(10) unsigned NOT NULL AUTO_INCREMENT, # unique internal id
name varchar(40) NOT NULL,
......@@ -346,6 +337,15 @@ CREATE TABLE resource_class (
) COLLATE=latin1_swedish_ci ENGINE=InnoDB;
-- resource_description: per-meadow parameters of a resource class.
-- The primary key allows one row per (resource_class_id, meadow_type) pair.
--   resource_class_id - resource class id; mandatory, with no default value
--   meadow_type       - type of the meadow the parameters apply to
--   parameters        - parameter string for that meadow type (empty string by default)
CREATE TABLE resource_description (
resource_class_id int(10) unsigned NOT NULL,
meadow_type varchar(40) NOT NULL,
parameters varchar(255) DEFAULT '' NOT NULL,
PRIMARY KEY(resource_class_id, meadow_type)
) COLLATE=latin1_swedish_ci ENGINE=InnoDB;
-- ---------------------------------------------------------------------------------
--
-- Table structure for table 'analysis_stats'
......@@ -360,7 +360,7 @@ CREATE TABLE resource_class (
-- analysis_id - foreign key to analysis table
-- status - overview status of the jobs (cached state)
-- failed_job_tolerance - % of tolerated failed jobs
-- rc_id - resource class id (analyses are grouped into disjoint classes)
-- resource_class_id - resource class id (analyses are grouped into disjoint classes)
CREATE TABLE analysis_stats (
analysis_id int(10) unsigned NOT NULL,
......@@ -385,7 +385,7 @@ CREATE TABLE analysis_stats (
num_required_workers int(10) NOT NULL,
last_update datetime NOT NULL,
sync_lock int(10) default 0 NOT NULL,
rc_id int(10) unsigned default 0 NOT NULL,
resource_class_id int(10) unsigned NOT NULL,
can_be_empty TINYINT UNSIGNED DEFAULT 0 NOT NULL,
priority TINYINT DEFAULT 0 NOT NULL,
......@@ -418,7 +418,7 @@ CREATE TABLE analysis_stats_monitor (
num_required_workers int(10) NOT NULL,
last_update datetime NOT NULL,
sync_lock int(10) default 0 NOT NULL,
rc_id int(10) unsigned default 0 NOT NULL,
resource_class_id int(10) unsigned NOT NULL,
can_be_empty TINYINT UNSIGNED DEFAULT 0 NOT NULL,
priority TINYINT DEFAULT 0 NOT NULL
......@@ -451,5 +451,5 @@ CREATE TABLE monitor (
# Auto add schema version to database (should be overridden by Compara's table.sql)
INSERT IGNORE INTO meta (species_id, meta_key, meta_value) VALUES (NULL, 'schema_version', '67');
INSERT IGNORE INTO meta (species_id, meta_key, meta_value) VALUES (NULL, 'schema_version', '68');
......@@ -303,15 +303,6 @@ CREATE TABLE analysis_data (
CREATE INDEX IF NOT EXISTS data_idx ON analysis_data (data);
-- resource_description: per-meadow parameters of a resource class.
-- The primary key is the (rc_id, meadow_type) pair.
--   rc_id        - resource class id
--   meadow_type  - type of the meadow the parameters apply to (free text here, with no NOT NULL constraint)
--   parameters   - parameter string for that meadow type (empty string by default)
CREATE TABLE resource_description (
rc_id INTEGER NOT NULL,
meadow_type TEXT, /* enum('LSF', 'LOCAL') DEFAULT 'LSF' NOT NULL, */
parameters varchar(255) DEFAULT '' NOT NULL,
PRIMARY KEY(rc_id, meadow_type)
);
CREATE TABLE resource_class (
resource_class_id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
name varchar(40) NOT NULL
......@@ -320,6 +311,16 @@ CREATE TABLE resource_class (
CREATE UNIQUE INDEX IF NOT EXISTS resource_class_name_idx ON resource_class (name);
-- resource_description: per-meadow parameters of a resource class.
-- The primary key is the (resource_class_id, meadow_type) pair.
--   resource_class_id - resource class id; mandatory, with no default value
--   meadow_type       - type of the meadow the parameters apply to (free text here, with no NOT NULL constraint)
--   parameters        - parameter string for that meadow type (empty string by default)
CREATE TABLE resource_description (
resource_class_id INTEGER NOT NULL,
meadow_type TEXT, /* enum('LSF', 'LOCAL') DEFAULT 'LSF' NOT NULL, */
parameters varchar(255) DEFAULT '' NOT NULL,
PRIMARY KEY(resource_class_id, meadow_type)
);
-- ---------------------------------------------------------------------------------
--
-- Table structure for table 'analysis_stats'
......@@ -334,7 +335,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS resource_class_name_idx ON resource_class (nam
-- analysis_id - foreign key to analysis table
-- status - overview status of the jobs (cached state)
-- failed_job_tolerance - % of tolerated failed jobs
-- rc_id - resource class id (analyses are grouped into disjoint classes)
-- resource_class_id - resource class id (analyses are grouped into disjoint classes)
CREATE TABLE analysis_stats (
analysis_id INTEGER NOT NULL,
......@@ -358,7 +359,7 @@ CREATE TABLE analysis_stats (
num_required_workers int(10) NOT NULL DEFAULT 0,
last_update datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
sync_lock int(10) NOT NULL DEFAULT 0,
rc_id INTEGER NOT NULL DEFAULT 0,
resource_class_id INTEGER NOT NULL,
can_be_empty TINYINT UNSIGNED DEFAULT 0 NOT NULL,
priority TINYINT DEFAULT 0 NOT NULL
);
......@@ -388,7 +389,7 @@ CREATE TABLE analysis_stats_monitor (
num_required_workers int(10) NOT NULL DEFAULT 0,
last_update datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
sync_lock int(10) default 0 NOT NULL,
rc_id INTEGER DEFAULT 0 NOT NULL,
resource_class_id INTEGER NOT NULL,
can_be_empty TINYINT UNSIGNED DEFAULT 0 NOT NULL,
priority TINYINT DEFAULT 0 NOT NULL
);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment