Commit 4751bf0a authored by Leo Gordon's avatar Leo Gordon
Browse files

schema_change: detect and register RELOCATED events that used to mess up...

schema_change: detect and register RELOCATED events that used to mess up things on LSF 9.0 ("job rescheduled" in LSF parlance)
parent 9de58274
......@@ -114,6 +114,18 @@ sub create_new_worker {
rearrange([qw(meadow_type meadow_name process_id exec_host resource_class_id resource_class_name
no_write debug worker_log_dir hive_log_dir job_limit life_span no_cleanup retry_throwing_jobs can_respecialize) ], @args);
foreach my $prev_worker_incarnation (@{ $self->fetch_all( "status!='DEAD' AND meadow_type='$meadow_type' AND meadow_name='$meadow_name' AND process_id='$process_id'" ) }) {
# So far, 'RELOCATED' events have been detected on LSF 9.0 in response to sending signal #99 or #100.
# Since I don't know how to avoid them, I am trying to register them when they happen.
# The following snippet buries the previous incarnation of the Worker before starting a new one.
#
# FIXME: if GarbageCollector (beekeeper -dead) gets to these processes first, it will register them as DEAD/UNKNOWN.
# LSF 9.0 does not report "rescheduling" events in the output of 'bacct', but does mention them in 'bhist'.
# So parsing 'bhist' output would probably yield the most accurate & confident registration of these events.
$prev_worker_incarnation->cause_of_death( 'RELOCATED' );
$self->register_worker_death( $prev_worker_incarnation );
}
if( defined($resource_class_name) ) {
my $rc = $self->db->get_ResourceClassAdaptor->fetch_by_name($resource_class_name)
or die "resource_class with name='$resource_class_name' could not be fetched from the database";
......
-- Add 'RELOCATED' to the possible values of cause_of_death.
-- The full ENUM list is restated here, with 'RELOCATED' inserted right after 'CONTAMINATED':
ALTER TABLE worker MODIFY COLUMN cause_of_death ENUM('NO_ROLE', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'RELOCATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'SEE_MSG', 'UNKNOWN') DEFAULT NULL;
-- UPDATE hive_sql_schema_version from 54 to 55.
-- The "AND meta_value='54'" guard leaves the row untouched unless the schema is currently at version 54:
UPDATE hive_meta SET meta_value=55 WHERE meta_key='hive_sql_schema_version' AND meta_value='54';
-- Add 'RELOCATED' to the possible values of cause_of_death.
-- The new label is placed right after 'CONTAMINATED' in the worker_cod enum type.
-- NOTE(review): some PostgreSQL versions refuse ALTER TYPE ... ADD VALUE inside a transaction block -- confirm how this patch is applied.
ALTER TYPE worker_cod ADD VALUE 'RELOCATED' AFTER 'CONTAMINATED';
-- UPDATE hive_sql_schema_version from 54 to 55.
-- The "AND meta_value='54'" guard leaves the row untouched unless the schema is currently at version 54:
UPDATE hive_meta SET meta_value=55 WHERE meta_key='hive_sql_schema_version' AND meta_value='54';
-- Add 'RELOCATED' to the possible values of cause_of_death:
-- (since SQLite uses TEXT instead of ENUM types, our change here is trivial:
--  no DDL statement is needed, only the schema version is bumped)
-- UPDATE hive_sql_schema_version from 54 to 55.
-- The "AND meta_value='54'" guard leaves the row untouched unless the schema is currently at version 54:
UPDATE hive_meta SET meta_value=55 WHERE meta_key='hive_sql_schema_version' AND meta_value='54';
......@@ -456,7 +456,7 @@ CREATE TABLE worker (
born TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_check_in TIMESTAMP NOT NULL,
died TIMESTAMP NULL, -- mysql's special for "TIMESTAMP DEFAULT NULL"
cause_of_death ENUM('NO_ROLE', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'SEE_MSG', 'UNKNOWN') DEFAULT NULL,
cause_of_death ENUM('NO_ROLE', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'RELOCATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'SEE_MSG', 'UNKNOWN') DEFAULT NULL,
log_dir VARCHAR(255) DEFAULT NULL,
KEY analysis_status (analysis_id, status)
......
......@@ -446,7 +446,7 @@ CREATE INDEX ON meta (species_id, meta_value);
@column log_dir if defined, a filesystem directory where this Worker's output is logged
*/
CREATE TYPE worker_cod AS ENUM ('NO_ROLE', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'SEE_MSG', 'UNKNOWN');
CREATE TYPE worker_cod AS ENUM ('NO_ROLE', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'RELOCATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'SEE_MSG', 'UNKNOWN');
CREATE TABLE worker (
worker_id SERIAL PRIMARY KEY,
meadow_type VARCHAR(255) NOT NULL,
......
......@@ -448,7 +448,7 @@ CREATE TABLE worker (
born TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_check_in TIMESTAMP NOT NULL,
died TIMESTAMP DEFAULT NULL,
cause_of_death TEXT DEFAULT NULL, /* enum('NO_ROLE', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'SEE_MSG', 'UNKNOWN') DEFAULT NULL */
cause_of_death TEXT DEFAULT NULL, /* enum('NO_ROLE', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'RELOCATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'SEE_MSG', 'UNKNOWN') DEFAULT NULL */
log_dir VARCHAR(255) DEFAULT NULL
);
CREATE INDEX worker_analysis_id_status_idx ON worker (analysis_id, status);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment