Commit 6726ec54 authored by Leo Gordon's avatar Leo Gordon
Browse files

schema_change: switching to using worker_resource_usage table for all Meadows

parent 61069ad7
......@@ -60,42 +60,6 @@ sub main {
my $dbc = $hive_dba->dbc();
warn "Creating the 'lsf_report' table if it doesn't exist...\n";
$dbc->do (qq{
CREATE TABLE IF NOT EXISTS lsf_report (
meadow_name varchar(255) NOT NULL,
process_id varchar(255) NOT NULL,
status varchar(20) NOT NULL,
mem_megs float NOT NULL,
swap_megs float NOT NULL,
pending_sec integer NOT NULL,
cpu_sec float NOT NULL,
lifespan_sec integer NOT NULL,
exception_status varchar(40) NOT NULL,
PRIMARY KEY (meadow_name,process_id),
KEY process_id_idx (process_id)
) ENGINE=InnoDB;
});
warn "Creating the 'lsf_usage' view if it doesn't exist...\n";
$dbc->do (qq{
CREATE OR REPLACE VIEW lsf_usage AS
SELECT CONCAT(logic_name,'(',analysis_id,')') analysis,
CONCAT(rc.name,'(',rc.resource_class_id,')') resource_class,
count(*) workers,
min(mem_megs), avg(mem_megs), max(mem_megs),
min(swap_megs), avg(swap_megs), max(swap_megs)
FROM analysis_base
JOIN resource_class rc USING(resource_class_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN lsf_report USING (meadow_name, process_id)
WHERE w.meadow_type='LSF'
GROUP BY analysis_id
ORDER BY analysis_id;
});
my $this_lsf_farm = Bio::EnsEMBL::Hive::Meadow::LSF::name();
die "Cannot find the name of the current farm.\n" unless $this_lsf_farm;
......@@ -144,8 +108,10 @@ sub main {
warn 'Will run the following command to obtain '.($tee ? 'and dump ' : '')."bacct information: '$bacct_source_line' (may take a few minutes)\n";
}
my $processid_2_workerid = $hive_dba->get_WorkerAdaptor()->fetch_by_meadow_type_AND_meadow_name_HASHED_FROM_process_id_TO_worker_id( 'LSF', $this_lsf_farm );
my $sth_replace = $dbc->prepare( 'REPLACE INTO lsf_report (meadow_name, process_id, status, mem_megs, swap_megs, pending_sec, cpu_sec, lifespan_sec, exception_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)' );
my $sth_replace = $dbc->prepare( 'REPLACE INTO worker_resource_usage (worker_id, exit_status, mem_megs, swap_megs, pending_sec, cpu_sec, lifespan_sec, exception_status) VALUES (?, ?, ?, ?, ?, ?, ?, ?)' );
{
local $/ = "------------------------------------------------------------------------------\n\n";
my %units_converter = ( 'K' => 1.0/1024, 'M' => 1, 'G' => 1024, 'T' => 1024*1024 );
......@@ -180,7 +146,12 @@ sub main {
my $swap_megs = $swap_in_units * $units_converter{$swap_unit};
#warn join(', ', map {sprintf('%s=%s', $_, $usage{$_})} (sort keys %usage)), "\n";
$sth_replace->execute( $this_lsf_farm, $process_id, $usage{STATUS}, $mem_megs, $swap_megs, $usage{WAIT}, $usage{CPU_T}, $usage{TURNAROUND}, $exception_status );
if( my $worker_id = $processid_2_workerid->{$process_id} ) {
$sth_replace->execute( $worker_id, $usage{STATUS}, $mem_megs, $swap_megs, $usage{WAIT}, $usage{CPU_T}, $usage{TURNAROUND}, $exception_status );
} else {
warn "\tDiscarding process_id=$process_id as probably not ours because it could not be mapped to a Worker\n";
}
}
}
......
......@@ -43,6 +43,7 @@ ALTER TABLE worker ADD FOREIGN KEY (resource_class_id)
ALTER TABLE job ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id) ON DELETE CASCADE;
ALTER TABLE log_message ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id) ON DELETE CASCADE;
ALTER TABLE job_file ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id) ON DELETE CASCADE;
ALTER TABLE worker_resource_usage ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id) ON DELETE CASCADE;
ALTER TABLE job ADD FOREIGN KEY (prev_job_id) REFERENCES job(job_id) ON DELETE CASCADE;
ALTER TABLE job ADD FOREIGN KEY (semaphored_job_id) REFERENCES job(job_id) ON DELETE CASCADE;
......
-- add a new meadow-agnostic table for tracking the resource usage:
CREATE TABLE worker_resource_usage (
worker_id INTEGER NOT NULL,
exit_status VARCHAR(255) DEFAULT NULL,
mem_megs FLOAT DEFAULT NULL,
swap_megs FLOAT DEFAULT NULL,
pending_sec INTEGER DEFAULT NULL,
cpu_sec FLOAT DEFAULT NULL,
lifespan_sec INTEGER DEFAULT NULL,
exception_status VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (worker_id)
) COLLATE=latin1_swedish_ci ENGINE=InnoDB;
-- add a foreign key:
ALTER TABLE worker_resource_usage ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id) ON DELETE CASCADE;
-- add a stats view over the new table:
CREATE OR REPLACE VIEW resource_usage_stats AS
SELECT CONCAT(logic_name,'(',analysis_id,')') analysis,
w.meadow_type,
CONCAT(rc.name,'(',rc.resource_class_id,')') resource_class,
count(*) workers,
min(mem_megs), avg(mem_megs), max(mem_megs),
min(swap_megs), avg(swap_megs), max(swap_megs)
FROM analysis_base
JOIN resource_class rc USING(resource_class_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN worker_resource_usage USING (worker_id)
GROUP BY analysis_id, w.meadow_type
ORDER BY analysis_id, w.meadow_type;
-- add a new key to worker table to speed up mapping between process_id and worker_id:
ALTER TABLE worker ADD KEY meadow_process (meadow_type, meadow_name, process_id);
-- UPDATE hive_sql_schema_version
UPDATE hive_meta SET meta_value=59 WHERE meta_key='hive_sql_schema_version' AND meta_value='58';
-- add a new meadow-agnostic table for tracking the resource usage:
CREATE TABLE worker_resource_usage (
worker_id INTEGER NOT NULL,
exit_status VARCHAR(255) DEFAULT NULL,
mem_megs FLOAT DEFAULT NULL,
swap_megs FLOAT DEFAULT NULL,
pending_sec INTEGER DEFAULT NULL,
cpu_sec FLOAT DEFAULT NULL,
lifespan_sec INTEGER DEFAULT NULL,
exception_status VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (worker_id)
);
-- add a foreign key:
ALTER TABLE worker_resource_usage ADD FOREIGN KEY (worker_id) REFERENCES worker(worker_id) ON DELETE CASCADE;
-- add a stats view over the new table:
CREATE OR REPLACE VIEW resource_usage_stats AS
SELECT a.logic_name || '(' || a.analysis_id || ')' analysis,
w.meadow_type,
rc.name || '(' || rc.resource_class_id || ')' resource_class,
count(*) workers,
min(mem_megs) AS min_mem_megs, avg(mem_megs) AS avg_mem_megs, max(mem_megs) AS max_mem_megs,
min(swap_megs) AS min_swap_megs, avg(swap_megs) AS avg_swap_megs, max(swap_megs) AS max_swap_megs
FROM analysis_base a
JOIN resource_class rc USING(resource_class_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN worker_resource_usage USING (worker_id)
GROUP BY analysis_id, w.meadow_type, rc.resource_class_id
ORDER BY analysis_id, w.meadow_type;
-- add a new key to worker table to speed up mapping between process_id and worker_id:
CREATE INDEX ON worker (meadow_type, meadow_name, process_id);
-- UPDATE hive_sql_schema_version
UPDATE hive_meta SET meta_value=59 WHERE meta_key='hive_sql_schema_version' AND meta_value='58';
-- add a new meadow-agnostic table for tracking the resource usage:
CREATE TABLE worker_resource_usage (
worker_id INTEGER NOT NULL,
exit_status VARCHAR(255) DEFAULT NULL,
mem_megs FLOAT DEFAULT NULL,
swap_megs FLOAT DEFAULT NULL,
pending_sec INTEGER DEFAULT NULL,
cpu_sec FLOAT DEFAULT NULL,
lifespan_sec INTEGER DEFAULT NULL,
exception_status VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (worker_id)
);
-- add a stats view over the new table:
CREATE VIEW IF NOT EXISTS resource_usage_stats AS
SELECT a.logic_name || '(' || a.analysis_id || ')' analysis,
w.meadow_type,
rc.name || '(' || rc.resource_class_id || ')' resource_class,
count(*) workers,
min(mem_megs) AS min_mem_megs, avg(mem_megs) AS avg_mem_megs, max(mem_megs) AS max_mem_megs,
min(swap_megs) AS min_swap_megs, avg(swap_megs) AS avg_swap_megs, max(swap_megs) AS max_swap_megs
FROM resource_class rc
JOIN analysis_base a USING(resource_class_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN worker_resource_usage USING (worker_id)
GROUP BY analysis_id, w.meadow_type, rc.resource_class_id
ORDER BY analysis_id, w.meadow_type;
-- add a new key to worker table to speed up mapping between process_id and worker_id:
CREATE INDEX meadow_process ON worker (meadow_type, meadow_name, process_id);
-- UPDATE hive_sql_schema_version
UPDATE hive_meta SET meta_value=59 WHERE meta_key='hive_sql_schema_version' AND meta_value='58';
......@@ -62,6 +62,27 @@ CREATE OR REPLACE VIEW msg AS
LEFT JOIN analysis_base a ON (a.analysis_id=j.analysis_id);
-- show statistics of Workers' real resource usage by analysis -------------------------------------------
--
-- Usage:
-- select * from resource_usage_stats;
-- select * from resource_usage_stats where logic_name like 'family_blast%';
CREATE OR REPLACE VIEW resource_usage_stats AS
SELECT CONCAT(logic_name,'(',analysis_id,')') analysis,
w.meadow_type,
CONCAT(rc.name,'(',rc.resource_class_id,')') resource_class,
count(*) workers,
min(mem_megs), avg(mem_megs), max(mem_megs),
min(swap_megs), avg(swap_megs), max(swap_megs)
FROM analysis_base
JOIN resource_class rc USING(resource_class_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN worker_resource_usage USING (worker_id)
GROUP BY analysis_id, w.meadow_type
ORDER BY analysis_id, w.meadow_type;
#### time an analysis or group of analyses (given by a name pattern) ######################################
# You'll get better precision when the analyses involved have been done or failed rather than still running.
#
......
......@@ -61,3 +61,24 @@ CREATE OR REPLACE VIEW msg AS
LEFT JOIN job j ON (j.job_id=m.job_id)
LEFT JOIN analysis_base a ON (a.analysis_id=j.analysis_id);
-- show statistics of Workers' real resource usage by analysis -------------------------------------------
--
-- Usage:
-- select * from resource_usage_stats;
-- select * from resource_usage_stats where logic_name like 'family_blast%';
CREATE OR REPLACE VIEW resource_usage_stats AS
SELECT a.logic_name || '(' || a.analysis_id || ')' analysis,
w.meadow_type,
rc.name || '(' || rc.resource_class_id || ')' resource_class,
count(*) workers,
min(mem_megs) AS min_mem_megs, avg(mem_megs) AS avg_mem_megs, max(mem_megs) AS max_mem_megs,
min(swap_megs) AS min_swap_megs, avg(swap_megs) AS avg_swap_megs, max(swap_megs) AS max_swap_megs
FROM analysis_base a
JOIN resource_class rc USING(resource_class_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN worker_resource_usage USING (worker_id)
GROUP BY analysis_id, w.meadow_type, rc.resource_class_id
ORDER BY analysis_id, w.meadow_type;
......@@ -61,3 +61,24 @@ CREATE VIEW IF NOT EXISTS msg AS
LEFT JOIN job j ON (j.job_id=m.job_id)
LEFT JOIN analysis_base a ON (a.analysis_id=j.analysis_id);
-- show statistics of Workers' real resource usage by analysis -------------------------------------------
--
-- Usage:
-- select * from resource_usage_stats;
-- select * from resource_usage_stats where logic_name like 'family_blast%';
CREATE VIEW IF NOT EXISTS resource_usage_stats AS
SELECT a.logic_name || '(' || a.analysis_id || ')' analysis,
w.meadow_type,
rc.name || '(' || rc.resource_class_id || ')' resource_class,
count(*) workers,
min(mem_megs) AS min_mem_megs, avg(mem_megs) AS avg_mem_megs, max(mem_megs) AS max_mem_megs,
min(swap_megs) AS min_swap_megs, avg(swap_megs) AS avg_swap_megs, max(swap_megs) AS max_swap_megs
FROM resource_class rc
JOIN analysis_base a USING(resource_class_id)
LEFT JOIN worker w USING(analysis_id)
LEFT JOIN worker_resource_usage USING (worker_id)
GROUP BY analysis_id, w.meadow_type, rc.resource_class_id
ORDER BY analysis_id, w.meadow_type;
......@@ -470,7 +470,8 @@ CREATE TABLE worker (
cause_of_death ENUM('NO_ROLE', 'NO_WORK', 'JOB_LIMIT', 'HIVE_OVERLOAD', 'LIFESPAN', 'CONTAMINATED', 'RELOCATED', 'KILLED_BY_USER', 'MEMLIMIT', 'RUNLIMIT', 'SEE_MSG', 'UNKNOWN') DEFAULT NULL,
log_dir VARCHAR(255) DEFAULT NULL,
KEY analysis_status (analysis_id, status)
KEY analysis_status (analysis_id, status),
KEY meadow_process (meadow_type, meadow_name, process_id)
) COLLATE=latin1_swedish_ci ENGINE=InnoDB;
......@@ -479,10 +480,41 @@ CREATE TABLE worker (
@colour #F4D20C
*/
/**
@table worker_resource_usage
@colour #F4D20C
@desc A table with post-mortem resource usage statistics of a Worker.
@column worker_id links to the worker table
@column exit_status meadow-dependent, in case of LSF it's usually 'done' (normal) or 'exit' (abnormal)
@column mem_megs how much memory the Worker process used
@column swap_megs how much swap the Worker process used
@column pending_sec time spent by the process in the queue before it became a Worker
@column cpu_sec cpu time used by the Worker process
@column lifespan_sec walltime used by the Worker process
@column exception_status meadow-specific flags, in case of LSF it can be 'underrun', 'overrun' or 'idle'
*/
CREATE TABLE worker_resource_usage (
worker_id INTEGER NOT NULL,
exit_status VARCHAR(255) DEFAULT NULL,
mem_megs FLOAT DEFAULT NULL,
swap_megs FLOAT DEFAULT NULL,
pending_sec INTEGER DEFAULT NULL,
cpu_sec FLOAT DEFAULT NULL,
lifespan_sec INTEGER DEFAULT NULL,
exception_status VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (worker_id)
) COLLATE=latin1_swedish_ci ENGINE=InnoDB;
/**
@table log_message
@colour #08DAD8
@colour #F4D20C
@desc When a Job or a job-less Worker (job_id=NULL) throws a "die" message
for any reason, the message is recorded in this table.
......
......@@ -483,10 +483,41 @@ CREATE INDEX ON worker (analysis_id, status);
@colour #F4D20C
*/
/**
@table worker_resource_usage
@colour #F4D20C
@desc A table with post-mortem resource usage statistics of a Worker.
@column worker_id links to the worker table
@column exit_status meadow-dependent, in case of LSF it's usually 'done' (normal) or 'exit' (abnormal)
@column mem_megs how much memory the Worker process used
@column swap_megs how much swap the Worker process used
@column pending_sec time spent by the process in the queue before it became a Worker
@column cpu_sec cpu time used by the Worker process
@column lifespan_sec walltime used by the Worker process
@column exception_status meadow-specific flags, in case of LSF it can be 'underrun', 'overrun' or 'idle'
*/
CREATE TABLE worker_resource_usage (
worker_id INTEGER NOT NULL,
exit_status VARCHAR(255) DEFAULT NULL,
mem_megs FLOAT DEFAULT NULL,
swap_megs FLOAT DEFAULT NULL,
pending_sec INTEGER DEFAULT NULL,
cpu_sec FLOAT DEFAULT NULL,
lifespan_sec INTEGER DEFAULT NULL,
exception_status VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (worker_id)
);
/**
@table log_message
@colour #08DAD8
@colour #F4D20C
@desc When a Job or a job-less Worker (job_id=NULL) throws a "die" message
for any reason, the message is recorded in this table.
......
......@@ -473,10 +473,41 @@ CREATE INDEX worker_analysis_id_status_idx ON worker (analysis_id, status
@colour #F4D20C
*/
/**
@table worker_resource_usage
@colour #F4D20C
@desc A table with post-mortem resource usage statistics of a Worker.
@column worker_id links to the worker table
@column exit_status meadow-dependent, in case of LSF it's usually 'done' (normal) or 'exit' (abnormal)
@column mem_megs how much memory the Worker process used
@column swap_megs how much swap the Worker process used
@column pending_sec time spent by the process in the queue before it became a Worker
@column cpu_sec cpu time used by the Worker process
@column lifespan_sec walltime used by the Worker process
@column exception_status meadow-specific flags, in case of LSF it can be 'underrun', 'overrun' or 'idle'
*/
CREATE TABLE worker_resource_usage (
worker_id INTEGER NOT NULL,
exit_status VARCHAR(255) DEFAULT NULL,
mem_megs FLOAT DEFAULT NULL,
swap_megs FLOAT DEFAULT NULL,
pending_sec INTEGER DEFAULT NULL,
cpu_sec FLOAT DEFAULT NULL,
lifespan_sec INTEGER DEFAULT NULL,
exception_status VARCHAR(255) DEFAULT NULL,
PRIMARY KEY (worker_id)
);
/**
@table log_message
@colour #08DAD8
@colour #F4D20C
@desc When a Job or a job-less Worker (job_id=NULL) throws a "die" message
for any reason, the message is recorded in this table.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment