procedures.mysql 7.41 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*

DESCRIPTION

    Some stored functions, views and procedures used in eHive


LICENSE

10
    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
nwillhoft's avatar
nwillhoft committed
11
    Copyright [2016-2021] EMBL-European Bioinformatics Institute
12 13 14 15 16 17 18 19 20 21

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

22 23 24 25
CONTACT

    Please subscribe to the Hive mailing list:  http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users  to discuss Hive-related questions or to be notified of our updates

26
*/
27 28


29 30 31 32 33 34 35 36
-- show hive progress for analyses (turned into a view to give extra flexibility) ----------------
--
-- Thanks to Greg Jordan for the idea and the original version
--
-- Usage:
--       select * from progress;                                         # the whole table (may take ages to generate, depending on the size of your pipeline)
--       select * from progress where logic_name like 'family_blast%';   # only show family_blast-related analyses
--       select * from progress where retry_count>1;                     # only show jobs that have been tried more than once
37

38
CREATE OR REPLACE VIEW progress AS
39
    SELECT CONCAT( a.logic_name, '(', a.analysis_id, ')') analysis_name_and_id,
40 41 42 43 44 45 46 47
        MIN(rc.name) resource_class,
        j.status,
        j.retry_count,
        CASE WHEN j.status IS NULL THEN 0 ELSE count(*) END cnt,
        MIN(job_id) example_job_id
    FROM        analysis_base a
    LEFT JOIN   job j USING (analysis_id)
    LEFT JOIN   resource_class rc ON (a.resource_class_id=rc.resource_class_id)
48 49 50 51
    GROUP BY a.analysis_id, j.status, j.retry_count
    ORDER BY a.analysis_id, j.status;


52 53 54 55 56 57
-- a convenient view that also incorporates (otherwise redundant) analysis_id and logic_name ------------
--
-- Usage:
--       select * from msg;
--       select * from msg where analysis_id=18;
--       select * from msg where logic_name like 'family_blast%';
58 59 60

CREATE OR REPLACE VIEW msg AS
    SELECT a.analysis_id, a.logic_name, m.*
61
    FROM log_message m
62 63
    LEFT JOIN role USING (role_id)
    LEFT JOIN analysis_base a USING (analysis_id);
64 65


66 67 68 69 70 71 72
-- show statistics of Workers' real resource usage by analysis -------------------------------------------
--
-- Usage:
--       select * from resource_usage_stats;
--       select * from resource_usage_stats where logic_name like 'family_blast%';

CREATE OR REPLACE VIEW resource_usage_stats AS
73
    SELECT CONCAT(a.logic_name,'(',a.analysis_id,')') analysis,
74 75
           w.meadow_type,
           CONCAT(rc.name,'(',rc.resource_class_id,')') resource_class,
76
           u.exit_status,
77
           count(*) workers,
78 79
           min(mem_megs) AS min_mem_megs, round(avg(mem_megs),2) AS avg_mem_megs, max(mem_megs) AS max_mem_megs,
           min(swap_megs) AS min_swap_megs, round(avg(swap_megs),2) AS avg_swap_megs, max(swap_megs) AS max_swap_megs
80 81 82 83
    FROM resource_class rc
    JOIN analysis_base a USING(resource_class_id)
    LEFT JOIN role r USING(analysis_id)
    LEFT JOIN worker w USING(worker_id)
84
    LEFT JOIN worker_resource_usage u USING (worker_id)
85 86
    GROUP BY a.analysis_id, w.meadow_type, rc.resource_class_id, u.exit_status
    ORDER BY a.analysis_id, w.meadow_type, rc.resource_class_id, u.exit_status;
87 88


89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
-- show the roles that are currently live (grouped by meadow_users, resource_classes and analyses) -------
--
-- Usage:
--       select * from live_roles;
--       select * from live_roles where resource_class_id=12;

CREATE OR REPLACE VIEW live_roles AS
    SELECT w.meadow_user, w.meadow_type, w.resource_class_id, rc.name resource_class_name, r.analysis_id, a.logic_name, count(*)
    FROM worker w
    JOIN role r USING(worker_id)
    LEFT JOIN resource_class rc ON w.resource_class_id=rc.resource_class_id
    LEFT JOIN analysis_base a USING(analysis_id)
    WHERE r.when_finished IS NULL
    GROUP BY w.meadow_user, w.meadow_type, w.resource_class_id, rc.name, r.analysis_id, a.logic_name;


105 106 107 108 109 110
-- time an analysis or group of analyses (given by a name pattern) ----------------------------------------
--
-- Usage:
--       call time_analysis('%');                # time the whole pipeline
--       call time_analysis('load_uniprot%');    # time the group of analyses dealing with loading Uniprot members
--       call time_analysis('mcl');              # time one specific analysis
111 112

DROP PROCEDURE IF EXISTS time_analysis;
113
CREATE PROCEDURE time_analysis(IN analyses_pattern CHAR(64))
114
READS SQL DATA
115
    SELECT
116 117 118 119
        COUNT(*)-COUNT(when_finished) AS still_running,
        (UNIX_TIMESTAMP(CASE WHEN COUNT(*)>COUNT(when_finished) THEN CURRENT_TIMESTAMP ELSE max(when_finished) END) - UNIX_TIMESTAMP(min(when_started)))/60 AS measured_in_minutes,
        (UNIX_TIMESTAMP(CASE WHEN COUNT(*)>COUNT(when_finished) THEN CURRENT_TIMESTAMP ELSE max(when_finished) END) - UNIX_TIMESTAMP(min(when_started)))/3600 AS measured_in_hours,
        (UNIX_TIMESTAMP(CASE WHEN COUNT(*)>COUNT(when_finished) THEN CURRENT_TIMESTAMP ELSE max(when_finished) END) - UNIX_TIMESTAMP(min(when_started)))/3600/24 AS measured_in_days
120
        FROM role JOIN analysis_base USING (analysis_id)
121
        WHERE logic_name LIKE analyses_pattern;
122 123


124
#### searches for a given string in job.input_id or analysis_data.data, and returns the  matching jobs.
125 126 127 128 129 130
#
# Thanks to Greg Jordan for the idea and the original version
#
# Usage:
#       call job_search('other_415');           # return all jobs whose input_id or analysis_data match the pattern

131 132 133
DROP PROCEDURE IF EXISTS job_search;
CREATE PROCEDURE job_search(IN srch CHAR(40))
READS SQL DATA
134 135 136
  SELECT
    a.analysis_id,
    a.logic_name,
137
    j.job_id AS job_id,
138 139 140
    j.status,
    j.retry_count,
    IFNULL(d.data, j.input_id) input_id
141
  FROM job j JOIN analysis_base a USING (analysis_id)
142
    LEFT JOIN analysis_data d ON j.input_id=concat('_ext_input_analysis_data_id ',d.analysis_data_id)
143 144
  WHERE j.input_id LIKE concat('%',srch,'%') OR d.data LIKE concat('%',srch,'%');

145

146
############## reset failed jobs for analysis #############################################
147 148 149
#
# Usage:
#       call reset_failed_jobs_for_analysis('load_uniprot');    # reset failed jobs of this particular analysis
150 151 152

DROP PROCEDURE IF EXISTS reset_failed_jobs_for_analysis;
CREATE PROCEDURE reset_failed_jobs_for_analysis(IN param_logic_name char(64))
153
MODIFIES SQL DATA
154
    UPDATE job j JOIN analysis_base a USING (analysis_id)
155
    SET j.status='READY', j.retry_count=0
156 157 158
    WHERE a.logic_name=param_logic_name
    AND   j.status='FAILED';

159

160
############## drop hive tables ###########################################################
161 162 163
#
# Usage:
#       call drop_hive_tables;      # just drop them all
164 165

DROP PROCEDURE IF EXISTS drop_hive_tables;
166
DELIMITER //
167
CREATE PROCEDURE drop_hive_tables()
168
MODIFIES SQL DATA
169
BEGIN
170
    DROP VIEW IF EXISTS msg, progress, resource_usage_stats, live_roles;
171
    DROP TABLE IF EXISTS pipeline_wide_parameters, analysis_stats_monitor, worker_resource_usage, resource_description, analysis_data, job_file, dataflow_target, dataflow_rule, analysis_ctrl_rule, analysis_stats, log_message, accu, job, role, worker, analysis_base, resource_class, hive_meta;
172 173
END; //
DELIMITER ;
174