procedures.mysql 4.55 KB
Newer Older
1 2 3 4
-- ------------------------------------------------------------------------
--
-- Some stored functions, views and procedures used in hive:
--
5 6


7 8 9 10 11 12 13 14
-- show hive progress for analyses (turned into a view to give extra flexibility) ----------------
--
-- Thanks to Greg Jordan for the idea and the original version
--
-- Usage:
--       select * from progress;                                         # the whole table (may take ages to generate, depending on the size of your pipeline)
--       select * from progress where logic_name like 'family_blast%';   # only show family_blast-related analyses
--       select * from progress where retry_count>1;                     # only show jobs that have been tried more than once
15

16
CREATE OR REPLACE VIEW progress AS
17
    SELECT CONCAT( a.logic_name, '(', a.analysis_id, ')') analysis_name_and_id,
18 19 20 21 22 23 24 25
        MIN(rc.name) resource_class,
        j.status,
        j.retry_count,
        CASE WHEN j.status IS NULL THEN 0 ELSE count(*) END cnt,
        MIN(job_id) example_job_id
    FROM        analysis_base a
    LEFT JOIN   job j USING (analysis_id)
    LEFT JOIN   resource_class rc ON (a.resource_class_id=rc.resource_class_id)
26 27 28 29
    GROUP BY a.analysis_id, j.status, j.retry_count
    ORDER BY a.analysis_id, j.status;


30 31 32 33 34 35
-- a convenient view that also incorporates (otherwise redundant) analysis_id and logic_name ------------
--
-- Usage:
--       select * from msg;
--       select * from msg where analysis_id=18;
--       select * from msg where logic_name like 'family_blast%';
36 37 38

CREATE OR REPLACE VIEW msg AS
    SELECT a.analysis_id, a.logic_name, m.*
39
    FROM log_message m
40 41
    LEFT JOIN job j ON (j.job_id=m.job_id)
    LEFT JOIN analysis_base a ON (a.analysis_id=j.analysis_id);
42 43


44 45 46 47 48 49 50
#### time an analysis or group of analyses (given by a name pattern) ######################################
# You'll get better precision when the analyses involved have been done or failed rather than still running.
#
# Usage:
#       call time_analysis('%');                # time the whole pipeline
#       call time_analysis('load_uniprot%');    # time the group of analyses dealing with loading Uniprot members
#       call time_analysis('mcl');              # time one specific analysis
51 52

DROP PROCEDURE IF EXISTS time_analysis;
53 54
CREATE PROCEDURE time_analysis(IN param_logic_name_pattern char(64))
READS SQL DATA
55
    SELECT
Leo Gordon's avatar
Leo Gordon committed
56
        (UNIX_TIMESTAMP(max(last_check_in))-UNIX_TIMESTAMP(min(born)))/60 AS measured_in_minutes,
57 58
        (UNIX_TIMESTAMP(max(last_check_in))-UNIX_TIMESTAMP(min(born)))/3600 AS measured_in_hours,
        (UNIX_TIMESTAMP(max(last_check_in))-UNIX_TIMESTAMP(min(born)))/3600/24 AS measured_in_days
59
        FROM worker JOIN analysis_base USING (analysis_id)
60 61 62
        WHERE logic_name like param_logic_name_pattern;


63
#### searches for a given string in job.input_id or analysis_data.data, and returns the  matching jobs.
64 65 66 67 68 69
#
# Thanks to Greg Jordan for the idea and the original version
#
# Usage:
#       call job_search('other_415');           # return all jobs whose input_id or analysis_data match the pattern

70 71 72
DROP PROCEDURE IF EXISTS job_search;
CREATE PROCEDURE job_search(IN srch CHAR(40))
READS SQL DATA
73 74 75
  SELECT
    a.analysis_id,
    a.logic_name,
76
    j.job_id AS job_id,
77 78 79
    j.status,
    j.retry_count,
    IFNULL(d.data, j.input_id) input_id
80
  FROM job j JOIN analysis_base a USING (analysis_id)
81
    LEFT JOIN analysis_data d ON j.input_id=concat('_ext_input_analysis_data_id ',d.analysis_data_id)
82 83
  WHERE j.input_id LIKE concat('%',srch,'%') OR d.data LIKE concat('%',srch,'%');

84

85
############## reset failed jobs for analysis #############################################
86 87 88
#
# Usage:
#       call reset_failed_jobs_for_analysis('load_uniprot');    # reset failed jobs of this particular analysis
89 90 91

DROP PROCEDURE IF EXISTS reset_failed_jobs_for_analysis;
CREATE PROCEDURE reset_failed_jobs_for_analysis(IN param_logic_name char(64))
92
MODIFIES SQL DATA
93
    UPDATE job j JOIN analysis_base a USING (analysis_id)
94
    SET j.status='READY', j.retry_count=0
95 96 97
    WHERE a.logic_name=param_logic_name
    AND   j.status='FAILED';

98

99
############## drop hive tables ###########################################################
100 101 102
#
# Usage:
#       call drop_hive_tables;      # just drop them all
103 104

DROP PROCEDURE IF EXISTS drop_hive_tables;
105
DELIMITER //
106
CREATE PROCEDURE drop_hive_tables()
107
MODIFIES SQL DATA
108 109
BEGIN
    DROP VIEW IF EXISTS msg, progress;
110
    DROP TABLE IF EXISTS monitor, analysis_stats_monitor, resource_description, analysis_data, job_file, dataflow_rule, analysis_ctrl_rule, analysis_stats, log_message, accu, job, worker, analysis_base, resource_class, hive_meta;
111 112
END; //
DELIMITER ;
113