long_mult_example_pipeline.txt 2.91 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
############################################################################################################################
#
#    Bio::EnsEMBL::Hive::RunnableDB::LongMult is an example eHive pipeline that demonstrates the following features:
#
# A) A pipeline can have multiple analyses (this one has three: 'start', 'part_multiply' and 'add_together').
#
# B) A job of one analysis can create jobs of another analysis (one 'start' job creates up to 8 'part_multiply' jobs).
#
# C) A job of one analysis can "flow the data" into another analysis (a 'start' job "flows into" an 'add_together' job).
#
# D) Execution of one analysis can be blocked until all jobs of another analysis have been successfully completed
#    ('add_together' is blocked both by 'start' and 'part_multiply').
#
# E) As filesystems are frequently a bottleneck for big pipelines, it is advised that eHive processes store intermediate
#    and final results in a database (in this pipeline, 'intermediate_result' and 'final_result' tables are used).
#
############################################################################################################################

19 20
# 0. Cache the MySQL connection parameters in a variable (they also work as eHive connection parameters):
export MYCONN="--host=hostname --port=port_number --user=username --password=secret"
21 22 23
#
# Also, set ENS_CODE_ROOT to the directory where the ensembl packages are installed:
export ENS_CODE_ROOT="$HOME/ensembl_main"
24

25
# 1. Create an empty database:
26 27
mysql $MYCONN -e 'DROP DATABASE IF EXISTS long_mult_test'
mysql $MYCONN -e 'CREATE DATABASE long_mult_test'
28 29

# 2. Create eHive infrastructure:
30
mysql $MYCONN long_mult_test <$ENS_CODE_ROOT/ensembl-hive/sql/tables.sql
31 32

# 3. Create analyses/control_rules/dataflow_rules of the LongMult pipeline:
33
mysql $MYCONN long_mult_test <$ENS_CODE_ROOT/ensembl-hive/sql/create_long_mult.sql
34 35

# 4. "Load" the pipeline with a multiplication task:
36
mysql $MYCONN long_mult_test <$ENS_CODE_ROOT/ensembl-hive/sql/load_long_mult.sql
37 38
#
# or you can add your own task(s). Several tasks can be added at once:
39
mysql $MYCONN long_mult_test <<EoF
40 41
INSERT INTO analysis_job (analysis_id, input_id) VALUES ( 1, "{ 'a_multiplier' => '9650516169', 'b_multiplier' => '327358788' }");
INSERT INTO analysis_job (analysis_id, input_id) VALUES ( 1, "{ 'a_multiplier' => '327358788', 'b_multiplier' => '9650516169' }");
42 43 44
EoF

# 5. Initialize the newly created eHive for the first time:
45
beekeeper.pl $MYCONN --database=long_mult_test -sync
46 47

# 6. You can either execute three individual workers, each picking one analysis of the pipeline (run the command below once per worker):
48
runWorker.pl $MYCONN --database=long_mult_test
49 50 51
#
#
# ... or run an automatic loop that will run workers for you:
52
beekeeper.pl $MYCONN --database=long_mult_test -loop
53 54

# 7. The results of the computations can be found in the 'final_result' table:
55
mysql $MYCONN long_mult_test -e 'SELECT * FROM final_result'
56 57 58

# 8. You can add more multiplication tasks by repeating from step 4.