Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
ensembl-gh-mirror
ensembl-hive
Commits
df8280ab
Commit
df8280ab
authored
Aug 13, 2010
by
Leo Gordon
Browse files
default behaviour on whether or not to retry failing jobs is now centrally-controllable
parent
ca8a48cb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
49 additions
and
26 deletions
+49
-26
modules/Bio/EnsEMBL/Hive/Worker.pm
modules/Bio/EnsEMBL/Hive/Worker.pm
+14
-1
scripts/beekeeper.pl
scripts/beekeeper.pl
+15
-11
scripts/runWorker.pl
scripts/runWorker.pl
+20
-14
No files found.
modules/Bio/EnsEMBL/Hive/Worker.pm
View file @
df8280ab
...
...
@@ -290,6 +290,14 @@ sub last_check_in {
return
$self
->
{'
_last_check_in
'};
}
# this is a setter/getter that defines default behaviour when a job throws: should it be retried or not?
sub
retry_throwing_jobs
{
my
(
$self
,
$value
)
=
@_
;
$self
->
{'
_retry_throwing_jobs
'}
=
$value
if
(
$value
);
return
$self
->
{'
_retry_throwing_jobs
'}
||
0
;
}
=head2 hive_output_dir
Arg [1] : (optional) string directory path
...
...
@@ -515,7 +523,12 @@ sub run
my
$job_status_when_died
=
$job
->
status
();
warn
"
Job with id=
$job_id
died in status '
$job_status_when_died
' for the following reason:
$error_msg
\n
";
$self
->
db
()
->
get_JobErrorAdaptor
()
->
register_error
(
$job_id
,
$error_msg
);
if
(
$job
->
transient_error
)
{
# If the job specifically said what to do next, respect that last wish.
# Otherwise follow the default behaviour set by the beekeeper in $worker:
#
my
$attempt_to_retry_this_job
=
defined
(
$job
->
transient_error
)
?
$job
->
transient_error
:
$self
->
retry_throwing_jobs
;
if
(
$attempt_to_retry_this_job
)
{
$job
->
adaptor
->
reset_dead_job_by_dbID
(
$job_id
);
}
else
{
$job
->
update_status
('
FAILED
');
...
...
scripts/beekeeper.pl
View file @
df8280ab
...
...
@@ -61,6 +61,7 @@ sub main {
$self
->
{'
verbose_stats
'}
=
1
;
$self
->
{'
reg_name
'}
=
'
hive
';
$self
->
{'
maximise_concurrency
'}
=
0
;
$self
->
{'
retry_throwing_jobs
'}
=
undef
;
$self
->
{'
hive_output_dir
'}
=
undef
;
GetOptions
(
...
...
@@ -94,8 +95,9 @@ sub main {
'
batch_size=i
'
=>
\
$self
->
{'
batch_size
'},
'
lifespan=i
'
=>
\
$self
->
{'
lifespan
'},
'
logic_name=s
'
=>
\
$self
->
{'
logic_name
'},
'
maximise_concurrency
'
=>
\
$self
->
{'
maximise_concurrency
'},
'
hive_output_dir=s
'
=>
\
$self
->
{'
hive_output_dir
'},
'
maximise_concurrency=i
'
=>
\
$self
->
{'
maximise_concurrency
'},
'
retry_throwing_jobs=i
'
=>
\
$self
->
{'
retry_throwing_jobs
'},
# other commands/options
'
h|help
'
=>
\
$help
,
...
...
@@ -318,11 +320,12 @@ sub generate_worker_cmd {
if
(
$self
->
{'
run_job_id
'})
{
$worker_cmd
.=
"
-job_id
"
.
$self
->
{'
run_job_id
'};
}
else
{
$worker_cmd
.=
(
(
defined
$self
->
{'
job_limit
'})
?
('
-limit
'
.
$self
->
{'
job_limit
'}
)
:
'')
.
(
(
defined
$self
->
{'
batch_size
'})
?
('
-batch_size
'
.
$self
->
{'
batch_size
'}
)
:
'')
.
(
(
defined
$self
->
{'
lifespan
'})
?
('
-lifespan
'
.
$self
->
{'
lifespan
'}
)
:
'')
.
(
(
defined
$self
->
{'
logic_name
'})
?
('
-logic_name
'
.
$self
->
{'
logic_name
'}
)
:
'')
$worker_cmd
.=
(
defined
(
$self
->
{'
job_limit
'})
?
"
-limit
$self
->{'job_limit'}
"
:
'')
.
(
defined
(
$self
->
{'
batch_size
'})
?
"
-batch_size
$self
->{'batch_size'}
"
:
'')
.
(
defined
(
$self
->
{'
lifespan
'})
?
"
-lifespan
$self
->{'lifespan'}
"
:
'')
.
(
defined
(
$self
->
{'
logic_name
'})
?
"
-logic_name
$self
->{'logic_name'}
"
:
'')
.
(
$self
->
{'
maximise_concurrency
'}
?
'
-maximise_concurrency 1
'
:
'')
.
(
defined
(
$self
->
{'
retry_throwing_jobs
'})
?
"
-retry_throwing_jobs
$self
->{'retry_throwing_jobs'}
"
:
'')
.
(
$self
->
{'
hive_output_dir
'}
?
"
-hive_output_dir
$self
->{'hive_output_dir'}
"
:
'');
}
...
...
@@ -500,12 +503,13 @@ __DATA__
=head2 Worker control
-jlimit <num> : #jobs to run before worker can die naturally
-batch_size <num> : #jobs a worker can claim at once
-lifespan <num> : lifespan limit for each worker
-logic_name <string> : restrict the pipeline stat/runs to this analysis logic_name
-maximise_concurrency 1 : try to run more different analyses at the same time
-hive_output_dir <path> : directory where stdout/stderr of the hive is redirected
-jlimit <num> : #jobs to run before worker can die naturally
-batch_size <num> : #jobs a worker can claim at once
-lifespan <num> : lifespan limit for each worker
-logic_name <string> : restrict the pipeline stat/runs to this analysis logic_name
-maximise_concurrency 1 : try to run more different analyses at the same time
-retry_throwing_jobs 0|1 : if a job dies *knowingly*, should we retry it by default?
-hive_output_dir <path> : directory where stdout/stderr of the hive is redirected
=head2 Other commands/options
...
...
scripts/runWorker.pl
View file @
df8280ab
...
...
@@ -40,6 +40,7 @@ $self->{'process_id'} = undef;
$self
->
{'
debug
'}
=
undef
;
$self
->
{'
no_write
'}
=
undef
;
$self
->
{'
maximise_concurrency
'}
=
undef
;
$self
->
{'
retry_throwing_jobs
'}
=
undef
;
my
$conf_file
;
my
(
$help
,
$adaptor
,
$url
);
...
...
@@ -75,7 +76,8 @@ GetOptions(
'
analysis_stats
'
=>
\
$self
->
{'
show_analysis_stats
'},
'
no_write
'
=>
\
$self
->
{'
no_write
'},
'
nowrite
'
=>
\
$self
->
{'
no_write
'},
'
maximise_concurrency
'
=>
\
$self
->
{'
maximise_concurrency
'},
'
maximise_concurrency=i
'
=>
\
$self
->
{'
maximise_concurrency
'},
'
retry_throwing_jobs=i
'
=>
\
$self
->
{'
retry_throwing_jobs
'},
# Other commands
'
h|help
'
=>
\
$help
,
...
...
@@ -191,6 +193,9 @@ if($self->{'lifespan'}) {
if
(
$self
->
{'
no_global_cleanup
'})
{
$worker
->
perform_global_cleanup
(
0
);
}
if
(
defined
$self
->
{'
retry_throwing_jobs
'})
{
$worker
->
retry_throwing_jobs
(
$self
->
{'
retry_throwing_jobs
'});
}
$worker
->
print_worker
();
...
...
@@ -306,19 +311,20 @@ __DATA__
=head2 Job/Analysis control parameters:
-analysis_id <id> : analysis_id in db
-logic_name <string> : logic_name of analysis to make this worker
-batch_size <num> : #jobs to claim at a time
-limit <num> : #jobs to run before worker can die naturally
-lifespan <num> : number of minutes this worker is allowed to run
-hive_output_dir <path> : directory where stdout/stderr of the hive is redirected
-bk <string> : beekeeper identifier (deprecated and ignored)
-pid <string> : externally set process_id descriptor (e.g. lsf job_id, array_id)
-input_id <string> : test input_id on specified analysis (analysis_id or logic_name)
-job_id <id> : run specific job defined by analysis_job_id
-analysis_stats : show status of each analysis in hive
-no_cleanup : don't perform global_cleanup when worker exits
-no_write : don't write_output or auto_dataflow input_job
-analysis_id <id> : analysis_id in db
-logic_name <string> : logic_name of analysis to make this worker
-batch_size <num> : #jobs to claim at a time
-limit <num> : #jobs to run before worker can die naturally
-lifespan <num> : number of minutes this worker is allowed to run
-hive_output_dir <path> : directory where stdout/stderr of the hive is redirected
-bk <string> : beekeeper identifier (deprecated and ignored)
-pid <string> : externally set process_id descriptor (e.g. lsf job_id, array_id)
-input_id <string> : test input_id on specified analysis (analysis_id or logic_name)
-job_id <id> : run specific job defined by analysis_job_id
-analysis_stats : show status of each analysis in hive
-no_cleanup : don't perform global_cleanup when worker exits
-no_write : don't write_output or auto_dataflow input_job
-retry_throwing_jobs 0|1 : if a job dies *knowingly*, should we retry it by default?
=head2 Other options:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment