Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ChEMBL
C
ChEMBL
Delayed Jobs
Delayed Jobs API
Commits
a3296a09
Commit
a3296a09
authored
Apr 01, 2021
by
David Mendez
Browse files
Add function to determine when the job checker is assumed dead
parent
0474cb87
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
43 additions
and
13 deletions
+43
-13
app/config.py
app/config.py
+1
-1
app/models/delayed_job_models.py
app/models/delayed_job_models.py
+12
-2
app/models/test/test_job_lsf_status.py
app/models/test/test_job_lsf_status.py
+30
-10
No files found.
app/config.py
View file @
a3296a09
...
...
@@ -91,7 +91,7 @@ STATUS_AGENT_CONFIG = RUN_CONFIG.get('status_agent', {})
RUN_CONFIG
[
'status_agent'
]
=
{
'lock_validity_seconds'
:
1
,
'sleep_time'
:
1
,
'dea
d
_assumption_seconds'
:
10
,
'dea
th
_assumption_seconds'
:
10
,
**
STATUS_AGENT_CONFIG
,
}
...
...
app/models/delayed_job_models.py
View file @
a3296a09
...
...
@@ -209,8 +209,18 @@ class DelayedJob(DB.Model):
"""
:return: True if the job needs to be checked in lsf, false otherwise
"""
needs_to_be_checked_by_status
=
self
.
status
in
[
JobStatuses
.
QUEUED
,
JobStatuses
.
RUNNING
,
JobStatuses
.
UNKNOWN
]
return
needs_to_be_checked_by_status
return
self
.
status
in
[
JobStatuses
.
QUEUED
,
JobStatuses
.
RUNNING
,
JobStatuses
.
UNKNOWN
]
def job_checker_seems_to_have_died(self):
    """
    Tells whether the LSF status checker of this job should be presumed dead.

    Two facts are combined: the last recorded LSF check status is different from 0, and the
    last recorded check time is strictly earlier than the current time (as given by
    app_utils.get_utc_now()) minus the 'death_assumption_seconds' value of the 'status_agent'
    section of RUN_CONFIG.

    :return: True if the checker seems to have died, false otherwise
    """
    last_check_failed = self.last_lsf_check_status != 0

    status_agent_config = RUN_CONFIG.get('status_agent')
    tolerance_seconds = status_agent_config.get('death_assumption_seconds')

    # Any check recorded before this instant is too old to count as a sign of life.
    current_time = app_utils.get_utc_now()
    staleness_cutoff = current_time - datetime.timedelta(seconds=tolerance_seconds)
    last_check_is_stale = self.last_lsf_checked_at < staleness_cutoff

    return last_check_failed and last_check_is_stale
# ----------------------------------------------------------------------------------------------------------------------
...
...
app/models/test/test_job_lsf_status.py
View file @
a3296a09
...
...
@@ -6,6 +6,8 @@ import datetime
from
app
import
create_app
from
app.models
import
delayed_job_models
from
app.config
import
RUN_CONFIG
from
app
import
utils
class
TestJobLSFStatus
(
unittest
.
TestCase
):
...
...
@@ -47,7 +49,7 @@ class TestJobLSFStatus(unittest.TestCase):
job
.
status
=
status
needs_to_be_checked_got
=
job
.
needs_to_be_checked_in_lsf
()
self
.
assertFalse
(
needs_to_be_checked_got
,
msg
=
f
'A job with status
{
status
}
does not need to be checked in LSF!'
)
msg
=
f
'A job with status
{
status
}
does not need to be checked in LSF!'
)
def
test_determines_when_the_job_checker_seems_to_have_died
(
self
):
"""
...
...
@@ -63,15 +65,33 @@ class TestJobLSFStatus(unittest.TestCase):
}
docker_image_url_must_be
=
'some_url'
job
=
delayed_job_models
.
get_or_create
(
job_type
,
params
,
docker_image_url_must_be
)
death_assumption_seconds
=
RUN_CONFIG
.
get
(
'status_agent'
).
get
(
'death_assumption_seconds'
)
last_lsf_checked_at
=
utils
.
get_utc_now
()
-
datetime
.
timedelta
(
seconds
=
death_assumption_seconds
)
job
.
last_lsf_checked_at
=
last_lsf_checked_at
job
.
last_lsf_check_status
=
1
last_lsf_checked_at
=
datetime
.
datetime
.
utcnow
()
+
datetime
.
timedelta
(
seconds
=-
3
)
print
(
'last_lsf_checked_at: '
,
last_lsf_checked_at
)
job_checker_seems_to_have_died_got
=
job
.
job_checker_seems_to_have_died
()
self
.
assertTrue
(
job_checker_seems_to_have_died_got
,
msg
=
'When the last script execution errors and the last time it reported is greater than '
'the assumed dead time it must assume that the checker died.'
)
# job.last_lsf_checked_at = 0
# the last checked at time must be older than the seconds at which it is considered dead
job
.
last_lsf_checked_at
=
utils
.
get_utc_now
()
job_checker_seems_to_have_died_got
=
job
.
job_checker_seems_to_have_died
()
self
.
assertFalse
(
job_checker_seems_to_have_died_got
,
msg
=
'When the last script execution errors but the last time it reported is less than '
'the assumed dead time it must NOT assume that the checker died.'
)
# for status in [delayed_job_models.JobStatuses.QUEUED, delayed_job_models.JobStatuses.RUNNING,
# delayed_job_models.JobStatuses.UNKNOWN]:
# job.status = status
# needs_to_be_checked_got = job.needs_to_be_checked_in_lsf()
# self.assertTrue(needs_to_be_checked_got, msg=f'A job with status {status} need to be checked in LSF!')
job
.
last_lsf_checked_at
=
utils
.
get_utc_now
()
job
.
last_lsf_check_status
=
0
job_checker_seems_to_have_died_got
=
job
.
job_checker_seems_to_have_died
()
self
.
assertFalse
(
job_checker_seems_to_have_died_got
,
msg
=
'When the last script execution was successful and the last time it reported is less '
'than the assumed dead time it must NOT assume that the checker died.'
)
last_lsf_checked_at
=
utils
.
get_utc_now
()
-
datetime
.
timedelta
(
seconds
=
death_assumption_seconds
)
job
.
last_lsf_checked_at
=
last_lsf_checked_at
job
.
last_lsf_check_status
=
0
job_checker_seems_to_have_died_got
=
job
.
job_checker_seems_to_have_died
()
self
.
assertFalse
(
job_checker_seems_to_have_died_got
,
msg
=
'When the last script execution was successful but the last time it reported is '
'greater than the assumed dead time it must NOT assume that the checker died.'
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment