Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
ChEMBL
C
ChEMBL
Delayed Jobs
Delayed Jobs API
Commits
a3296a09
Commit
a3296a09
authored
Apr 01, 2021
by
David Mendez
Browse files
Add function to determine when the job checker is assumed dead
parent
0474cb87
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
43 additions
and
13 deletions
+43
-13
app/config.py
app/config.py
+1
-1
app/models/delayed_job_models.py
app/models/delayed_job_models.py
+12
-2
app/models/test/test_job_lsf_status.py
app/models/test/test_job_lsf_status.py
+30
-10
No files found.
app/config.py
View file @
a3296a09
...
@@ -91,7 +91,7 @@ STATUS_AGENT_CONFIG = RUN_CONFIG.get('status_agent', {})
...
@@ -91,7 +91,7 @@ STATUS_AGENT_CONFIG = RUN_CONFIG.get('status_agent', {})
RUN_CONFIG
[
'status_agent'
]
=
{
RUN_CONFIG
[
'status_agent'
]
=
{
'lock_validity_seconds'
:
1
,
'lock_validity_seconds'
:
1
,
'sleep_time'
:
1
,
'sleep_time'
:
1
,
'dea
d
_assumption_seconds'
:
10
,
'dea
th
_assumption_seconds'
:
10
,
**
STATUS_AGENT_CONFIG
,
**
STATUS_AGENT_CONFIG
,
}
}
...
...
app/models/delayed_job_models.py
View file @
a3296a09
...
@@ -209,8 +209,18 @@ class DelayedJob(DB.Model):
...
@@ -209,8 +209,18 @@ class DelayedJob(DB.Model):
"""
"""
:return: True if the job needs to be checked in lsf, false otherwise
:return: True if the job needs to be checked in lsf, false otherwise
"""
"""
needs_to_be_checked_by_status
=
self
.
status
in
[
JobStatuses
.
QUEUED
,
JobStatuses
.
RUNNING
,
JobStatuses
.
UNKNOWN
]
return
self
.
status
in
[
JobStatuses
.
QUEUED
,
JobStatuses
.
RUNNING
,
JobStatuses
.
UNKNOWN
]
return
needs_to_be_checked_by_status
def job_checker_seems_to_have_died(self):
    """
    Tell whether the process that checks this job in LSF appears to have died.

    The checker is assumed dead only when both conditions hold:
      * the last LSF check status recorded on the job is different from 0, and
      * the last time the job was checked is older than 'death_assumption_seconds', read from the
        'status_agent' section of RUN_CONFIG.

    :return: True if the checker seems to have died, false otherwise
    """
    lsf_script_errored = self.last_lsf_check_status != 0
    if not lsf_script_errored:
        # The answer is False whatever the timestamp is, so return before comparing it. This also avoids a
        # TypeError when the status is 0 but last_lsf_checked_at holds no datetime.
        return False

    # NOTE(review): if the status is non-zero and last_lsf_checked_at is None, the comparison below still raises
    # TypeError. Confirm whether that column can be empty and which answer is wanted in that case.
    death_assumption_seconds = RUN_CONFIG.get('status_agent').get('death_assumption_seconds')
    checker_assumed_dead_time = app_utils.get_utc_now() - datetime.timedelta(seconds=death_assumption_seconds)

    # The checker counts as dead when its last report is older than the cut-off computed above.
    return self.last_lsf_checked_at < checker_assumed_dead_time
# ----------------------------------------------------------------------------------------------------------------------
# ----------------------------------------------------------------------------------------------------------------------
...
...
app/models/test/test_job_lsf_status.py
View file @
a3296a09
...
@@ -6,6 +6,8 @@ import datetime
...
@@ -6,6 +6,8 @@ import datetime
from
app
import
create_app
from
app
import
create_app
from
app.models
import
delayed_job_models
from
app.models
import
delayed_job_models
from
app.config
import
RUN_CONFIG
from
app
import
utils
class
TestJobLSFStatus
(
unittest
.
TestCase
):
class
TestJobLSFStatus
(
unittest
.
TestCase
):
...
@@ -63,15 +65,33 @@ class TestJobLSFStatus(unittest.TestCase):
...
@@ -63,15 +65,33 @@ class TestJobLSFStatus(unittest.TestCase):
}
}
docker_image_url_must_be
=
'some_url'
docker_image_url_must_be
=
'some_url'
job
=
delayed_job_models
.
get_or_create
(
job_type
,
params
,
docker_image_url_must_be
)
job
=
delayed_job_models
.
get_or_create
(
job_type
,
params
,
docker_image_url_must_be
)
death_assumption_seconds
=
RUN_CONFIG
.
get
(
'status_agent'
).
get
(
'death_assumption_seconds'
)
last_lsf_checked_at
=
utils
.
get_utc_now
()
-
datetime
.
timedelta
(
seconds
=
death_assumption_seconds
)
job
.
last_lsf_checked_at
=
last_lsf_checked_at
job
.
last_lsf_check_status
=
1
last_lsf_checked_at
=
datetime
.
datetime
.
utcnow
()
+
datetime
.
timedelta
(
seconds
=-
3
)
job_checker_seems_to_have_died_got
=
job
.
job_checker_seems_to_have_died
()
print
(
'last_lsf_checked_at: '
,
last_lsf_checked_at
)
self
.
assertTrue
(
job_checker_seems_to_have_died_got
,
msg
=
'When the last script execution errors and the last time it reported is greater than '
'the assumed dead time it must assume that the checker died.'
)
# job.last_lsf_checked_at = 0
job
.
last_lsf_checked_at
=
utils
.
get_utc_now
()
# the last checked at time must be older than the seconds at which it is considered dead
job_checker_seems_to_have_died_got
=
job
.
job_checker_seems_to_have_died
()
self
.
assertFalse
(
job_checker_seems_to_have_died_got
,
msg
=
'When the last script execution errors but the last time it reported is less than '
'the assumed dead time it must NOT assume that the checker died.'
)
# for status in [delayed_job_models.JobStatuses.QUEUED, delayed_job_models.JobStatuses.RUNNING,
job
.
last_lsf_checked_at
=
utils
.
get_utc_now
()
# delayed_job_models.JobStatuses.UNKNOWN]:
job
.
last_lsf_check_status
=
0
# job.status = status
job_checker_seems_to_have_died_got
=
job
.
job_checker_seems_to_have_died
()
# needs_to_be_checked_got = job.needs_to_be_checked_in_lsf()
self
.
assertFalse
(
job_checker_seems_to_have_died_got
,
# self.assertTrue(needs_to_be_checked_got, msg=f'A job with status {status} need to be checked in LSF!')
msg
=
'When the last script execution was successful and the last time it reported is less '
'than the assumed dead time it must NOT assume that the checker died.'
)
last_lsf_checked_at
=
utils
.
get_utc_now
()
-
datetime
.
timedelta
(
seconds
=
death_assumption_seconds
)
job
.
last_lsf_checked_at
=
last_lsf_checked_at
job
.
last_lsf_check_status
=
0
job_checker_seems_to_have_died_got
=
job
.
job_checker_seems_to_have_died
()
self
.
assertFalse
(
job_checker_seems_to_have_died_got
,
msg
=
'When the last script execution was successful but the last time it reported is '
'greater than the assumed dead time it must NOT assume that the checker died.'
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment