Skip to content

Commit bc72c28

Browse files
authored
feat(crons): Move over check_missing task to parallel task (#56579)
Moves over `check_missing` to run in parallel with the newly registered task.
1 parent 9749ec9 commit bc72c28

File tree

2 files changed

+42
-39
lines changed

2 files changed

+42
-39
lines changed

src/sentry/monitors/tasks.py

+1-28
Original file line numberDiff line numberDiff line change
@@ -204,34 +204,7 @@ def check_missing(current_datetime: datetime):
204204
)
205205
metrics.gauge("sentry.monitors.tasks.check_missing.count", qs.count(), sample_rate=1.0)
206206
for monitor_environment in qs:
207-
try:
208-
logger.info(
209-
"monitor.missed-checkin", extra={"monitor_environment_id": monitor_environment.id}
210-
)
211-
212-
monitor = monitor_environment.monitor
213-
expected_time = monitor_environment.next_checkin
214-
215-
# add missed checkin.
216-
#
217-
# XXX(epurkhiser): The date_added is backdated so that this missed
218-
# check-in correctly reflects the time of when the checkin SHOULD
219-
# have happened. It is the same as the expected_time.
220-
checkin = MonitorCheckIn.objects.create(
221-
project_id=monitor_environment.monitor.project_id,
222-
monitor=monitor_environment.monitor,
223-
monitor_environment=monitor_environment,
224-
status=CheckInStatus.MISSED,
225-
date_added=expected_time,
226-
expected_time=expected_time,
227-
monitor_config=monitor.get_validated_config(),
228-
)
229-
# TODO(epurkhiser): To properly fix GH-55874 we need to actually
230-
# pass a timestamp here. But I'm not 100% sure what that should
231-
# look like yet.
232-
mark_failed(checkin, ts=None)
233-
except Exception:
234-
logger.exception("Exception in check_monitors - mark missed", exc_info=True)
207+
mark_environment_missing.delay(monitor_environment.id)
235208

236209

237210
@instrumented_task(

tests/sentry/monitors/test_tasks.py

+41-11
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from unittest import mock
33

44
import msgpack
5+
import pytest
56
from arroyo.backends.kafka import KafkaPayload
67
from django.test import override_settings
78
from django.utils import timezone
@@ -20,6 +21,7 @@
2021
check_missing,
2122
check_timeout,
2223
clock_pulse,
24+
mark_environment_missing,
2325
try_monitor_tasks_trigger,
2426
)
2527
from sentry.testutils.cases import TestCase
@@ -48,7 +50,8 @@ def make_ref_time():
4850

4951

5052
class MonitorTaskCheckMissingTest(TestCase):
51-
def test_missing_checkin(self):
53+
@mock.patch("sentry.monitors.tasks.mark_environment_missing")
54+
def test_missing_checkin(self, mark_environment_missing_mock):
5255
org = self.create_organization()
5356
project = self.create_project(organization=org)
5457

@@ -78,6 +81,14 @@ def test_missing_checkin(self):
7881

7982
check_missing(task_run_ts)
8083

84+
# assert that task is called for the specific environment
85+
assert mark_environment_missing_mock.delay.call_count == 1
86+
assert mark_environment_missing_mock.delay.mock_calls[0] == mock.call(
87+
monitor_environment.id
88+
)
89+
90+
mark_environment_missing(monitor_environment.id)
91+
8192
# Monitor status is updated
8293
monitor_environment = MonitorEnvironment.objects.get(
8394
id=monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
@@ -98,7 +109,8 @@ def test_missing_checkin(self):
98109
).replace(second=0, microsecond=0)
99110
assert missed_checkin.monitor_config == monitor.config
100111

101-
def test_missing_checkin_with_margin(self):
112+
@mock.patch("sentry.monitors.tasks.mark_environment_missing")
113+
def test_missing_checkin_with_margin(self, mark_environment_missing_mock):
102114
org = self.create_organization()
103115
project = self.create_project(organization=org)
104116

@@ -132,6 +144,9 @@ def test_missing_checkin_with_margin(self):
132144
# No missed check-in generated as we're still within the check-in margin
133145
check_missing(task_run_ts)
134146

147+
# assert that task is not called for the specific environment
148+
assert mark_environment_missing_mock.delay.call_count == 0
149+
135150
assert not MonitorEnvironment.objects.filter(
136151
id=monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
137152
).exists()
@@ -142,6 +157,14 @@ def test_missing_checkin_with_margin(self):
142157
# Missed check-in generated as clock now exceeds expected time plus margin
143158
check_missing(task_run_ts + timedelta(minutes=4))
144159

160+
# assert that task is called for the specific environment
161+
assert mark_environment_missing_mock.delay.call_count == 1
162+
assert mark_environment_missing_mock.delay.mock_calls[0] == mock.call(
163+
monitor_environment.id
164+
)
165+
166+
mark_environment_missing(monitor_environment.id)
167+
145168
monitor_environment = MonitorEnvironment.objects.get(
146169
id=monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
147170
)
@@ -157,7 +180,7 @@ def test_missing_checkin_with_margin(self):
157180
)
158181

159182
# Missed checkins are back-dated to when the checkin was expected to
160-
# happpen. In this case the expected_time is equal to the date_added.
183+
# happen. In this case the expected_time is equal to the date_added.
161184
assert missed_check.date_added == (
162185
monitor_environment.last_checkin + timedelta(minutes=10)
163186
).replace(second=0, microsecond=0)
@@ -269,8 +292,8 @@ def test_not_missing_checkin(self):
269292
monitor_environment=monitor_environment.id, status=CheckInStatus.MISSED
270293
).exists()
271294

272-
@mock.patch("sentry.monitors.tasks.logger")
273-
def test_missed_exception_handling(self, logger):
295+
@mock.patch("sentry.monitors.tasks.mark_environment_missing")
296+
def test_missed_exception_handling(self, mark_environment_missing_mock):
274297
org = self.create_organization()
275298
project = self.create_project(organization=org)
276299

@@ -287,7 +310,7 @@ def test_missed_exception_handling(self, logger):
287310
"schedule": [-2, "minute"],
288311
},
289312
)
290-
MonitorEnvironment.objects.create(
313+
failing_monitor_environment = MonitorEnvironment.objects.create(
291314
monitor=exception_monitor,
292315
environment=self.environment,
293316
next_checkin=ts - timedelta(minutes=1),
@@ -301,7 +324,7 @@ def test_missed_exception_handling(self, logger):
301324
type=MonitorType.CRON_JOB,
302325
config={"schedule": "* * * * *"},
303326
)
304-
monitor_environment = MonitorEnvironment.objects.create(
327+
successful_monitor_environment = MonitorEnvironment.objects.create(
305328
monitor=monitor,
306329
environment=self.environment,
307330
next_checkin=ts - timedelta(minutes=1),
@@ -311,15 +334,22 @@ def test_missed_exception_handling(self, logger):
311334

312335
check_missing(task_run_ts)
313336

314-
# Logged the exception
315-
assert logger.exception.call_count == 1
337+
# assert that task is called for the specific environments
338+
assert mark_environment_missing_mock.delay.call_count == 2
339+
340+
# assert failing monitor raises an error
341+
with pytest.raises(ValueError):
342+
mark_environment_missing(failing_monitor_environment.id)
343+
344+
# assert regular monitor works
345+
mark_environment_missing(successful_monitor_environment.id)
316346

317347
# We still marked a monitor as missed
318348
assert MonitorEnvironment.objects.filter(
319-
id=monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
349+
id=successful_monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
320350
).exists()
321351
assert MonitorCheckIn.objects.filter(
322-
monitor_environment=monitor_environment.id, status=CheckInStatus.MISSED
352+
monitor_environment=successful_monitor_environment.id, status=CheckInStatus.MISSED
323353
).exists()
324354

325355

0 commit comments

Comments
 (0)