Skip to content

feat(crons): Move over check_missing task to parallel task #56579

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 1 addition & 28 deletions src/sentry/monitors/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,34 +204,7 @@ def check_missing(current_datetime: datetime):
)
metrics.gauge("sentry.monitors.tasks.check_missing.count", qs.count(), sample_rate=1.0)
for monitor_environment in qs:
try:
logger.info(
"monitor.missed-checkin", extra={"monitor_environment_id": monitor_environment.id}
)

monitor = monitor_environment.monitor
expected_time = monitor_environment.next_checkin

# add missed checkin.
#
# XXX(epurkhiser): The date_added is backdated so that this missed
# check-in correctly reflects the time of when the checkin SHOULD
# have happened. It is the same as the expected_time.
checkin = MonitorCheckIn.objects.create(
project_id=monitor_environment.monitor.project_id,
monitor=monitor_environment.monitor,
monitor_environment=monitor_environment,
status=CheckInStatus.MISSED,
date_added=expected_time,
expected_time=expected_time,
monitor_config=monitor.get_validated_config(),
)
# TODO(epurkhiser): To properly fix GH-55874 we need to actually
# pass a timestamp here. But I'm not 100% sure what that should
# look like yet.
mark_failed(checkin, ts=None)
except Exception:
logger.exception("Exception in check_monitors - mark missed", exc_info=True)
mark_environment_missing.delay(monitor_environment.id)


@instrumented_task(
Expand Down
52 changes: 41 additions & 11 deletions tests/sentry/monitors/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from unittest import mock

import msgpack
import pytest
from arroyo.backends.kafka import KafkaPayload
from django.test import override_settings
from django.utils import timezone
Expand All @@ -20,6 +21,7 @@
check_missing,
check_timeout,
clock_pulse,
mark_environment_missing,
try_monitor_tasks_trigger,
)
from sentry.testutils.cases import TestCase
Expand Down Expand Up @@ -48,7 +50,8 @@ def make_ref_time():


class MonitorTaskCheckMissingTest(TestCase):
def test_missing_checkin(self):
@mock.patch("sentry.monitors.tasks.mark_environment_missing")
def test_missing_checkin(self, mark_environment_missing_mock):
org = self.create_organization()
project = self.create_project(organization=org)

Expand Down Expand Up @@ -78,6 +81,14 @@ def test_missing_checkin(self):

check_missing(task_run_ts)

# assert that task is called for the specific environment
assert mark_environment_missing_mock.delay.call_count == 1
assert mark_environment_missing_mock.delay.mock_calls[0] == mock.call(
monitor_environment.id
)

mark_environment_missing(monitor_environment.id)

# Monitor status is updated
monitor_environment = MonitorEnvironment.objects.get(
id=monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
Expand All @@ -98,7 +109,8 @@ def test_missing_checkin(self):
).replace(second=0, microsecond=0)
assert missed_checkin.monitor_config == monitor.config

def test_missing_checkin_with_margin(self):
@mock.patch("sentry.monitors.tasks.mark_environment_missing")
def test_missing_checkin_with_margin(self, mark_environment_missing_mock):
org = self.create_organization()
project = self.create_project(organization=org)

Expand Down Expand Up @@ -132,6 +144,9 @@ def test_missing_checkin_with_margin(self):
# No missed check-in generated as we're still within the check-in margin
check_missing(task_run_ts)

# assert that task is not called for the specific environment
assert mark_environment_missing_mock.delay.call_count == 0

assert not MonitorEnvironment.objects.filter(
id=monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
).exists()
Expand All @@ -142,6 +157,14 @@ def test_missing_checkin_with_margin(self):
# Missed check-in generated as clock now exceeds expected time plus margin
check_missing(task_run_ts + timedelta(minutes=4))

# assert that task is called for the specific environment
assert mark_environment_missing_mock.delay.call_count == 1
assert mark_environment_missing_mock.delay.mock_calls[0] == mock.call(
monitor_environment.id
)

mark_environment_missing(monitor_environment.id)

monitor_environment = MonitorEnvironment.objects.get(
id=monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
)
Expand All @@ -157,7 +180,7 @@ def test_missing_checkin_with_margin(self):
)

# Missed checkins are back-dated to when the checkin was expected to
# happpen. In this case the expected_time is equal to the date_added.
# happen. In this case the expected_time is equal to the date_added.
assert missed_check.date_added == (
monitor_environment.last_checkin + timedelta(minutes=10)
).replace(second=0, microsecond=0)
Expand Down Expand Up @@ -269,8 +292,8 @@ def test_not_missing_checkin(self):
monitor_environment=monitor_environment.id, status=CheckInStatus.MISSED
).exists()

@mock.patch("sentry.monitors.tasks.logger")
def test_missed_exception_handling(self, logger):
@mock.patch("sentry.monitors.tasks.mark_environment_missing")
def test_missed_exception_handling(self, mark_environment_missing_mock):
org = self.create_organization()
project = self.create_project(organization=org)

Expand All @@ -287,7 +310,7 @@ def test_missed_exception_handling(self, logger):
"schedule": [-2, "minute"],
},
)
MonitorEnvironment.objects.create(
failing_monitor_environment = MonitorEnvironment.objects.create(
monitor=exception_monitor,
environment=self.environment,
next_checkin=ts - timedelta(minutes=1),
Expand All @@ -301,7 +324,7 @@ def test_missed_exception_handling(self, logger):
type=MonitorType.CRON_JOB,
config={"schedule": "* * * * *"},
)
monitor_environment = MonitorEnvironment.objects.create(
successful_monitor_environment = MonitorEnvironment.objects.create(
monitor=monitor,
environment=self.environment,
next_checkin=ts - timedelta(minutes=1),
Expand All @@ -311,15 +334,22 @@ def test_missed_exception_handling(self, logger):

check_missing(task_run_ts)

# Logged the exception
assert logger.exception.call_count == 1
# assert that task is called for the specific environments
assert mark_environment_missing_mock.delay.call_count == 2

# assert failing monitor raises an error
with pytest.raises(ValueError):
mark_environment_missing(failing_monitor_environment.id)

# assert regular monitor works
mark_environment_missing(successful_monitor_environment.id)

# We still marked a monitor as missed
assert MonitorEnvironment.objects.filter(
id=monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
id=successful_monitor_environment.id, status=MonitorStatus.MISSED_CHECKIN
).exists()
assert MonitorCheckIn.objects.filter(
monitor_environment=monitor_environment.id, status=CheckInStatus.MISSED
monitor_environment=successful_monitor_environment.id, status=CheckInStatus.MISSED
).exists()


Expand Down