Commit 2ac4ef7

dev(similarity): add more telemetry to backfill (#70947)
- [x] Adds a log for each backfill task start
- [x] Adds a metric for Seer timing
- [x] Removes the per-batch delay: grouping now has its own separate deployment, so there is no need to worry about overloading Seer
- [x] Changes the exception loggers to `logger.exception` (see the sketch below)
1 parent adcc638 commit 2ac4ef7
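
Why the last item matters: unlike `logger.info`, `logger.exception` logs at ERROR level and automatically attaches the traceback of the exception currently being handled, so each lookup failure ships with its stack context. A minimal standard-library illustration (the logger name and `extra` values are placeholders, not taken from the diff):

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("backfill_demo")

try:
    {}["missing"]  # simulate the KeyError the backfill's lookup path handles
except KeyError:
    # INFO record, no traceback: the failure context is lost.
    logger.info("event_lookup_error", extra={"group_id": 123})
    # ERROR record with the active exception's traceback appended.
    logger.exception("event_lookup_error", extra={"group_id": 123})
```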

File tree

2 files changed: +25 −16 lines
src/sentry/tasks/backfill_seer_grouping_records.py

+21 −12
```diff
@@ -33,7 +33,6 @@
 from sentry.utils.snuba import bulk_snuba_queries
 
 BATCH_SIZE = 20
-SEER_BACKFILL_DELAY_PER_RECORD = 0.1
 BACKFILL_NAME = "backfill_grouping_records"
 LAST_PROCESSED_REDIS_KEY = "grouping_record_backfill.last_processed_id"
 
@@ -67,6 +66,14 @@ def backfill_seer_grouping_records(
     Task to backfill seer grouping_records table.
     Pass in last_processed_id = 0 if running project for the first time, else None
     """
+    logger.info(
+        "backfill_seer_grouping_records.start",
+        extra={
+            "project_id": project_id,
+            "last_processed_id": last_processed_id,
+            "dry_run": dry_run,
+        },
+    )
     project = Project.objects.get_from_cache(id=project_id)
     if not features.has("projects:similarity-embeddings-backfill", project):
         return
```
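
The new start log carries its context via logging's `extra` mechanism: each key is copied onto the emitted `LogRecord` as a plain attribute, which is what lets log pipelines index `project_id`, `last_processed_id`, and `dry_run` as structured fields. A small standard-library sketch of that behavior (handler and logger names are illustrative):

```python
import logging

captured = {}

class CaptureHandler(logging.Handler):
    def emit(self, record: logging.LogRecord) -> None:
        # Keys passed through `extra` become attributes on the record.
        captured["project_id"] = record.project_id
        captured["dry_run"] = record.dry_run

logger = logging.getLogger("capture_demo")
logger.setLevel(logging.INFO)
logger.addHandler(CaptureHandler())

logger.info("backfill_seer_grouping_records.start", extra={"project_id": 1, "dry_run": False})
assert captured == {"project_id": 1, "dry_run": False}
```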
```diff
@@ -141,13 +148,14 @@ def backfill_seer_grouping_records(
         project, rows, group_id_message_batch, group_hashes_dict
     )
 
-    response = post_bulk_grouping_records(
-        CreateGroupingRecordsRequest(
-            group_id_list=group_id_batch,
-            data=data["data"],
-            stacktrace_list=data["stacktrace_list"],
+    with metrics.timer(f"{BACKFILL_NAME}.post_bulk_grouping_records", sample_rate=1.0):
+        response = post_bulk_grouping_records(
+            CreateGroupingRecordsRequest(
+                group_id_list=group_id_batch,
+                data=data["data"],
+                stacktrace_list=data["stacktrace_list"],
+            )
         )
-    )
     if response["success"]:
         groups = Group.objects.filter(project_id=project.id, id__in=group_id_batch)
         for group in groups:
```
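
`metrics.timer` (Sentry's timing helper, used above with `sample_rate=1.0` so every Seer call is measured rather than sampled) wraps the request as a context manager. For readers outside the codebase, the pattern is roughly the following sketch, where `emit_timing` is a hypothetical stand-in for the real metrics backend:

```python
import time
from contextlib import contextmanager
from typing import Iterator

def emit_timing(key: str, ms: float, sample_rate: float) -> None:
    # Hypothetical backend; a real one would forward to statsd or similar.
    print(f"{key}: {ms:.1f}ms (sample_rate={sample_rate})")

@contextmanager
def timer(key: str, sample_rate: float = 1.0) -> Iterator[None]:
    start = time.monotonic()
    try:
        yield
    finally:
        # Record wall-clock duration even if the wrapped call raises.
        emit_timing(key, (time.monotonic() - start) * 1000.0, sample_rate)

with timer("backfill_grouping_records.post_bulk_grouping_records"):
    time.sleep(0.05)  # stands in for the Seer HTTP round trip
```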
```diff
@@ -174,7 +182,6 @@ def backfill_seer_grouping_records(
     )  # needed for typing
     backfill_seer_grouping_records.apply_async(
         args=[project.id, last_processed_id],
-        countdown=BATCH_SIZE * SEER_BACKFILL_DELAY_PER_RECORD,
     )
     return
 
```
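
Removing `countdown` changes how the follow-up batch is scheduled: with the old constants, each re-queued task was held for `BATCH_SIZE * SEER_BACKFILL_DELAY_PER_RECORD` = 20 × 0.1 = 2 seconds; without it, Celery makes the task runnable as soon as a worker dequeues it. A minimal Celery sketch (app, broker, and task are hypothetical):

```python
from celery import Celery

app = Celery("sketch", broker="memory://")  # in-memory broker, for illustration only

@app.task
def next_batch(project_id: int, last_processed_id: int) -> None:
    pass  # stand-in for the real backfill task

# Old behavior: delivery delayed by 20 * 0.1 = 2.0 seconds per batch.
next_batch.apply_async(args=[1, 0], countdown=20 * 0.1)
# New behavior: no countdown, so the next batch starts immediately.
next_batch.apply_async(args=[1, 0])
```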

```diff
@@ -204,15 +211,17 @@ def lookup_group_data_stacktrace_bulk_with_fallback(
                 "group_id": group_id,
                 "event_id": event_id,
             }
-            logger.info("tasks.backfill_seer_grouping_records.event_lookup_error", extra=extra)
+            logger.exception(
+                "tasks.backfill_seer_grouping_records.event_lookup_error", extra=extra
+            )
             continue
         except KeyError:
             extra = {
                 "organization_id": project.organization.id,
                 "project_id": project.id,
                 "group_id": group_id,
             }
-            logger.info("tasks.backfill_seer_grouping_records.no_group_hash", extra=extra)
+            logger.exception("tasks.backfill_seer_grouping_records.no_group_hash", extra=extra)
             continue
 
     return bulk_group_data_stacktraces
@@ -249,7 +258,7 @@ def lookup_group_data_stacktrace_bulk(
             "group_data": json.dumps(rows),
             "error": e.message,
         }
-        logger.info(
+        logger.exception(
             "tasks.backfill_seer_grouping_records.bulk_event_lookup_exception",
             extra=extra,
         )
@@ -322,7 +331,7 @@ def lookup_group_data_stacktrace_single(
             "event_id": event_id,
             "error": e.message,
         }
-        logger.info(
+        logger.exception(
             "tasks.backfill_seer_grouping_records.event_lookup_exception", extra=extra
         )
 
```

tests/sentry/tasks/test_backfill_seer_grouping_records.py

+4 −4
```diff
@@ -172,7 +172,7 @@ def test_lookup_group_data_stacktrace_single_exceptions(self, mock_logger, mock_
             self.group_hashes[event.group.id],
         )
         assert (group_data, stacktrace_string) == (None, "")
-        mock_logger.info.assert_called_with(
+        mock_logger.exception.assert_called_with(
             "tasks.backfill_seer_grouping_records.event_lookup_exception",
             extra={
                 "organization_id": self.project.organization.id,
@@ -261,7 +261,7 @@ def test_lookup_group_data_stacktrace_bulk_exceptions(self, mock_logger, mock_ge
         assert invalid_event_ids == set()
         assert bulk_group_data_stacktraces["data"] == []
         assert bulk_group_data_stacktraces["stacktrace_list"] == []
-        mock_logger.info.assert_called_with(
+        mock_logger.exception.assert_called_with(
             "tasks.backfill_seer_grouping_records.bulk_event_lookup_exception",
             extra={
                 "organization_id": self.project.organization.id,
@@ -510,7 +510,7 @@ def test_lookup_group_data_stacktrace_bulk_with_fallback_no_hash(
         assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces
         assert bulk_group_data_stacktraces["data"] == expected_group_data
         assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces
-        mock_logger.info.assert_called_with(
+        mock_logger.exception.assert_called_with(
             "tasks.backfill_seer_grouping_records.no_group_hash",
             extra={
                 "organization_id": self.project.organization.id,
@@ -550,7 +550,7 @@ def test_lookup_group_data_stacktrace_bulk_with_fallback_event_lookup_error(self
         ]
         assert bulk_group_data_stacktraces["data"] == expected_group_data
         assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces
-        mock_logger.info.assert_called_with(
+        mock_logger.exception.assert_called_with(
             "tasks.backfill_seer_grouping_records.event_lookup_error",
             extra={
                 "organization_id": self.project.organization.id,
```
