Skip to content

Commit 3c02b74

Browse files
authored
fix(weekly-reports): dupe on report timestamp instead of batch (#76031)
- It has been observed that a portion of our duplicate weekly emails come from the schedule celery task being killed and restarted, which will cause duplicates for any orgs that were queued in the first batch. - this also generates a new batch_id, which causes our de-duplication logic to not work, as batch_id is included in the redis key that we use. - [x] This PR modifies our logic so that the dupe checker instead uses the timestamp for which the weekly report is generated on. this should be a safe way to ensure no duplicates occur across batches. - [x] Updates the test.
1 parent 8505e05 commit 3c02b74

File tree

2 files changed

+19
-8
lines changed

2 files changed

+19
-8
lines changed

src/sentry/tasks/summaries/weekly_reports.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import uuid
66
from collections.abc import Mapping, Sequence
77
from dataclasses import dataclass
8-
from datetime import timedelta
8+
from datetime import datetime, timedelta
99
from functools import partial
1010
from typing import Any, cast
1111

@@ -293,7 +293,7 @@ def _send_to_user(self, user_template_context: Mapping[str, Any]) -> None:
293293
template_context: Mapping[str, Any] | None = user_template_context.get("context")
294294
user_id: int | None = user_template_context.get("user_id")
295295
if template_context and user_id:
296-
dupe_check = _DuplicateDeliveryCheck(self, user_id)
296+
dupe_check = _DuplicateDeliveryCheck(self, user_id, self.ctx.timestamp)
297297
if not dupe_check.check_for_duplicate_delivery():
298298
self.send_email(template_ctx=template_context, user_id=user_id)
299299
dupe_check.record_delivery()
@@ -337,9 +337,13 @@ def send_email(self, template_ctx: Mapping[str, Any], user_id: int) -> None:
337337

338338

339339
class _DuplicateDeliveryCheck:
340-
def __init__(self, batch: OrganizationReportBatch, user_id: int):
340+
def __init__(self, batch: OrganizationReportBatch, user_id: int, timestamp: float):
341341
self.batch = batch
342342
self.user_id = user_id
343+
# note that if the timestamps between batches cross a UTC day boundary,
344+
# this will not work correctly. but we always start reports at midnight UTC,
345+
# so that is unlikely to be an issue.
346+
self.report_date = datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d")
343347

344348
# Tracks state from `check_for_duplicate_delivery` to `record_delivery`
345349
self.count: int | None = None
@@ -349,7 +353,11 @@ def _get_redis_cluster(self) -> LocalClient:
349353

350354
@property
351355
def _redis_name(self) -> str:
352-
name_parts = (self.batch.batch_id, self.batch.ctx.organization.id, self.user_id)
356+
name_parts = (
357+
self.report_date,
358+
self.batch.ctx.organization.id,
359+
self.user_id,
360+
)
353361
return ":".join(str(part) for part in name_parts)
354362

355363
def _get_log_extras(self) -> dict[str, Any]:
@@ -358,6 +366,7 @@ def _get_log_extras(self) -> dict[str, Any]:
358366
"organization": self.batch.ctx.organization.id,
359367
"user_id": self.user_id,
360368
"has_email_override": bool(self.batch.email_override),
369+
"report_date": self.report_date,
361370
}
362371

363372
def check_for_duplicate_delivery(self) -> bool:

tests/sentry/tasks/test_weekly_reports.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,24 +1011,26 @@ def test_duplicate_detection(self, mock_send_email, mock_prepare_template_contex
10111011
ctx = OrganizationReportContext(0, 0, self.organization)
10121012
template_context = prepare_template_context(ctx, [self.user.id])
10131013
mock_prepare_template_context.return_value = template_context
1014-
batch_id = UUID("abe8ba3e-90af-4a98-b925-5f30250ae6a0")
1014+
batch1_id = UUID("abe8ba3e-90af-4a98-b925-5f30250ae6a0")
1015+
batch2_id = UUID("abe8ba3e-90af-4a98-b925-5f30250ae6a1")
10151016
self._set_option_value("always")
10161017

10171018
# First send
1018-
OrganizationReportBatch(ctx, batch_id).deliver_reports()
1019+
OrganizationReportBatch(ctx, batch1_id).deliver_reports()
10191020
assert mock_send_email.call_count == 1
10201021
mock_logger.error.assert_not_called()
10211022

10221023
# Duplicate send
1023-
OrganizationReportBatch(ctx, batch_id).deliver_reports()
1024+
OrganizationReportBatch(ctx, batch2_id).deliver_reports()
10241025
assert mock_send_email.call_count == 1
10251026
assert mock_logger.error.call_count == 1
10261027
mock_logger.error.assert_called_once_with(
10271028
"weekly_report.delivery_record.duplicate_detected",
10281029
extra={
1029-
"batch_id": str(batch_id),
1030+
"batch_id": str(batch2_id),
10301031
"organization": self.organization.id,
10311032
"user_id": self.user.id,
10321033
"has_email_override": False,
1034+
"report_date": "1970-01-01",
10331035
},
10341036
)

0 commit comments

Comments
 (0)