Skip to content

Commit 104352c

Browse files
Revert "chore(similarity): Do not send > 30 system frames to seer (#81259)"
This reverts commit 2e3a412. Co-authored-by: jangjodi <[email protected]>
1 parent c7b83be commit 104352c

File tree

7 files changed

+15
-212
lines changed

7 files changed

+15
-212
lines changed

src/sentry/grouping/ingest/seer.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,9 @@
1717
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
1818
from sentry.seer.similarity.types import SimilarIssuesEmbeddingsRequest
1919
from sentry.seer.similarity.utils import (
20-
ReferrerOptions,
2120
event_content_is_seer_eligible,
2221
filter_null_from_string,
23-
get_stacktrace_string_with_metrics,
22+
get_stacktrace_string,
2423
killswitch_enabled,
2524
)
2625
from sentry.utils import metrics
@@ -183,9 +182,7 @@ def _circuit_breaker_broken(event: Event, project: Project) -> bool:
183182

184183

185184
def _has_empty_stacktrace_string(event: Event, variants: Mapping[str, BaseVariant]) -> bool:
186-
stacktrace_string = get_stacktrace_string_with_metrics(
187-
get_grouping_info_from_variants(variants), event.platform, ReferrerOptions.INGEST
188-
)
185+
stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants))
189186
if stacktrace_string == "":
190187
metrics.incr(
191188
"grouping.similarity.did_call_seer",
@@ -220,10 +217,7 @@ def get_seer_similar_issues(
220217
"hash": event_hash,
221218
"project_id": event.project.id,
222219
"stacktrace": event.data.get(
223-
"stacktrace_string",
224-
get_stacktrace_string_with_metrics(
225-
get_grouping_info_from_variants(variants), event.platform, ReferrerOptions.INGEST
226-
),
220+
"stacktrace_string", get_stacktrace_string(get_grouping_info_from_variants(variants))
227221
),
228222
"exception_type": filter_null_from_string(exception_type) if exception_type else None,
229223
"k": num_neighbors,

src/sentry/issues/endpoints/group_similar_issues_embeddings.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
1919
from sentry.seer.similarity.types import SeerSimilarIssueData, SimilarIssuesEmbeddingsRequest
2020
from sentry.seer.similarity.utils import (
21-
TooManyOnlySystemFramesException,
2221
event_content_has_stacktrace,
2322
get_stacktrace_string,
2423
killswitch_enabled,
@@ -83,12 +82,9 @@ def get(self, request: Request, group) -> Response:
8382
stacktrace_string = ""
8483
if latest_event and event_content_has_stacktrace(latest_event):
8584
grouping_info = get_grouping_info(None, project=group.project, event=latest_event)
86-
try:
87-
stacktrace_string = get_stacktrace_string(grouping_info)
88-
except TooManyOnlySystemFramesException:
89-
stacktrace_string = ""
85+
stacktrace_string = get_stacktrace_string(grouping_info)
9086

91-
if not stacktrace_string or not latest_event:
87+
if stacktrace_string == "" or not latest_event:
9288
return Response([]) # No exception, stacktrace or in-app frames, or event
9389

9490
similar_issues_params: SimilarIssuesEmbeddingsRequest = {

src/sentry/seer/similarity/utils.py

-41
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import logging
2-
from enum import StrEnum
32
from typing import Any, TypeVar
43

54
from sentry import options
@@ -152,15 +151,6 @@
152151
]
153152

154153

155-
class ReferrerOptions(StrEnum):
156-
INGEST = "ingest"
157-
BACKFILL = "backfill"
158-
159-
160-
class TooManyOnlySystemFramesException(Exception):
161-
pass
162-
163-
164154
def _get_value_if_exists(exception_value: dict[str, Any]) -> str:
165155
return exception_value["values"][0] if exception_value.get("values") else ""
166156

@@ -187,7 +177,6 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
187177

188178
frame_count = 0
189179
html_frame_count = 0 # for a temporary metric
190-
is_frames_truncated = False
191180
stacktrace_str = ""
192181
found_non_snipped_context_line = False
193182

@@ -196,15 +185,12 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
196185
def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
197186
nonlocal frame_count
198187
nonlocal html_frame_count
199-
nonlocal is_frames_truncated
200188
nonlocal found_non_snipped_context_line
201189
frame_strings = []
202190

203191
contributing_frames = [
204192
frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes")
205193
]
206-
if len(contributing_frames) + frame_count > MAX_FRAME_COUNT:
207-
is_frames_truncated = True
208194
contributing_frames = _discard_excess_frames(
209195
contributing_frames, MAX_FRAME_COUNT, frame_count
210196
)
@@ -269,8 +255,6 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
269255
exc_value = _get_value_if_exists(exception_value)
270256
elif exception_value.get("id") == "stacktrace" and frame_count < MAX_FRAME_COUNT:
271257
frame_strings = _process_frames(exception_value["values"])
272-
if is_frames_truncated and not app_hash:
273-
raise TooManyOnlySystemFramesException
274258
# Only exceptions have the type and value properties, so we don't need to handle the threads
275259
# case here
276260
header = f"{exc_type}: {exc_value}\n" if exception["id"] == "exception" else ""
@@ -306,31 +290,6 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
306290
return stacktrace_str.strip()
307291

308292

309-
def get_stacktrace_string_with_metrics(
310-
data: dict[str, Any], platform: str | None, referrer: ReferrerOptions
311-
) -> str | None:
312-
try:
313-
stacktrace_string = get_stacktrace_string(data)
314-
except TooManyOnlySystemFramesException:
315-
platform = platform if platform else "unknown"
316-
metrics.incr(
317-
"grouping.similarity.over_threshold_only_system_frames",
318-
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
319-
tags={"platform": platform, "referrer": referrer},
320-
)
321-
if referrer == ReferrerOptions.INGEST:
322-
metrics.incr(
323-
"grouping.similarity.did_call_seer",
324-
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
325-
tags={
326-
"call_made": False,
327-
"blocker": "over-threshold-only-system-frames",
328-
},
329-
)
330-
stacktrace_string = None
331-
return stacktrace_string
332-
333-
334293
def event_content_has_stacktrace(event: Event) -> bool:
335294
# If an event has no stacktrace, there's no data for Seer to analyze, so no point in making the
336295
# API call. If we ever start analyzing message-only events, we'll need to add `event.title in

src/sentry/tasks/embeddings_grouping/utils.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,9 @@
3232
SimilarHashNotFoundError,
3333
)
3434
from sentry.seer.similarity.utils import (
35-
ReferrerOptions,
3635
event_content_has_stacktrace,
3736
filter_null_from_string,
38-
get_stacktrace_string_with_metrics,
37+
get_stacktrace_string,
3938
)
4039
from sentry.snuba.dataset import Dataset
4140
from sentry.snuba.referrer import Referrer
@@ -356,10 +355,8 @@ def get_events_from_nodestore(
356355
event._project_cache = project
357356
if event and event.data and event_content_has_stacktrace(event):
358357
grouping_info = get_grouping_info(None, project=project, event=event)
359-
stacktrace_string = get_stacktrace_string_with_metrics(
360-
grouping_info, event.platform, ReferrerOptions.BACKFILL
361-
)
362-
if not stacktrace_string:
358+
stacktrace_string = get_stacktrace_string(grouping_info)
359+
if stacktrace_string == "":
363360
invalid_event_group_ids.append(group_id)
364361
continue
365362
primary_hash = event.get_primary_hash()

tests/sentry/grouping/ingest/test_seer.py

-49
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping
99
from sentry.models.grouphash import GroupHash
1010
from sentry.seer.similarity.types import SeerSimilarIssueData
11-
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
1211
from sentry.testutils.cases import TestCase
1312
from sentry.testutils.helpers.eventprocessing import save_new_event
1413
from sentry.testutils.helpers.options import override_options
@@ -307,51 +306,3 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self)
307306
expected_metadata,
308307
None,
309308
)
310-
311-
@patch("sentry.seer.similarity.utils.metrics")
312-
def test_too_many_only_system_frames(self, mock_metrics: Mock) -> None:
313-
type = "FailedToFetchError"
314-
value = "Charlie didn't bring the ball back"
315-
context_line = f"raise {type}('{value}')"
316-
new_event = Event(
317-
project_id=self.project.id,
318-
event_id="22312012112120120908201304152013",
319-
data={
320-
"title": f"{type}('{value}')",
321-
"exception": {
322-
"values": [
323-
{
324-
"type": type,
325-
"value": value,
326-
"stacktrace": {
327-
"frames": [
328-
{
329-
"function": f"play_fetch_{i}",
330-
"filename": f"dogpark{i}.py",
331-
"context_line": context_line,
332-
}
333-
for i in range(MAX_FRAME_COUNT + 1)
334-
]
335-
},
336-
}
337-
]
338-
},
339-
"platform": "python",
340-
},
341-
)
342-
get_seer_similar_issues(new_event, new_event.get_grouping_variants())
343-
344-
sample_rate = options.get("seer.similarity.metrics_sample_rate")
345-
mock_metrics.incr.assert_any_call(
346-
"grouping.similarity.over_threshold_only_system_frames",
347-
sample_rate=sample_rate,
348-
tags={"platform": "python", "referrer": "ingest"},
349-
)
350-
mock_metrics.incr.assert_any_call(
351-
"grouping.similarity.did_call_seer",
352-
sample_rate=1.0,
353-
tags={
354-
"call_made": False,
355-
"blocker": "over-threshold-only-system-frames",
356-
},
357-
)

tests/sentry/seer/similarity/test_utils.py

+6-36
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,10 @@
33
from typing import Any, Literal, cast
44
from uuid import uuid1
55

6-
import pytest
7-
86
from sentry.eventstore.models import Event
97
from sentry.seer.similarity.utils import (
108
BASE64_ENCODED_PREFIXES,
11-
MAX_FRAME_COUNT,
129
SEER_ELIGIBLE_PLATFORMS,
13-
TooManyOnlySystemFramesException,
1410
_is_snipped_context_line,
1511
event_content_is_seer_eligible,
1612
filter_null_from_string,
@@ -674,18 +670,18 @@ def test_chained_too_many_frames_minified_js_frame_limit(self):
674670
)
675671

676672
def test_chained_too_many_exceptions(self):
677-
"""Test that we restrict number of chained exceptions to MAX_FRAME_COUNT."""
673+
"""Test that we restrict number of chained exceptions to 30."""
678674
data_chained_exception = copy.deepcopy(self.CHAINED_APP_DATA)
679675
data_chained_exception["app"]["component"]["values"][0]["values"] = [
680676
self.create_exception(
681677
exception_type_str="Exception",
682678
exception_value=f"exception {i} message!",
683679
frames=self.create_frames(num_frames=1, context_line_factory=lambda i: f"line {i}"),
684680
)
685-
for i in range(1, MAX_FRAME_COUNT + 2)
681+
for i in range(1, 32)
686682
]
687683
stacktrace_str = get_stacktrace_string(data_chained_exception)
688-
for i in range(2, MAX_FRAME_COUNT + 2):
684+
for i in range(2, 32):
689685
assert f"exception {i} message!" in stacktrace_str
690686
assert "exception 1 message!" not in stacktrace_str
691687

@@ -714,35 +710,9 @@ def test_no_app_no_system(self):
714710
stacktrace_str = get_stacktrace_string(data)
715711
assert stacktrace_str == ""
716712

717-
def test_too_many_system_frames_single_exception(self):
718-
data_system = copy.deepcopy(self.BASE_APP_DATA)
719-
data_system["system"] = data_system.pop("app")
720-
data_system["system"]["component"]["values"][0]["values"][0][
721-
"values"
722-
] += self.create_frames(MAX_FRAME_COUNT + 1, True)
723-
724-
with pytest.raises(TooManyOnlySystemFramesException):
725-
get_stacktrace_string(data_system)
726-
727-
def test_too_many_system_frames_chained_exception(self):
728-
data_system = copy.deepcopy(self.CHAINED_APP_DATA)
729-
data_system["system"] = data_system.pop("app")
730-
# Split MAX_FRAME_COUNT across the two exceptions
731-
data_system["system"]["component"]["values"][0]["values"][0]["values"][0][
732-
"values"
733-
] += self.create_frames(MAX_FRAME_COUNT // 2, True)
734-
data_system["system"]["component"]["values"][0]["values"][1]["values"][0][
735-
"values"
736-
] += self.create_frames(MAX_FRAME_COUNT // 2, True)
737-
738-
with pytest.raises(TooManyOnlySystemFramesException):
739-
get_stacktrace_string(data_system)
713+
def test_over_30_contributing_frames(self):
714+
"""Check that when there are over 30 contributing frames, the last 30 are included."""
740715

741-
def test_too_many_in_app_contributing_frames(self):
742-
"""
743-
Check that when there are over MAX_FRAME_COUNT contributing frames, the last MAX_FRAME_COUNT
744-
are included.
745-
"""
746716
data_frames = copy.deepcopy(self.BASE_APP_DATA)
747717
# Create 30 contributing frames, 1-20 -> last 10 should be included
748718
data_frames["app"]["component"]["values"][0]["values"][0]["values"] = self.create_frames(
@@ -769,7 +739,7 @@ def test_too_many_in_app_contributing_frames(self):
769739
for i in range(41, 61):
770740
num_frames += 1
771741
assert ("test = " + str(i) + "!") in stacktrace_str
772-
assert num_frames == MAX_FRAME_COUNT
742+
assert num_frames == 30
773743

774744
def test_too_many_frames_minified_js_frame_limit(self):
775745
"""Test that we restrict fully-minified stacktraces to 20 frames, and all other stacktraces to 30 frames."""

0 commit comments

Comments
 (0)