Skip to content

Commit 43b5124

Browse files
authored
ref(proguard): Deobfuscate view hierarchies as part of symbolication (#74196)
Depends on getsentry/symbolicator#1496. This moves view hierarchy deobfuscation out of the plugin/preprocessor and into symbolication.
1 parent 7f57483 commit 43b5124

File tree

8 files changed

+170
-122
lines changed

8 files changed

+170
-122
lines changed

src/sentry/lang/java/plugin.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Any
55

66
from sentry.lang.java.processing import deobfuscate_exception_value
7-
from sentry.lang.java.utils import deobfuscate_view_hierarchy, has_proguard_file
7+
from sentry.lang.java.utils import has_proguard_file
88
from sentry.plugins.base.v2 import EventPreprocessor, Plugin2
99

1010

@@ -19,6 +19,6 @@ def get_stacktrace_processors(self, data, stacktrace_infos, platforms, **kwargs)
1919

2020
def get_event_preprocessors(self, data: Mapping[str, Any]) -> Sequence[EventPreprocessor]:
2121
if has_proguard_file(data):
22-
return [deobfuscate_exception_value, deobfuscate_view_hierarchy]
22+
return [deobfuscate_exception_value]
2323
else:
2424
return []

src/sentry/lang/java/processing.py

+88-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
from collections.abc import Mapping
44
from typing import Any
55

6+
import orjson
7+
8+
from sentry.attachments import CachedAttachment, attachment_cache
9+
from sentry.ingest.consumer.processors import CACHE_TIMEOUT
610
from sentry.lang.java.utils import get_jvm_images, get_proguard_images
711
from sentry.lang.native.error import SymbolicationFailed, write_error
812
from sentry.lang.native.symbolicator import Symbolicator
@@ -11,6 +15,7 @@
1115
from sentry.models.release import Release
1216
from sentry.stacktraces.processing import find_stacktraces_in_data
1317
from sentry.utils import metrics
18+
from sentry.utils.cache import cache_key_for_event
1419
from sentry.utils.safe import get_path
1520

1621
logger = logging.getLogger(__name__)
@@ -139,6 +144,76 @@ def _get_release_package(project: Project, release_name: str | None) -> str | No
139144
return release.package if release else None
140145

141146

147+
def _get_window_class_names(attachments: list[CachedAttachment]) -> list[str]:
    """Returns the class names of all windows in all view hierarchies
    contained in `attachments`.

    Only attachments whose type is `event.view_hierarchy` are inspected.
    Every window, at any nesting depth, contributes its `type` when that
    value is set. Duplicates are kept, and the names come out in the order
    of the stack-based traversal rather than in document order.

    A view hierarchy without a `windows` entry (or with a null one)
    contributes no class names.
    """

    class_names = []
    windows_to_visit = []

    for attachment in attachments:
        if attachment.type == "event.view_hierarchy":
            view_hierarchy = orjson.loads(attachment_cache.get_data(attachment))
            # `windows` may be absent or null; extending with `None` would
            # raise a TypeError and abort processing of the whole event.
            windows_to_visit.extend(view_hierarchy.get("windows") or ())

    # Iterative depth-first walk over the window tree.
    while windows_to_visit:
        window = windows_to_visit.pop()
        if (class_name := window.get("type")) is not None:
            class_names.append(class_name)
        if children := window.get("children"):
            windows_to_visit.extend(children)

    return class_names
167+
168+
169+
def _deobfuscate_view_hierarchy(view_hierarchy: Any, class_names: dict[str, str]) -> None:
170+
"""Deobfuscates a view hierarchy in-place.
171+
172+
The `class_names` dict is used to resolve obfuscated to deobfuscated names. If
173+
an obfuscated class name isn't present in `class_names`, it is left unchanged."""
174+
175+
windows_to_deobfuscate = [*view_hierarchy.get("windows")]
176+
177+
while windows_to_deobfuscate:
178+
window = windows_to_deobfuscate.pop()
179+
if (
180+
window.get("type") is not None
181+
and (mapped_type := class_names.get(window["type"])) is not None
182+
):
183+
window["type"] = mapped_type
184+
if children := window.get("children"):
185+
windows_to_deobfuscate.extend(children)
186+
187+
188+
def _deobfuscate_view_hierarchies(
    attachments: list[CachedAttachment], class_names: dict[str, str]
) -> list[CachedAttachment]:
    """Deobfuscates all view hierarchies contained in `attachments`, returning a new list of attachments.

    Non-view-hierarchy attachments are unchanged.

    If `class_names` is empty or `None`, nothing can be remapped, so the
    attachments are returned as-is (in a new list) without being parsed or
    re-serialized.
    """
    # Without a class map there is nothing to rewrite; skip the JSON
    # round-trip and tolerate callers that pass `None`.
    if not class_names:
        return list(attachments)

    new_attachments = []
    for attachment in attachments:
        if attachment.type != "event.view_hierarchy":
            new_attachments.append(attachment)
            continue

        view_hierarchy = orjson.loads(attachment_cache.get_data(attachment))
        _deobfuscate_view_hierarchy(view_hierarchy, class_names)
        # Re-upload to the cache as unchunked data
        new_attachments.append(
            CachedAttachment(
                type=attachment.type,
                id=attachment.id,
                name=attachment.name,
                content_type=attachment.content_type,
                data=orjson.dumps(view_hierarchy),
                chunks=None,
            )
        )

    return new_attachments
215+
216+
142217
def map_symbolicator_process_jvm_errors(
143218
errors: list[dict[str, Any]] | None,
144219
) -> list[dict[str, Any]]:
@@ -195,10 +270,17 @@ def process_jvm_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
195270
]
196271

197272
processable_exceptions = _get_exceptions_for_symbolication(data)
273+
cache_key = cache_key_for_event(data)
274+
attachments = [*attachment_cache.get(cache_key)]
275+
window_class_names = _get_window_class_names(attachments)
198276

199277
metrics.incr("proguard.symbolicator.events")
200278

201-
if not any(stacktrace["frames"] for stacktrace in stacktraces) and not processable_exceptions:
279+
if (
280+
not any(stacktrace["frames"] for stacktrace in stacktraces)
281+
and not processable_exceptions
282+
and not window_class_names
283+
):
202284
metrics.incr("proguard.symbolicator.events.skipped")
203285
return
204286

@@ -211,6 +293,7 @@ def process_jvm_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
211293
stacktraces=stacktraces,
212294
modules=modules,
213295
release_package=release_package,
296+
classes=window_class_names,
214297
)
215298

216299
if not _handle_response_status(data, response):
@@ -248,4 +331,8 @@ def process_jvm_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
248331
raw_exc["module"] = exc["module"]
249332
raw_exc["type"] = exc["type"]
250333

334+
classes = response.get("classes")
335+
new_attachments = _deobfuscate_view_hierarchies(attachments, classes)
336+
attachment_cache.set(cache_key, attachments=new_attachments, timeout=CACHE_TIMEOUT)
337+
251338
return data

src/sentry/lang/java/utils.py

+3-44
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from sentry.lang.java.proguard import open_proguard_mapper
1212
from sentry.models.debugfile import ProjectDebugFile
1313
from sentry.models.project import Project
14-
from sentry.stacktraces.processing import StacktraceInfo
1514
from sentry.utils.cache import cache_key_for_event
1615
from sentry.utils.safe import get_path
1716

@@ -70,31 +69,6 @@ def get_proguard_mapper(uuid: str, project: Project):
7069
return mapper
7170

7271

73-
def _deobfuscate_view_hierarchy(event_data: dict[str, Any], project: Project, view_hierarchy):
74-
"""
75-
Deobfuscates a view hierarchy in-place.
76-
77-
If we're unable to fetch a ProGuard uuid or unable to init the mapper,
78-
then the view hierarchy remains unmodified.
79-
"""
80-
proguard_uuids = get_proguard_images(event_data)
81-
if len(proguard_uuids) == 0:
82-
return
83-
84-
with sentry_sdk.start_span(op="proguard.deobfuscate_view_hierarchy_data"):
85-
for proguard_uuid in proguard_uuids:
86-
mapper = get_proguard_mapper(proguard_uuid, project)
87-
if mapper is None:
88-
return
89-
90-
windows_to_deobfuscate = [*view_hierarchy.get("windows")]
91-
while windows_to_deobfuscate:
92-
window = windows_to_deobfuscate.pop()
93-
window["type"] = mapper.remap_class(window.get("type")) or window.get("type")
94-
if children := window.get("children"):
95-
windows_to_deobfuscate.extend(children)
96-
97-
9872
@sentry_sdk.trace
9973
def deobfuscation_template(data, map_type, deobfuscation_fn):
10074
"""
@@ -133,13 +107,8 @@ def deobfuscation_template(data, map_type, deobfuscation_fn):
133107
attachment_cache.set(cache_key, attachments=new_attachments, timeout=CACHE_TIMEOUT)
134108

135109

136-
def deobfuscate_view_hierarchy(data):
137-
deobfuscation_template(data, "proguard", _deobfuscate_view_hierarchy)
138-
139-
140-
def is_jvm_event(data: Any, stacktraces: list[StacktraceInfo]) -> bool:
141-
"""Returns whether `data` is a JVM event, based on its platform, images, and
142-
the supplied stacktraces."""
110+
def is_jvm_event(data: Any) -> bool:
111+
"""Returns whether `data` is a JVM event, based on its images."""
143112

144113
# check if there are any JVM or Proguard images
145114
images = get_path(
@@ -149,14 +118,4 @@ def is_jvm_event(data: Any, stacktraces: list[StacktraceInfo]) -> bool:
149118
filter=lambda x: is_valid_jvm_image(x) or is_valid_proguard_image(x),
150119
default=(),
151120
)
152-
if not images:
153-
return False
154-
155-
if data.get("platform") == "java":
156-
return True
157-
158-
for stacktrace in stacktraces:
159-
if any(x == "java" for x in stacktrace.platforms):
160-
return True
161-
162-
return False
121+
return bool(images)

src/sentry/lang/native/symbolicator.py

+2
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ def process_jvm(
241241
stacktraces,
242242
modules,
243243
release_package,
244+
classes,
244245
apply_source_context=True,
245246
):
246247
"""
@@ -262,6 +263,7 @@ def process_jvm(
262263
"exceptions": exceptions,
263264
"stacktraces": stacktraces,
264265
"modules": modules,
266+
"classes": classes,
265267
"options": {"apply_source_context": apply_source_context},
266268
}
267269

src/sentry/profiles/task.py

+1
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ def symbolicate(
489489
modules=modules,
490490
release_package=profile.get("transaction_metadata", {}).get("app.identifier"),
491491
apply_source_context=False,
492+
classes=[],
492493
)
493494
return symbolicator.process_payload(
494495
stacktraces=stacktraces, modules=modules, apply_source_context=False

src/sentry/tasks/symbolication.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def get_symbolication_platforms(
109109

110110
platforms = []
111111

112-
if is_jvm_event(data, stacktraces):
112+
if is_jvm_event(data):
113113
platforms.append(SymbolicatorPlatform.jvm)
114114
if is_js_event(data, stacktraces):
115115
platforms.append(SymbolicatorPlatform.js)

tests/relay_integration/lang/java/test_plugin.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -1556,7 +1556,6 @@ def test_invalid_exception(self):
15561556

15571557
def test_is_jvm_event(self):
15581558
from sentry.lang.java.utils import is_jvm_event
1559-
from sentry.stacktraces.processing import find_stacktraces_in_data
15601559

15611560
event = {
15621561
"user": {"ip_address": "31.172.207.97"},
@@ -1586,8 +1585,7 @@ def test_is_jvm_event(self):
15861585
},
15871586
"timestamp": iso_format(before_now(seconds=1)),
15881587
}
1589-
stacktraces = find_stacktraces_in_data(event)
1590-
assert is_jvm_event(event, stacktraces)
1588+
assert is_jvm_event(event)
15911589

15921590
event = {
15931591
"user": {"ip_address": "31.172.207.97"},
@@ -1616,9 +1614,8 @@ def test_is_jvm_event(self):
16161614
},
16171615
"timestamp": iso_format(before_now(seconds=1)),
16181616
}
1619-
stacktraces = find_stacktraces_in_data(event)
16201617
# has no platform
1621-
assert not is_jvm_event(event, stacktraces)
1618+
assert is_jvm_event(event)
16221619

16231620
event = {
16241621
"user": {"ip_address": "31.172.207.97"},
@@ -1648,6 +1645,5 @@ def test_is_jvm_event(self):
16481645
},
16491646
"timestamp": iso_format(before_now(seconds=1)),
16501647
}
1651-
stacktraces = find_stacktraces_in_data(event)
16521648
# has no modules
1653-
assert not is_jvm_event(event, stacktraces)
1649+
assert not is_jvm_event(event)

0 commit comments

Comments
 (0)