Skip to content

Commit 126f80b

Browse files
phacopschloeho7
authored andcommitted
feat(profiling): Deobfuscate Android methods' signature (#53427)
1 parent 92c3834 commit 126f80b

File tree

4 files changed

+197
-25
lines changed

4 files changed

+197
-25
lines changed

Diff for: src/sentry/profiles/java.py

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
from typing import List, Tuple
2+
3+
# Single-character type codes used in bytecode signatures, mapped to the
# Java keyword they are rendered as. "V" (void) is included alongside the
# primitive types so return types can be resolved through the same table.
JAVA_BASE_TYPES = {
    "Z": "boolean",
    "B": "byte",
    "C": "char",
    "S": "short",
    "I": "int",
    "J": "long",
    "F": "float",
    "D": "double",
    "V": "void",
}
14+
15+
16+
# parse_obfuscated_signature will parse an obfuscated signature into parameter
17+
# and return types that can then be deobfuscated
18+
def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]:
    """Split a bytecode method signature into raw parameter and return types.

    The expected shape is ``(<parameter types>)<return type>``, for example
    ``([Ljava/lang/String;I)V``. Types are returned exactly as written in the
    signature (array prefixes and ``L...;`` wrappers included) so that they
    can be deobfuscated individually afterwards.

    Args:
        signature: The signature string to parse.

    Returns:
        A ``(parameter_types, return_type)`` tuple. ``([], "")`` is returned
        when the signature is malformed: empty, not starting with ``(``,
        missing the closing ``)``, or containing an object type that is not
        terminated by ``;``.
    """
    # startswith() is safe on an empty string, unlike signature[0].
    if not signature.startswith("("):
        return [], ""

    # Split on the last ")" so the return type is whatever follows it.
    parameter_types, closing, return_type = signature[1:].rpartition(")")
    if not closing:
        # No closing parenthesis at all: not a method signature.
        return [], ""

    types = []
    i = 0
    # Number of consecutive "[" seen immediately before the current position;
    # they belong to the next type that gets completed.
    arrays = 0

    while i < len(parameter_types):
        t = parameter_types[i]

        if t == "[":
            arrays += 1
        elif t == "L":
            # Object type: runs up to and including the next ";".
            end_index = parameter_types.find(";", i)
            if end_index == -1:
                # Unterminated class name: treat the whole signature as invalid
                # instead of raising.
                return [], ""
            types.append(parameter_types[i - arrays : end_index + 1])
            arrays = 0
            i = end_index
        elif t in JAVA_BASE_TYPES:
            types.append(parameter_types[i - arrays : i + 1])
            arrays = 0
        else:
            # Unknown character: skip it and drop any pending array prefix.
            arrays = 0

        i += 1

    return types, return_type
49+
50+
51+
# format_signature formats the types into a human-readable signature
52+
def format_signature(parameter_java_types: List[str], return_java_type: str) -> str:
    """Render Java type names as a human-readable method signature.

    Args:
        parameter_java_types: Java names of the parameter types, in order.
        return_java_type: Java name of the return type; may be empty.

    Returns:
        ``(p1, p2, ...)`` when the return type is empty or ``void``,
        otherwise ``(p1, p2, ...): return_type``.
    """
    joined_parameters = ", ".join(parameter_java_types)

    # A missing or void return type is simply left out of the output.
    if not return_java_type or return_java_type == "void":
        return f"({joined_parameters})"

    return f"({joined_parameters}): {return_java_type}"
57+
58+
59+
def byte_code_type_to_java_type(mapper, byte_code_type: str) -> str:
    """Convert one bytecode type into its Java source-level name.

    Args:
        mapper: Object exposing ``remap_class(name)``; its result is used as
            the class name when truthy.
        byte_code_type: A single type as found in a bytecode signature, e.g.
            ``I``, ``[J`` or ``Ljava/lang/String;``.

    Returns:
        The Java type name, with one ``[]`` appended per leading ``[``.
        An empty input yields ``""``. Input that is not recognized, or an
        object type that does not end in ``;``, is returned unchanged (apart
        from the array suffix).
    """
    # Peel off the array prefix up front; each "[" becomes a trailing "[]".
    element = byte_code_type.lstrip("[")
    array_suffix = "[]" * (len(byte_code_type) - len(element))

    if not element:
        return array_suffix

    kind = element[0]
    if kind in JAVA_BASE_TYPES:
        return JAVA_BASE_TYPES[kind] + array_suffix

    # Anything that is not a well-formed "L<class>;" is echoed back as-is.
    if kind != "L" or element[-1] != ";":
        return element + array_suffix

    obfuscated = element[1:-1].replace("/", ".")
    # Fall back to the obfuscated name when the mapper has no entry for it.
    return (mapper.remap_class(obfuscated) or obfuscated) + array_suffix
79+
80+
81+
# deobfuscate_signature will parse then deobfuscate a signature and
82+
# format it appropriately
83+
def deobfuscate_signature(mapper, signature: str) -> str:
    """Turn an obfuscated bytecode signature into a readable Java signature.

    Args:
        mapper: Mapper handed through to ``byte_code_type_to_java_type`` to
            resolve obfuscated class names.
        signature: The obfuscated signature, e.g. ``([I)V``.

    Returns:
        The formatted signature, or ``""`` when the input is empty or parsing
        produced neither parameter types nor a return type.
    """
    if not signature:
        return ""

    raw_parameters, raw_return = parse_obfuscated_signature(signature)
    if not raw_parameters and not raw_return:
        return ""

    # Parameters are converted before the return type, in declaration order.
    java_parameters = [
        byte_code_type_to_java_type(mapper, raw_parameter) for raw_parameter in raw_parameters
    ]
    java_return = byte_code_type_to_java_type(mapper, raw_return)

    return format_signature(java_parameters, java_return)

Diff for: src/sentry/profiles/task.py

+34-16
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from sentry.lang.native.symbolicator import RetrySymbolication, Symbolicator, SymbolicatorTaskKind
1919
from sentry.models import EventError, Organization, Project, ProjectDebugFile
2020
from sentry.profiles.device import classify_device
21+
from sentry.profiles.java import deobfuscate_signature
2122
from sentry.profiles.utils import get_from_profiling_service
2223
from sentry.signals import first_profile_received
2324
from sentry.tasks.base import instrumented_task
@@ -618,35 +619,52 @@ def _deobfuscate(profile: Profile, project: Project) -> None:
618619

619620
with sentry_sdk.start_span(op="proguard.remap"):
620621
for method in profile["profile"]["methods"]:
622+
method.setdefault("data", {})
623+
621624
mapped = mapper.remap_frame(
622625
method["class_name"], method["name"], method["source_line"] or 0
623626
)
624-
method.setdefault("data", {})
625-
if len(mapped) == 1:
626-
new_frame = mapped[0]
627-
method.update(
628-
{
629-
"class_name": new_frame.class_name,
630-
"name": new_frame.method,
631-
"source_file": new_frame.file,
632-
"source_line": new_frame.line,
633-
}
634-
)
635-
method["data"]["deobfuscation_status"] = "deobfuscated"
636-
elif len(mapped) > 1:
627+
628+
if "signature" in method and method["signature"]:
629+
method["signature"] = deobfuscate_signature(mapper, method["signature"])
630+
631+
if len(mapped) >= 1:
632+
new_frame = mapped[-1]
633+
method["class_name"] = new_frame.class_name
634+
method["name"] = new_frame.method
635+
method["data"] = {
636+
"deobfuscation_status": "deobfuscated"
637+
if method.get("signature", None)
638+
else "partial"
639+
}
640+
641+
if new_frame.file:
642+
method["source_file"] = new_frame.file
643+
644+
if new_frame.line:
645+
method["source_line"] = new_frame.line
646+
637647
bottom_class = mapped[-1].class_name
638648
method["inline_frames"] = [
639649
{
640650
"class_name": new_frame.class_name,
651+
"data": {"deobfuscation_status": "deobfuscated"},
641652
"name": new_frame.method,
642653
"source_file": method["source_file"]
643654
if bottom_class == new_frame.class_name
644-
else None,
655+
else "",
645656
"source_line": new_frame.line,
646-
"data": {"deobfuscation_status": "deobfuscated"},
647657
}
648-
for new_frame in mapped
658+
for new_frame in reversed(mapped)
649659
]
660+
661+
# vroom will only take into account frames in this list
662+
# if it exists. since symbolic does not return a signature for
663+
# the frame we deobfuscated, we update it to set
664+
# the deobfuscated signature.
665+
if len(method["inline_frames"]) > 0:
666+
method["inline_frames"][0]["data"] = method["data"]
667+
method["inline_frames"][0]["signature"] = method.get("signature", "")
650668
else:
651669
mapped_class = mapper.remap_class(method["class_name"])
652670
if mapped_class:

Diff for: tests/sentry/profiles/test_java.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from tempfile import mkstemp
2+
3+
import pytest
4+
from symbolic.proguard import ProguardMapper
5+
6+
from sentry.profiles.java import deobfuscate_signature
7+
8+
PROGUARD_SOURCE = b"""\
9+
# compiler: R8
10+
# compiler_version: 2.0.74
11+
# min_api: 16
12+
# pg_map_id: 5b46fdc
13+
# common_typos_disable
14+
# {"id":"com.android.tools.r8.mapping","version":"1.0"}
15+
org.slf4j.helpers.Util$ClassContextSecurityManager -> org.a.b.g$a:
16+
65:65:void <init>() -> <init>
17+
67:67:java.lang.Class[] getClassContext() -> a
18+
69:69:java.lang.Class[] getExtraClassContext() -> a
19+
65:65:void <init>(org.slf4j.helpers.Util$1) -> <init>
20+
"""
21+
22+
23+
@pytest.fixture
def mapper():
    """Provide a ProguardMapper loaded from PROGUARD_SOURCE.

    The mapping is written to a temporary file because the mapper is opened
    from a file path.
    """
    fd, mapping_file_path = mkstemp()
    # mkstemp() hands back an already-open descriptor. Wrapping that
    # descriptor (rather than reopening the path) lets the `with` block
    # close it, so it is not leaked.
    with open(fd, "wb") as f:
        f.write(PROGUARD_SOURCE)
    mapper = ProguardMapper.open(mapping_file_path)
    assert mapper.has_line_info
    return mapper
31+
32+
33+
@pytest.mark.parametrize(
    ("obfuscated", "expected"),
    [
        # Signatures that cannot be deobfuscated produce an empty string.
        ("", ""),
        ("()", ""),
        # Well-formed signatures; a void return type is omitted.
        ("()V", "()"),
        ("([I)V", "(int[])"),
        ("(III)V", "(int, int, int)"),
        ("([Ljava/lang/String;)V", "(java.lang.String[])"),
        ("([[J)V", "(long[][])"),
        ("(I)I", "(int): int"),
        ("([B)V", "(byte[])"),
    ],
)
def test_deobfuscate_signature(mapper, obfuscated, expected):
    """Check each obfuscated signature renders to the expected string."""
    actual = deobfuscate_signature(mapper, obfuscated)
    assert actual == expected

Diff for: tests/sentry/profiles/test_task.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -127,16 +127,18 @@ def test_basic_deobfuscation(self):
127127
"profile": {
128128
"methods": [
129129
{
130-
"name": "a",
131130
"abs_path": None,
132131
"class_name": "org.a.b.g$a",
132+
"name": "a",
133+
"signature": "()V",
133134
"source_file": None,
134135
"source_line": 67,
135136
},
136137
{
137-
"name": "a",
138138
"abs_path": None,
139139
"class_name": "org.a.b.g$a",
140+
"name": "a",
141+
"signature": "()V",
140142
"source_file": None,
141143
"source_line": 69,
142144
},
@@ -178,16 +180,18 @@ def test_inline_deobfuscation(self):
178180
"profile": {
179181
"methods": [
180182
{
181-
"name": "onClick",
182183
"abs_path": None,
183184
"class_name": "e.a.c.a",
185+
"name": "onClick",
186+
"signature": "()V",
184187
"source_file": None,
185188
"source_line": 2,
186189
},
187190
{
188-
"name": "t",
189191
"abs_path": None,
190192
"class_name": "io.sentry.sample.MainActivity",
193+
"name": "t",
194+
"signature": "()V",
191195
"source_file": "MainActivity.java",
192196
"source_line": 1,
193197
},
@@ -200,21 +204,24 @@ def test_inline_deobfuscation(self):
200204
_deobfuscate(profile, project)
201205
frames = profile["profile"]["methods"]
202206

203-
assert sum(len(f.get("inline_frames", [{}])) for f in frames) == 4
207+
assert sum(len(f.get("inline_frames", [])) for f in frames) == 3
204208

205209
assert frames[0]["name"] == "onClick"
206210
assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4"
207211

212+
assert frames[1]["inline_frames"][0]["name"] == "onClickHandler"
213+
assert frames[1]["inline_frames"][0]["source_line"] == 40
208214
assert frames[1]["inline_frames"][0]["source_file"] == "MainActivity.java"
209215
assert frames[1]["inline_frames"][0]["class_name"] == "io.sentry.sample.MainActivity"
210-
assert frames[1]["inline_frames"][0]["name"] == "bar"
211-
assert frames[1]["inline_frames"][0]["source_line"] == 54
216+
assert frames[1]["inline_frames"][0]["signature"] == "()"
217+
212218
assert frames[1]["inline_frames"][1]["name"] == "foo"
213219
assert frames[1]["inline_frames"][1]["source_line"] == 44
214-
assert frames[1]["inline_frames"][2]["name"] == "onClickHandler"
215-
assert frames[1]["inline_frames"][2]["source_line"] == 40
220+
216221
assert frames[1]["inline_frames"][2]["source_file"] == "MainActivity.java"
217222
assert frames[1]["inline_frames"][2]["class_name"] == "io.sentry.sample.MainActivity"
223+
assert frames[1]["inline_frames"][2]["name"] == "bar"
224+
assert frames[1]["inline_frames"][2]["source_line"] == 54
218225

219226
def test_error_on_resolving(self):
220227
out = BytesIO()

0 commit comments

Comments
 (0)