Skip to content

feat(derived_code_mappings): Support Java #86280

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,24 @@
# We only care about extensions of files which would show up in stacktraces after symbolication
SUPPORTED_EXTENSIONS = [
"clj",
"cljc",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AbhiPrasad @lcian does it look good to you?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep.
I've found that for Groovy there are also a few more possible extensions: .gvy, .gy, .gsh, even though this is not very important considering the usage of Groovy, etc.
Source: https://blog.mrhaki.com/2011/10/groovy-goodness-default-groovy-script.html

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should split up this constant to categorize by language, makes it a bit easier to review.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I intend to refactor this.

"cljcs",
"cs",
"go",
"groovy",
"java",
"js",
"jsp",
"jsx",
"kt",
"kts",
"mjs",
"php",
"py",
"rake",
"rb",
"scala",
"sc",
"ts",
"tsx",
]
Expand Down
59 changes: 56 additions & 3 deletions src/sentry/issues/auto_source_code_config/code_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
RepoTreesIntegration,
get_extension,
)
from sentry.issues.auto_source_code_config.constants import (
EXTRACT_FILENAME_FROM_MODULE_AND_ABS_PATH,
)
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.models.repository import Repository
Expand Down Expand Up @@ -51,6 +54,14 @@ class NeedsExtension(Exception):
pass


class MissingModuleOrAbsPath(Exception):
    """Raised when a frame lacks the ``module`` or ``abs_path`` needed to derive its path."""


class DoesNotFollowJavaPackageNamingConvention(Exception):
    """Raised when a Java module name contains no ``.`` and so has no package part."""


def derive_code_mappings(
organization: Organization,
frame: Mapping[str, Any],
Expand All @@ -73,7 +84,10 @@ def derive_code_mappings(
# XXX: Look at sentry.interfaces.stacktrace and maybe use that
class FrameInfo:
def __init__(self, frame: Mapping[str, Any], platform: str | None = None) -> None:
# XXX: platform will be used in a following PR
if platform in EXTRACT_FILENAME_FROM_MODULE_AND_ABS_PATH:
self.frame_info_from_module(frame)
return

frame_file_path = frame["filename"]
frame_file_path = self.transformations(frame_file_path)

Expand Down Expand Up @@ -123,6 +137,15 @@ def transformations(self, frame_file_path: str) -> str:

return frame_file_path

def frame_info_from_module(self, frame: Mapping[str, Any]) -> None:
    """Populate the path attributes from the frame's ``module`` and ``abs_path``.

    Sets ``stack_root``, ``raw_path`` and ``normalized_path`` from the result
    of ``get_path_from_module``; the raw and normalized paths are the same
    value.

    Raises:
        MissingModuleOrAbsPath: if ``module`` or ``abs_path`` is absent or empty.
    """
    # Guard first so the happy path below stays flat.
    if not (frame.get("module") and frame.get("abs_path")):
        raise MissingModuleOrAbsPath("Investigate why the data is missing.")

    root, path = get_path_from_module(frame["module"], frame["abs_path"])
    self.stack_root = root
    self.raw_path = path
    self.normalized_path = path

def __repr__(self) -> str:
return f"FrameInfo: {self.raw_path}"

Expand Down Expand Up @@ -214,8 +237,12 @@ def _stacktrace_buckets(
buckets[frame_filename.stack_root].append(frame_filename)
except UnsupportedFrameInfo:
logger.warning("Frame's filepath not supported: %s", frame.get("filename"))
except MissingModuleOrAbsPath:
logger.warning("Do not panic. I'm collecting this data.")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once I review the data I will remove reporting it to Sentry.

except NeedsExtension:
logger.warning("Needs extension: %s", frame.get("filename"))
except DoesNotFollowJavaPackageNamingConvention:
pass
except Exception:
logger.exception("Unable to split stacktrace path into buckets")

Expand Down Expand Up @@ -507,8 +534,10 @@ def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]:
return (stack_root, "")
elif source_path.endswith(stack_path): # "Packaged" logic
source_prefix = source_path.rpartition(stack_path)[0]
package_dir = stack_path.split("/")[0]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Python, it has always been a single word representing the package name (e.g. sentry) while in Java it is multiple words (e.g. io.sentry.some_app_name), thus, we need to change it.

return (f"{stack_root}{package_dir}/", f"{source_prefix}{package_dir}/")
return (
f"{stack_root}{frame_filename.stack_root}/",
f"{source_prefix}{frame_filename.stack_root}/",
)
Comment on lines +538 to +540
Copy link
Member

@MichaelSun48 MichaelSun48 Mar 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any change to find_roots makes me nervous since this function effectively calculates the code mapping and was very sensitive to small changes, at least in my experience.

If it passes the tests, I think we should be fine, but I would keep a very close eye on related metrics after this PR lands.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have been making sure the tests are sound. It has not required any changes to the Python packages.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great! Apologies if I'm being a little overly paranoid 😅

elif stack_path.endswith(source_path):
stack_prefix = stack_path.rpartition(source_path)[0]
return (f"{stack_root}{stack_prefix}", "")
Expand Down Expand Up @@ -541,3 +570,27 @@ def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]:
# validate_source_url should have ensured the file names match
# so if we get here something went wrong and there is a bug
raise UnexpectedPathException("Could not find common root from paths")


# Based on https://github.com/getsentry/symbolicator/blob/450f1d6a8c346405454505ed9ca87e08a6ff34b7/crates/symbolicator-proguard/src/symbolication.rs#L450-L485
def get_path_from_module(module: str, abs_path: str) -> tuple[str, str]:
"""This attempts to generate a modified module and a real path from a Java module name and filename.
Returns a tuple of (stack_root, source_path).
"""
# An `abs_path` is valid if it contains a `.` and doesn't contain a `$`.
if "$" in abs_path or "." not in abs_path:
# Split the module at the first '$' character and take the part before it
# If there's no '$', use the entire module
file_path = module.split("$", 1)[0] if "$" in module else module
stack_root = module.rsplit(".", 1)[0].replace(".", "/")
return stack_root, file_path.replace(".", "/")

if "." not in module:
raise DoesNotFollowJavaPackageNamingConvention

# If module has a dot, take everything before the last dot
# com.example.foo.Bar$InnerClass -> com/example/foo
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the code below doesn't have a trailing /. Maybe we should update the comment to avoid confusion?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call; thanks!

stack_root = module.rsplit(".", 1)[0].replace(".", "/")
file_path = f"{stack_root}/{abs_path}"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In symbolicator we use only the part of `abs_path` before the last `.`. But there we always append `.jvm` as a fake file extension. I'm not entirely sure what works best here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would not work for us either.

I made up these two cases which are not going to help in deriving code mappings. However, we have many other cases that would help, thus, it's okay if these don't work.

            pytest.param(
                {"module": "foo.bar.Baz", "abs_path": "no_extension"},
                "foo/bar",
                "foo/bar/Baz",  # The path does not use the abs_path
                id="invalid_abs_path_no_extension",
            ),
            pytest.param(
                {"module": "foo.bar.Baz", "abs_path": "foo$bar"},
                "foo/bar",
                "foo/bar/Baz",  # The path does not use the abs_path
                id="invalid_abs_path_dollar_sign",
            ),


return stack_root, file_path
8 changes: 5 additions & 3 deletions src/sentry/issues/auto_source_code_config/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
SUPPORTED_INTEGRATIONS = ["github"]
# XXX: We may want to change these constants into a configuration object
# Any new languages should also require updating the stacktraceLink.tsx and repo_trees.py SUPPORTED_EXTENSIONS
SUPPORTED_LANGUAGES = [
"csharp",
Expand All @@ -9,7 +10,8 @@
"python",
"ruby",
]
# These languages will run as dry-run mode by default
DRY_RUN_PLATFORMS: list[str] = []
# Some languages will also process system frames
PROCESS_ALL_FRAMES: list[str] = []
PROCESS_ALL_FRAMES: list[str] = ["java"]
# Extract filename from module and abs_path
# e.g. com.foo.bar.Baz$handle$1, Baz.kt -> com/foo/bar/Baz.kt
EXTRACT_FILENAME_FROM_MODULE_AND_ABS_PATH = ["java"]
4 changes: 1 addition & 3 deletions src/sentry/issues/auto_source_code_config/stacktraces.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
logger = logging.getLogger(__name__)


def get_frames_to_process(
data: NodeData | dict[str, Any], platform: str | None = None
) -> list[dict[str, Any]]:
def get_frames_to_process(data: NodeData | dict[str, Any], platform: str) -> list[dict[str, Any]]:
"""It flattens all processableframes from the event's data."""
stacktraces = get_stacktraces(data)
frames_to_process = []
Expand Down
5 changes: 2 additions & 3 deletions src/sentry/issues/auto_source_code_config/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@
from sentry.utils import metrics
from sentry.utils.locking import UnableToAcquireLock

from .constants import DRY_RUN_PLATFORMS
from .integration_utils import (
InstallationCannotGetTreesError,
InstallationNotFoundError,
get_installation,
)
from .stacktraces import get_frames_to_process
from .utils import supported_platform
from .utils import is_dry_run_platform, supported_platform

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -80,7 +79,7 @@ def process_event(project_id: int, group_id: int, event_id: str) -> list[CodeMap
trees = get_trees_for_org(installation, org, extra)
trees_helper = CodeMappingTreesHelper(trees)
code_mappings = trees_helper.generate_code_mappings(frames_to_process, platform)
if platform not in DRY_RUN_PLATFORMS:
if not is_dry_run_platform(platform):
set_project_codemappings(code_mappings, installation, project, platform)
except (InstallationNotFoundError, InstallationCannotGetTreesError):
pass
Expand Down
16 changes: 13 additions & 3 deletions src/sentry/issues/auto_source_code_config/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
from .constants import DRY_RUN_PLATFORMS, SUPPORTED_LANGUAGES
from sentry import options

from .constants import SUPPORTED_LANGUAGES

def supported_platform(platform: str | None) -> bool:
return (platform or "") in SUPPORTED_LANGUAGES + DRY_RUN_PLATFORMS

def supported_platform(platform: str | None = None) -> bool:
    """Return whether ``platform`` is handled by auto source code config.

    A platform qualifies when it appears in ``SUPPORTED_LANGUAGES`` or in the
    sequence returned by ``dry_run_platforms()``.
    """
    # Check each collection on its own rather than concatenating them: the
    # dry-run option is registered as a generic sequence, so its value may not
    # be a list, and ``list + <other sequence type>`` raises ``TypeError``.
    # This also avoids building a throwaway list on every call.
    return platform in SUPPORTED_LANGUAGES or platform in dry_run_platforms()


def dry_run_platforms() -> list[str]:
    """Return the value of the dry-run platforms option."""
    option_name = "issues.auto_source_code_config.dry-run-platforms"
    return options.get(option_name)


def is_dry_run_platform(platform: str | None = None) -> bool:
    """Return whether ``platform`` is listed in the dry-run platforms option."""
    configured = dry_run_platforms()
    return platform in configured
7 changes: 7 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,13 @@
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

# List of platforms that will run in dry-run mode by default.
register(
"issues.auto_source_code_config.dry-run-platforms",
type=Sequence,
default=[],
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

# Percentage of orgs that will be put into a bucket using the split rate below.
register(
Expand Down
5 changes: 3 additions & 2 deletions src/sentry/tasks/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,10 +1003,11 @@ def process_code_mappings(job: PostProcessJob) -> None:
project = event.project
group_id = event.group_id

if not supported_platform(event.data.get("platform")):
platform = event.data.get("platform", "not_available")
if not supported_platform(platform):
return

frames_to_process = get_frames_to_process(event.data, event.platform)
frames_to_process = get_frames_to_process(event.data, platform)
if not frames_to_process:
return

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
from sentry.issues.auto_source_code_config.code_mapping import (
CodeMapping,
CodeMappingTreesHelper,
DoesNotFollowJavaPackageNamingConvention,
FrameInfo,
MissingModuleOrAbsPath,
NeedsExtension,
UnexpectedPathException,
UnsupportedFrameInfo,
Expand Down Expand Up @@ -124,6 +126,63 @@ def test_raises_no_extension(self) -> None:
with pytest.raises(NeedsExtension):
FrameInfo({"filename": filepath})

@pytest.mark.parametrize(
"frame, expected_exception",
[
pytest.param({}, MissingModuleOrAbsPath, id="no_module"),
pytest.param({"module": "foo"}, MissingModuleOrAbsPath, id="no_abs_path"),
pytest.param(
# Classes without declaring a package are placed in
# the unnamed package which cannot be imported.
# https://docs.oracle.com/javase/specs/jls/se8/html/jls-7.html#jls-7.4.2
{"module": "NoPackageName", "abs_path": "OtherActivity.java"},
DoesNotFollowJavaPackageNamingConvention,
id="unnamed_package",
),
],
)
def test_java_raises_exception(
self, frame: dict[str, Any], expected_exception: type[Exception]
) -> None:
"""Building a Java FrameInfo from an unusable frame raises the expected exception."""
with pytest.raises(expected_exception):
FrameInfo(frame, "java")

@pytest.mark.parametrize(
"frame, expected_stack_root, expected_normalized_path",
[
pytest.param(
{"module": "foo.bar.Baz$handle$1", "abs_path": "baz.java"},
"foo/bar",
"foo/bar/baz.java",
id="dollar_symbol_in_module",
),
pytest.param(
{"module": "foo.bar.Baz", "abs_path": "baz.extra.java"},
"foo/bar",
"foo/bar/baz.extra.java",
id="two_dots_in_abs_path",
),
pytest.param(
{"module": "foo.bar.Baz", "abs_path": "no_extension"},
"foo/bar",
"foo/bar/Baz", # The path does not use the abs_path
id="invalid_abs_path_no_extension",
),
pytest.param(
{"module": "foo.bar.Baz", "abs_path": "foo$bar"},
"foo/bar",
"foo/bar/Baz", # The path does not use the abs_path
id="invalid_abs_path_dollar_sign",
),
],
)
def test_java_valid_frames(
self, frame: dict[str, Any], expected_stack_root: str, expected_normalized_path: str
) -> None:
"""A Java FrameInfo exposes the expected stack root and normalized path."""
# The stack root is the module's package with dots turned into slashes;
# the last two cases show the path falling back to the module when the
# abs_path has no extension or contains a dollar sign.
frame_info = FrameInfo(frame, "java")
assert frame_info.stack_root == expected_stack_root
assert frame_info.normalized_path == expected_normalized_path

@pytest.mark.parametrize(
"frame_filename, prefix",
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from sentry.shared_integrations.exceptions import ApiError
from sentry.testutils.asserts import assert_failure_metric, assert_halt_metric
from sentry.testutils.cases import TestCase
from sentry.testutils.helpers.options import override_options
from sentry.testutils.silo import assume_test_silo_mode_of
from sentry.testutils.skips import requires_snuba
from sentry.utils.locking import UnableToAcquireLock
Expand Down Expand Up @@ -178,7 +179,7 @@ def test_dry_run_platform(self) -> None:
file_in_repo = "src/foo/bar.py"
with (
patch(f"{CODE_ROOT}.task.supported_platform", return_value=True),
patch(f"{CODE_ROOT}.task.DRY_RUN_PLATFORMS", ["other"]),
patch(f"{CODE_ROOT}.task.is_dry_run_platform", return_value=True),
):
# No code mapping will be stored, however, we get what would have been created
code_mappings = self._process_and_assert_no_code_mapping(
Expand Down Expand Up @@ -469,3 +470,37 @@ def test_auto_source_code_config_no_normalization(self) -> None:
expected_stack_root="",
expected_source_root="",
)


class TestJavaDeriveCodeMappings(LanguageSpecificDeriveCodeMappings):
"""Code mapping derivation for Java frames, run with Java listed as a dry-run platform."""
# Option override applied in each test so that Java is a dry-run platform.
option = {"issues.auto_source_code_config.dry-run-platforms": ["java"]}
platform = "java"

def test_very_short_module_name(self) -> None:
"""A single-segment package name still yields a code mapping."""
with override_options(self.option):
# No code mapping will be stored, however, we get what would have been created
code_mappings = self._process_and_assert_no_code_mapping(
repo_files=["src/a/SomeShortPackageNameClass.java"],
frames=[
{
"module": "a.SomeShortPackageNameClass",
"abs_path": "SomeShortPackageNameClass.java",
}
],
platform=self.platform,
)
assert len(code_mappings) == 1
assert code_mappings[0].stacktrace_root == "a/"
assert code_mappings[0].source_path == "src/a/"

def test_handles_dollar_sign_in_module(self) -> None:
"""A module with a ``$`` suffix maps to the package directory of its source file."""
with override_options(self.option):
# No code mapping will be stored, however, we get what would have been created
code_mappings = self._process_and_assert_no_code_mapping(
repo_files=["src/com/example/foo/Bar.kt"],
frames=[{"module": "com.example.foo.Bar$InnerClass", "abs_path": "Bar.kt"}],
platform=self.platform,
)
assert len(code_mappings) == 1
assert code_mappings[0].stacktrace_root == "com/example/foo/"
assert code_mappings[0].source_path == "src/com/example/foo/"
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,5 @@ def test_get_frames_to_process(
],
)
def test_find_stacktrace_empty(frames: list[dict[str, Any]], expected: list[str]) -> None:
frames = get_frames_to_process(_exception_with_stacktrace(frames))
frames = get_frames_to_process(_exception_with_stacktrace(frames), "python")
assert frames == expected
Loading