diff --git a/src/sentry/integrations/github/client.py b/src/sentry/integrations/github/client.py index f4c897775d6de6..e4820c401f61fc 100644 --- a/src/sentry/integrations/github/client.py +++ b/src/sentry/integrations/github/client.py @@ -26,6 +26,7 @@ from sentry.integrations.source_code_management.repo_trees import RepoTreesClient from sentry.integrations.source_code_management.repository import RepositoryClient from sentry.integrations.types import EXTERNAL_PROVIDERS, ExternalProviders +from sentry.models.pullrequest import PullRequest from sentry.models.repository import Repository from sentry.shared_integrations.client.proxy import IntegrationProxyClient from sentry.shared_integrations.exceptions import ApiError, ApiRateLimitedError @@ -254,13 +255,13 @@ def get_pullrequest_from_commit(self, repo: str, sha: str) -> Any: """ return self.get(f"/repos/{repo}/commits/{sha}/pulls") - def get_pullrequest_files(self, repo: str, pull_number: str) -> Any: + def get_pullrequest_files(self, repo: Repository, pr: PullRequest) -> Any: """ https://docs.github.com/en/rest/pulls/pulls#list-pull-requests-files Returns up to 30 files associated with a pull request. Responses are paginated. """ - return self.get(f"/repos/{repo}/pulls/{pull_number}/files") + return self.get(f"/repos/{repo.name}/pulls/{pr.key}/files") def get_repo(self, repo: str) -> Any: """ diff --git a/src/sentry/integrations/github/integration.py b/src/sentry/integrations/github/integration.py index 69f0e52bca0edc..638bab9272bbb3 100644 --- a/src/sentry/integrations/github/integration.py +++ b/src/sentry/integrations/github/integration.py @@ -26,12 +26,24 @@ IntegrationMetadata, IntegrationProvider, ) -from sentry.integrations.github.constants import RATE_LIMITED_MESSAGE +from sentry.integrations.github.constants import ISSUE_LOCKED_ERROR_MESSAGE, RATE_LIMITED_MESSAGE from sentry.integrations.github.tasks.link_all_repos import link_all_repos +from sentry.integrations.github.tasks.utils import GithubAPIErrorType from sentry.integrations.models.integration import Integration from sentry.integrations.models.organization_integration import OrganizationIntegration from sentry.integrations.services.repository import RpcRepository, repository_service -from sentry.integrations.source_code_management.commit_context import CommitContextIntegration +from sentry.integrations.source_code_management.commit_context import ( + OPEN_PR_MAX_FILES_CHANGED, + OPEN_PR_MAX_LINES_CHANGED, + OPEN_PR_METRICS_BASE, + CommitContextIntegration, + CommitContextOrganizationOptionKeys, + CommitContextReferrerIds, + CommitContextReferrers, + PullRequestFile, + PullRequestIssue, +) +from sentry.integrations.source_code_management.language_parsers import PATCH_PARSERS from sentry.integrations.source_code_management.repo_trees import RepoTreesIntegration from sentry.integrations.source_code_management.repository import RepositoryIntegration from sentry.integrations.tasks.migrate_repo import migrate_repo @@ -39,12 +51,18 @@ IntegrationPipelineViewEvent, IntegrationPipelineViewType, ) +from sentry.models.group import Group +from sentry.models.organization import Organization +from sentry.models.pullrequest import PullRequest from sentry.models.repository import Repository from sentry.organizations.absolute_url import generate_organization_url from sentry.organizations.services.organization.model import RpcOrganization from sentry.pipeline import Pipeline, PipelineView from sentry.shared_integrations.constants import ERR_INTERNAL, ERR_UNAUTHORIZED from 
sentry.shared_integrations.exceptions import ApiError, IntegrationError +from sentry.snuba.referrer import Referrer +from sentry.templatetags.sentry_helpers import small_count +from sentry.types.referrer_ids import GITHUB_OPEN_PR_BOT_REFERRER, GITHUB_PR_BOT_REFERRER from sentry.utils import metrics from sentry.utils.http import absolute_uri from sentry.web.frontend.base import determine_active_organization @@ -173,11 +191,9 @@ def get_document_origin(org) -> str: class GitHubIntegration( RepositoryIntegration, GitHubIssuesSpec, CommitContextIntegration, RepoTreesIntegration ): - codeowners_locations = ["CODEOWNERS", ".github/CODEOWNERS", "docs/CODEOWNERS"] + integration_name = "github" - @property - def integration_name(self) -> str: - return "github" + codeowners_locations = ["CODEOWNERS", ".github/CODEOWNERS", "docs/CODEOWNERS"] def get_client(self) -> GitHubBaseClient: if not self.org_integration: @@ -290,6 +306,248 @@ def search_issues(self, query: str | None, **kwargs) -> dict[str, Any]: assert isinstance(resp, dict) return resp + # CommitContextIntegration methods + + commit_context_referrers = CommitContextReferrers( + pr_comment_bot=Referrer.GITHUB_PR_COMMENT_BOT, + ) + commit_context_referrer_ids = CommitContextReferrerIds( + pr_bot=GITHUB_PR_BOT_REFERRER, + open_pr_bot=GITHUB_OPEN_PR_BOT_REFERRER, + ) + commit_context_organization_option_keys = CommitContextOrganizationOptionKeys( + pr_bot="sentry:github_pr_bot", + ) + + def format_comment_url(self, url: str, referrer: str) -> str: + return url + "?referrer=" + referrer + + def format_pr_comment(self, issue_ids: list[int]) -> str: + single_issue_template = "- ‼️ **{title}** `{subtitle}` [View Issue]({url})" + comment_body_template = """\ +## Suspect Issues +This pull request was deployed and Sentry observed the following issues: + +{issue_list} + +Did you find this useful? React with a 👍 or 👎""" + + def format_subtitle(subtitle: str) -> str: + return subtitle[:47] + "..." 
if len(subtitle) > 50 else subtitle + + issues = Group.objects.filter(id__in=issue_ids).order_by("id").all() + + issue_list = "\n".join( + single_issue_template.format( + title=issue.title, + subtitle=format_subtitle(issue.culprit), + url=self.format_comment_url( + issue.get_absolute_url(), referrer=self.commit_context_referrer_ids.pr_bot + ), + ) + for issue in issues + ) + + return comment_body_template.format(issue_list=issue_list) + + def build_pr_comment_data( + self, + organization: Organization, + repo: Repository, + pr_key: str, + comment_body: str, + issue_ids: list[int], + ) -> dict[str, Any]: + enabled_copilot = features.has("organizations:gen-ai-features", organization) + + comment_data = { + "body": comment_body, + } + if enabled_copilot: + comment_data["actions"] = [ + { + "name": f"Root cause #{i + 1}", + "type": "copilot-chat", + "prompt": f"@sentry root cause issue {str(issue_id)} with PR URL https://github.com/{repo.name}/pull/{str(pr_key)}", + } + for i, issue_id in enumerate(issue_ids[:3]) + ] + + return comment_data + + def queue_comment_task(self, pullrequest_id: int, project_id: int) -> None: + from sentry.integrations.github.tasks.pr_comment import github_comment_workflow + + github_comment_workflow.delay(pullrequest_id=pullrequest_id, project_id=project_id) + + def on_create_or_update_comment_error(self, api_error: ApiError, metrics_base: str) -> bool: + if api_error.json: + if ISSUE_LOCKED_ERROR_MESSAGE in api_error.json.get("message", ""): + metrics.incr( + metrics_base.format(integration=self.integration_name, key="error"), + tags={"type": "issue_locked_error"}, + ) + return True + + elif RATE_LIMITED_MESSAGE in api_error.json.get("message", ""): + metrics.incr( + metrics_base.format(integration=self.integration_name, key="error"), + tags={"type": "rate_limited_error"}, + ) + return True + + return False + + def get_pr_files_safe_for_comment( + self, repo: Repository, pr: PullRequest + ) -> list[dict[str, str]]: + client = self.get_client() + + logger.info("github.open_pr_comment.check_safe_for_comment") + try: + pr_files = client.get_pullrequest_files(repo=repo, pr=pr) + except ApiError as e: + logger.info("github.open_pr_comment.api_error") + if e.json and RATE_LIMITED_MESSAGE in e.json.get("message", ""): + metrics.incr( + OPEN_PR_METRICS_BASE.format(integration="github", key="api_error"), + tags={"type": GithubAPIErrorType.RATE_LIMITED.value, "code": e.code}, + ) + elif e.code == 404: + metrics.incr( + OPEN_PR_METRICS_BASE.format(integration="github", key="api_error"), + tags={"type": GithubAPIErrorType.MISSING_PULL_REQUEST.value, "code": e.code}, + ) + else: + metrics.incr( + OPEN_PR_METRICS_BASE.format(integration="github", key="api_error"), + tags={"type": GithubAPIErrorType.UNKNOWN.value, "code": e.code}, + ) + logger.exception( + "github.open_pr_comment.unknown_api_error", extra={"error": str(e)} + ) + return [] + + changed_file_count = 0 + changed_lines_count = 0 + filtered_pr_files = [] + + patch_parsers = PATCH_PARSERS + # NOTE: if we are testing beta patch parsers, add check here + + for file in pr_files: + filename = file["filename"] + # we only count the file if it's modified and if the file extension is in the list of supported file extensions + # we cannot look at deleted or newly added files because we cannot extract functions from the diffs + if file["status"] != "modified" or filename.split(".")[-1] not in patch_parsers: + continue + + changed_file_count += 1 + changed_lines_count += file["changes"] + filtered_pr_files.append(file) + + if 
changed_file_count > OPEN_PR_MAX_FILES_CHANGED: + metrics.incr( + OPEN_PR_METRICS_BASE.format(integration="github", key="rejected_comment"), + tags={"reason": "too_many_files"}, + ) + return [] + if changed_lines_count > OPEN_PR_MAX_LINES_CHANGED: + metrics.incr( + OPEN_PR_METRICS_BASE.format(integration="github", key="rejected_comment"), + tags={"reason": "too_many_lines"}, + ) + return [] + + return filtered_pr_files + + def get_pr_files(self, pr_files: list[dict[str, str]]) -> list[PullRequestFile]: + # new files will not have sentry issues associated with them + # only fetch Python files + pullrequest_files = [ + PullRequestFile(filename=file["filename"], patch=file["patch"]) + for file in pr_files + if "patch" in file + ] + + logger.info("github.open_pr_comment.pr_filenames", extra={"count": len(pullrequest_files)}) + + return pullrequest_files + + def format_open_pr_comment(self, issue_tables: list[str]) -> str: + comment_body_template = """\ +## 🔍 Existing Issues For Review +Your pull request is modifying functions with the following pre-existing issues: + +{issue_tables} +--- + +Did you find this useful? React with a 👍 or 👎""" + + return comment_body_template.format(issue_tables="\n".join(issue_tables)) + + def format_issue_table( + self, + diff_filename: str, + issues: list[PullRequestIssue], + patch_parsers: dict[str, Any], + toggle: bool, + ) -> str: + description_length = 52 + + issue_table_template = """\ +📄 File: **{filename}** + +| Function | Unhandled Issue | +| :------- | :----- | +{issue_rows}""" + + issue_table_toggle_template = """\ +
+<details>
+<summary>📄 File: {filename} (Click to Expand)</summary>
+
+| Function | Unhandled Issue |
+| :------- | :----- |
+{issue_rows}
+</details>
""" + + def format_subtitle(title_length: int, subtitle: str) -> str: + # the title length + " " + subtitle should be <= 52 + subtitle_length = description_length - title_length - 1 + return ( + subtitle[: subtitle_length - 3] + "..." + if len(subtitle) > subtitle_length + else subtitle + ) + + language_parser = patch_parsers.get(diff_filename.split(".")[-1], None) + + if not language_parser: + return "" + + issue_row_template = language_parser.issue_row_template + + issue_rows = "\n".join( + [ + issue_row_template.format( + title=issue.title, + subtitle=format_subtitle(len(issue.title), issue.subtitle), + url=self.format_comment_url( + issue.url, referrer=self.commit_context_referrer_ids.open_pr_bot + ), + event_count=small_count(issue.event_count), + function_name=issue.function_name, + affected_users=small_count(issue.affected_users), + ) + for issue in issues + ] + ) + + if toggle: + return issue_table_toggle_template.format(filename=diff_filename, issue_rows=issue_rows) + + return issue_table_template.format(filename=diff_filename, issue_rows=issue_rows) + class GitHubIntegrationProvider(IntegrationProvider): key = "github" diff --git a/src/sentry/integrations/github/tasks/open_pr_comment.py b/src/sentry/integrations/github/tasks/open_pr_comment.py index 2b7b38580b3cfa..4d5723905e474c 100644 --- a/src/sentry/integrations/github/tasks/open_pr_comment.py +++ b/src/sentry/integrations/github/tasks/open_pr_comment.py @@ -1,409 +1,15 @@ from __future__ import annotations -import itertools import logging -from datetime import UTC, datetime, timedelta -from typing import Any -from django.db.models import Value -from django.db.models.functions import StrIndex -from snuba_sdk import ( - BooleanCondition, - BooleanOp, - Column, - Condition, - Direction, - Entity, - Function, - Op, - OrderBy, - Query, -) -from snuba_sdk import Request as SnubaRequest - -from sentry.constants import EXTENSION_LANGUAGE_MAP, ObjectStatus -from sentry.integrations.github.client import GitHubApiClient -from sentry.integrations.github.constants import ISSUE_LOCKED_ERROR_MESSAGE, RATE_LIMITED_MESSAGE -from sentry.integrations.github.tasks.pr_comment import format_comment_url -from sentry.integrations.github.tasks.utils import ( - GithubAPIErrorType, - PullRequestFile, - PullRequestIssue, -) -from sentry.integrations.models.repository_project_path_config import RepositoryProjectPathConfig -from sentry.integrations.services.integration import integration_service -from sentry.integrations.source_code_management.commit_context import CommitContextIntegration -from sentry.integrations.source_code_management.constants import STACKFRAME_COUNT -from sentry.integrations.source_code_management.language_parsers import PATCH_PARSERS -from sentry.models.group import Group, GroupStatus -from sentry.models.organization import Organization -from sentry.models.project import Project -from sentry.models.pullrequest import CommentType, PullRequest -from sentry.models.repository import Repository -from sentry.shared_integrations.exceptions import ApiError +from sentry.integrations.source_code_management.commit_context import run_open_pr_comment_workflow from sentry.silo.base import SiloMode -from sentry.snuba.dataset import Dataset -from sentry.snuba.referrer import Referrer from sentry.tasks.base import instrumented_task from sentry.taskworker.config import TaskworkerConfig from sentry.taskworker.namespaces import integrations_tasks -from sentry.templatetags.sentry_helpers import small_count -from sentry.types.referrer_ids import 
GITHUB_OPEN_PR_BOT_REFERRER -from sentry.utils import metrics -from sentry.utils.snuba import raw_snql_query logger = logging.getLogger(__name__) -OPEN_PR_METRICS_BASE = "{integration}.open_pr_comment.{key}" - -# Caps the number of files that can be modified in a PR to leave a comment -OPEN_PR_MAX_FILES_CHANGED = 7 -# Caps the number of lines that can be modified in a PR to leave a comment -OPEN_PR_MAX_LINES_CHANGED = 500 - -OPEN_PR_COMMENT_BODY_TEMPLATE = """\ -## 🔍 Existing Issues For Review -Your pull request is modifying functions with the following pre-existing issues: - -{issue_tables} ---- - -Did you find this useful? React with a 👍 or 👎""" - -OPEN_PR_ISSUE_TABLE_TEMPLATE = """\ -📄 File: **{filename}** - -| Function | Unhandled Issue | -| :------- | :----- | -{issue_rows}""" - -OPEN_PR_ISSUE_TABLE_TOGGLE_TEMPLATE = """\ -
-<details>
-<summary>📄 File: {filename} (Click to Expand)</summary>
-
-| Function | Unhandled Issue |
-| :------- | :----- |
-{issue_rows}
-</details>
""" - -OPEN_PR_ISSUE_DESCRIPTION_LENGTH = 52 - -MAX_RECENT_ISSUES = 5000 - - -def format_open_pr_comment(issue_tables: list[str]) -> str: - return OPEN_PR_COMMENT_BODY_TEMPLATE.format(issue_tables="\n".join(issue_tables)) - - -def format_open_pr_comment_subtitle(title_length, subtitle): - # the title length + " " + subtitle should be <= 52 - subtitle_length = OPEN_PR_ISSUE_DESCRIPTION_LENGTH - title_length - 1 - return subtitle[: subtitle_length - 3] + "..." if len(subtitle) > subtitle_length else subtitle - - -# for a single file, create a table -def format_issue_table( - diff_filename: str, issues: list[PullRequestIssue], patch_parsers: dict[str, Any], toggle: bool -) -> str: - language_parser = patch_parsers.get(diff_filename.split(".")[-1], None) - - if not language_parser: - return "" - - issue_row_template = language_parser.issue_row_template - - issue_rows = "\n".join( - [ - issue_row_template.format( - title=issue.title, - subtitle=format_open_pr_comment_subtitle(len(issue.title), issue.subtitle), - url=format_comment_url(issue.url, GITHUB_OPEN_PR_BOT_REFERRER), - event_count=small_count(issue.event_count), - function_name=issue.function_name, - affected_users=small_count(issue.affected_users), - ) - for issue in issues - ] - ) - - if toggle: - return OPEN_PR_ISSUE_TABLE_TOGGLE_TEMPLATE.format( - filename=diff_filename, issue_rows=issue_rows - ) - - return OPEN_PR_ISSUE_TABLE_TEMPLATE.format(filename=diff_filename, issue_rows=issue_rows) - - -# for a single file, get the contents -def get_issue_table_contents(issue_list: list[dict[str, Any]]) -> list[PullRequestIssue]: - group_id_to_info = {} - for issue in issue_list: - group_id = issue["group_id"] - group_id_to_info[group_id] = dict(filter(lambda k: k[0] != "group_id", issue.items())) - - issues = Group.objects.filter(id__in=list(group_id_to_info.keys())).all() - - pull_request_issues = [ - PullRequestIssue( - title=issue.title, - subtitle=issue.culprit, - url=issue.get_absolute_url(), - affected_users=issue.count_users_seen( - referrer=Referrer.TAGSTORE_GET_GROUPS_USER_COUNTS_OPEN_PR_COMMENT.value - ), - event_count=group_id_to_info[issue.id]["event_count"], - function_name=group_id_to_info[issue.id]["function_name"], - ) - for issue in issues - ] - pull_request_issues.sort(key=lambda k: k.event_count or 0, reverse=True) - - return pull_request_issues - - -# TODO(cathy): Change the client typing to allow for multiple SCM Integrations -def safe_for_comment( - gh_client: GitHubApiClient, repository: Repository, pull_request: PullRequest -) -> list[dict[str, str]]: - logger.info("github.open_pr_comment.check_safe_for_comment") - try: - pr_files = gh_client.get_pullrequest_files( - repo=repository.name, pull_number=pull_request.key - ) - except ApiError as e: - logger.info("github.open_pr_comment.api_error") - if e.json and RATE_LIMITED_MESSAGE in e.json.get("message", ""): - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="api_error"), - tags={"type": GithubAPIErrorType.RATE_LIMITED.value, "code": e.code}, - ) - elif e.code == 404: - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="api_error"), - tags={"type": GithubAPIErrorType.MISSING_PULL_REQUEST.value, "code": e.code}, - ) - else: - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="api_error"), - tags={"type": GithubAPIErrorType.UNKNOWN.value, "code": e.code}, - ) - logger.exception("github.open_pr_comment.unknown_api_error", extra={"error": str(e)}) - return [] - - changed_file_count = 0 - changed_lines_count = 0 
- filtered_pr_files = [] - - patch_parsers = PATCH_PARSERS - # NOTE: if we are testing beta patch parsers, add check here - - for file in pr_files: - filename = file["filename"] - # we only count the file if it's modified and if the file extension is in the list of supported file extensions - # we cannot look at deleted or newly added files because we cannot extract functions from the diffs - if file["status"] != "modified" or filename.split(".")[-1] not in patch_parsers: - continue - - changed_file_count += 1 - changed_lines_count += file["changes"] - filtered_pr_files.append(file) - - if changed_file_count > OPEN_PR_MAX_FILES_CHANGED: - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="rejected_comment"), - tags={"reason": "too_many_files"}, - ) - return [] - if changed_lines_count > OPEN_PR_MAX_LINES_CHANGED: - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="rejected_comment"), - tags={"reason": "too_many_lines"}, - ) - return [] - - return filtered_pr_files - - -def get_pr_files(pr_files: list[dict[str, str]]) -> list[PullRequestFile]: - # new files will not have sentry issues associated with them - # only fetch Python files - pullrequest_files = [ - PullRequestFile(filename=file["filename"], patch=file["patch"]) - for file in pr_files - if "patch" in file - ] - - logger.info("github.open_pr_comment.pr_filenames", extra={"count": len(pullrequest_files)}) - - return pullrequest_files - - -def get_projects_and_filenames_from_source_file( - org_id: int, - repo_id: int, - pr_filename: str, -) -> tuple[set[Project], set[str]]: - # fetch the code mappings in which the source_root is a substring at the start of pr_filename - code_mappings = ( - RepositoryProjectPathConfig.objects.filter( - organization_id=org_id, - repository_id=repo_id, - ) - .annotate(substring_match=StrIndex(Value(pr_filename), "source_root")) - .filter(substring_match=1) - ) - - project_list: set[Project] = set() - sentry_filenames = set() - - if len(code_mappings): - for code_mapping in code_mappings: - project_list.add(code_mapping.project) - sentry_filenames.add( - pr_filename.replace(code_mapping.source_root, code_mapping.stack_root, 1) - ) - return project_list, sentry_filenames - - -def get_top_5_issues_by_count_for_file( - projects: list[Project], sentry_filenames: list[str], function_names: list[str] -) -> list[dict[str, Any]]: - """ - Given a list of projects, Github filenames reverse-codemapped into filenames in Sentry, - and function names representing the list of functions changed in a PR file, return a - sublist of the top 5 recent unhandled issues ordered by event count. 
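# --- Illustrative sketch (not part of this diff): a plain-Python restatement,
# over hypothetical rows, of what the two-level Snuba query in
# get_top_5_issues_by_count_for_file is meant to compute. The inner query
# counts events per (title, culprit, group_id, function_name); the outer query
# squashes rows sharing title/culprit/function down to one representative
# group_id, ordered by event count and limited to 5.

rows = [
    # (title, culprit, group_id, event_count, function_name)
    ("ValueError", "api.handler", 101, 40, "handle"),
    ("ValueError", "api.handler", 102, 25, "handle"),  # same title/culprit: squashed
    ("KeyError", "db.query", 201, 30, "fetch"),
]

best: dict[tuple[str, str, str], tuple[int, int]] = {}
for title, culprit, group_id, count, func in rows:
    key = (title, culprit, func)
    if key not in best or count > best[key][1]:
        best[key] = (group_id, count)

top_5 = sorted(best.values(), key=lambda pair: pair[1], reverse=True)[:5]
assert top_5 == [(101, 40), (201, 30)]  # one group per title/culprit/function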
- """ - if not len(projects): - return [] - - patch_parsers = PATCH_PARSERS - # NOTE: if we are testing beta patch parsers, add check here - - # fetches the appropriate parser for formatting the snuba query given the file extension - # the extension is never replaced in reverse codemapping - language_parser = patch_parsers.get(sentry_filenames[0].split(".")[-1], None) - - if not language_parser: - return [] - - group_ids = list( - Group.objects.filter( - first_seen__gte=datetime.now(UTC) - timedelta(days=90), - last_seen__gte=datetime.now(UTC) - timedelta(days=14), - status=GroupStatus.UNRESOLVED, - project__in=projects, - ) - .order_by("-times_seen") - .values_list("id", flat=True) - )[:MAX_RECENT_ISSUES] - project_ids = [p.id for p in projects] - - multi_if = language_parser.generate_multi_if(function_names) - - # fetch the count of events for each group_id - subquery = ( - Query(Entity("events")) - .set_select( - [ - Column("title"), - Column("culprit"), - Column("group_id"), - Function("count", [], "event_count"), - Function( - "multiIf", - multi_if, - "function_name", - ), - ] - ) - .set_groupby( - [ - Column("title"), - Column("culprit"), - Column("group_id"), - Column("exception_frames.function"), - ] - ) - .set_where( - [ - Condition(Column("project_id"), Op.IN, project_ids), - Condition(Column("group_id"), Op.IN, group_ids), - Condition(Column("timestamp"), Op.GTE, datetime.now() - timedelta(days=14)), - Condition(Column("timestamp"), Op.LT, datetime.now()), - # NOTE: ideally this would follow suspect commit logic - BooleanCondition( - BooleanOp.OR, - [ - BooleanCondition( - BooleanOp.AND, - [ - Condition( - Function( - "arrayElement", - (Column("exception_frames.filename"), i), - ), - Op.IN, - sentry_filenames, - ), - language_parser.generate_function_name_conditions( - function_names, i - ), - ], - ) - for i in range(-STACKFRAME_COUNT, 0) # first n frames - ], - ), - Condition(Function("notHandled", []), Op.EQ, 1), - ] - ) - .set_orderby([OrderBy(Column("event_count"), Direction.DESC)]) - ) - - # filter on the subquery to squash group_ids with the same title and culprit - # return the group_id with the greatest count of events - query = ( - Query(subquery) - .set_select( - [ - Column("function_name"), - Function( - "arrayElement", - (Function("groupArray", [Column("group_id")]), 1), - "group_id", - ), - Function( - "arrayElement", - (Function("groupArray", [Column("event_count")]), 1), - "event_count", - ), - ] - ) - .set_groupby( - [ - Column("title"), - Column("culprit"), - Column("function_name"), - ] - ) - .set_orderby([OrderBy(Column("event_count"), Direction.DESC)]) - .set_limit(5) - ) - - request = SnubaRequest( - dataset=Dataset.Events.value, - app_id="default", - tenant_ids={"organization_id": projects[0].organization_id}, - query=query, - ) - - try: - return raw_snql_query(request, referrer=Referrer.GITHUB_PR_COMMENT_BOT.value)["data"] - except Exception: - logger.exception( - "github.open_pr_comment.snuba_query_error", extra={"query": request.to_dict()["query"]} - ) - return [] - @instrumented_task( name="sentry.integrations.github.tasks.open_pr_comment_workflow", @@ -413,234 +19,4 @@ def get_top_5_issues_by_count_for_file( ), ) def open_pr_comment_workflow(pr_id: int) -> None: - logger.info("github.open_pr_comment.start_workflow") - - # CHECKS - # check PR exists to get PR key - try: - pull_request = PullRequest.objects.get(id=pr_id) - except PullRequest.DoesNotExist: - logger.info("github.open_pr_comment.pr_missing") - metrics.incr( - 
OPEN_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "missing_pr"}, - ) - return - - # check org option - org_id = pull_request.organization_id - try: - Organization.objects.get_from_cache(id=org_id) - except Organization.DoesNotExist: - logger.exception("github.open_pr_comment.org_missing") - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "missing_org"}, - ) - return - - # check PR repo exists to get repo name - try: - repo = Repository.objects.get(id=pull_request.repository_id) - except Repository.DoesNotExist: - logger.info("github.open_pr_comment.repo_missing", extra={"organization_id": org_id}) - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "missing_repo"}, - ) - return - - # check integration exists to hit Github API with client - integration = integration_service.get_integration( - integration_id=repo.integration_id, status=ObjectStatus.ACTIVE - ) - if not integration: - logger.info("github.open_pr_comment.integration_missing", extra={"organization_id": org_id}) - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "missing_integration"}, - ) - return - - installation = integration.get_installation(organization_id=org_id) - assert isinstance(installation, CommitContextIntegration) - - client = installation.get_client() - - # CREATING THE COMMENT - - # fetch the files in the PR and determine if it is safe to comment - pr_files = safe_for_comment(gh_client=client, repository=repo, pull_request=pull_request) - - if len(pr_files) == 0: - logger.info( - "github.open_pr_comment.not_safe_for_comment", extra={"file_count": len(pr_files)} - ) - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "unsafe_for_comment"}, - ) - return - - pullrequest_files = get_pr_files(pr_files) - - issue_table_contents = {} - top_issues_per_file = [] - - patch_parsers = PATCH_PARSERS - # NOTE: if we are testing beta patch parsers, add check here - - file_extensions = set() - # fetch issues related to the files - for file in pullrequest_files: - projects, sentry_filenames = get_projects_and_filenames_from_source_file( - org_id, repo.id, file.filename - ) - if not len(projects) or not len(sentry_filenames): - continue - - file_extension = file.filename.split(".")[-1] - logger.info( - "github.open_pr_comment.file_extension", - extra={ - "organization_id": org_id, - "repository_id": repo.id, - "extension": file_extension, - }, - ) - - language_parser = patch_parsers.get(file.filename.split(".")[-1], None) - if not language_parser: - logger.info( - "github.open_pr_comment.missing_parser", extra={"extension": file_extension} - ) - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="missing_parser"), - tags={"extension": file_extension}, - ) - continue - - function_names = language_parser.extract_functions_from_patch(file.patch) - - if file_extension in ["js", "jsx"]: - logger.info( - "github.open_pr_comment.javascript", - extra={ - "organization_id": org_id, - "repository_id": repo.id, - "extension": file_extension, - "has_function_names": bool(function_names), - }, - ) - - if file_extension == ["php"]: - logger.info( - "github.open_pr_comment.php", - extra={ - "organization_id": org_id, - "repository_id": repo.id, - "extension": file_extension, - "has_function_names": bool(function_names), - }, - ) - - if file_extension == ["rb"]: - logger.info( - "github.open_pr_comment.ruby", - 
extra={ - "organization_id": org_id, - "repository_id": repo.id, - "extension": file_extension, - "has_function_names": bool(function_names), - }, - ) - - if not len(function_names): - continue - - top_issues = get_top_5_issues_by_count_for_file( - list(projects), list(sentry_filenames), list(function_names) - ) - if not len(top_issues): - continue - - top_issues_per_file.append(top_issues) - file_extensions.add(file_extension) - - issue_table_contents[file.filename] = get_issue_table_contents(top_issues) - - if not len(issue_table_contents): - logger.info("github.open_pr_comment.no_issues") - # don't leave a comment if no issues for files in PR - metrics.incr(OPEN_PR_METRICS_BASE.format(integration="github", key="no_issues")) - return - - # format issues per file into comment - issue_tables = [] - first_table = True - for file in pullrequest_files: - pr_filename = file.filename - issue_table_content = issue_table_contents.get(pr_filename, None) - - if issue_table_content is None: - continue - - if first_table: - issue_table = format_issue_table( - pr_filename, issue_table_content, patch_parsers, toggle=False - ) - first_table = False - else: - # toggle all tables but the first one - issue_table = format_issue_table( - pr_filename, issue_table_content, patch_parsers, toggle=True - ) - - issue_tables.append(issue_table) - - comment_body = format_open_pr_comment(issue_tables) - - # list all issues in the comment - issue_list: list[dict[str, Any]] = list(itertools.chain.from_iterable(top_issues_per_file)) - issue_id_list: list[int] = [issue["group_id"] for issue in issue_list] - - # pick one language from the list of languages in the PR for analytics - languages = [ - EXTENSION_LANGUAGE_MAP[extension] - for extension in file_extensions - if extension in EXTENSION_LANGUAGE_MAP - ] - language = languages[0] if len(languages) else "not found" - - try: - installation.create_or_update_comment( - repo=repo, - pr_key=pull_request.key, - comment_data={"body": comment_body}, - pullrequest_id=pull_request.id, - issue_list=issue_id_list, - comment_type=CommentType.OPEN_PR, - metrics_base=OPEN_PR_METRICS_BASE, - language=language, - ) - except ApiError as e: - if e.json: - if ISSUE_LOCKED_ERROR_MESSAGE in e.json.get("message", ""): - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "issue_locked_error"}, - ) - return - - elif RATE_LIMITED_MESSAGE in e.json.get("message", ""): - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "rate_limited_error"}, - ) - return - - metrics.incr( - OPEN_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "api_error"}, - ) - raise + run_open_pr_comment_workflow(integration_name="github", pullrequest_id=pr_id) diff --git a/src/sentry/integrations/github/tasks/pr_comment.py b/src/sentry/integrations/github/tasks/pr_comment.py index 2c7f2910d26227..59932e519ba84a 100644 --- a/src/sentry/integrations/github/tasks/pr_comment.py +++ b/src/sentry/integrations/github/tasks/pr_comment.py @@ -2,136 +2,25 @@ import logging from datetime import datetime, timedelta, timezone -from typing import Any import sentry_sdk -from django.db import connection -from snuba_sdk import Column, Condition, Direction, Entity, Function, Op, OrderBy, Query -from snuba_sdk import Request as SnubaRequest -from sentry import features from sentry.constants import ObjectStatus -from sentry.integrations.github.constants import ISSUE_LOCKED_ERROR_MESSAGE, RATE_LIMITED_MESSAGE -from 
sentry.integrations.github.tasks.utils import PullRequestIssue +from sentry.integrations.github.constants import RATE_LIMITED_MESSAGE from sentry.integrations.services.integration import integration_service -from sentry.integrations.source_code_management.commit_context import CommitContextIntegration -from sentry.models.group import Group -from sentry.models.groupowner import GroupOwnerType -from sentry.models.options.organization_option import OrganizationOption -from sentry.models.organization import Organization -from sentry.models.project import Project +from sentry.integrations.source_code_management.commit_context import run_pr_comment_workflow from sentry.models.pullrequest import PullRequestComment from sentry.models.repository import Repository from sentry.shared_integrations.exceptions import ApiError from sentry.silo.base import SiloMode -from sentry.snuba.dataset import Dataset -from sentry.snuba.referrer import Referrer from sentry.tasks.base import instrumented_task -from sentry.tasks.commit_context import DEBOUNCE_PR_COMMENT_CACHE_KEY from sentry.taskworker.config import TaskworkerConfig from sentry.taskworker.namespaces import integrations_tasks -from sentry.types.referrer_ids import GITHUB_PR_BOT_REFERRER from sentry.utils import metrics -from sentry.utils.cache import cache from sentry.utils.query import RangeQuerySetWrapper -from sentry.utils.snuba import raw_snql_query logger = logging.getLogger(__name__) -MERGED_PR_METRICS_BASE = "{integration}.pr_comment.{key}" - -MERGED_PR_COMMENT_BODY_TEMPLATE = """\ -## Suspect Issues -This pull request was deployed and Sentry observed the following issues: - -{issue_list} - -Did you find this useful? React with a 👍 or 👎""" - -MERGED_PR_SINGLE_ISSUE_TEMPLATE = "- ‼️ **{title}** `{subtitle}` [View Issue]({url})" - -MAX_SUSPECT_COMMITS = 1000 - - -def format_comment_subtitle(subtitle): - return subtitle[:47] + "..." 
if len(subtitle) > 50 else subtitle - - -def format_comment_url(url, referrer): - return url + "?referrer=" + referrer - - -def format_comment(issues: list[PullRequestIssue]): - issue_list = "\n".join( - [ - MERGED_PR_SINGLE_ISSUE_TEMPLATE.format( - title=issue.title, - subtitle=format_comment_subtitle(issue.subtitle), - url=format_comment_url(issue.url, GITHUB_PR_BOT_REFERRER), - ) - for issue in issues - ] - ) - - return MERGED_PR_COMMENT_BODY_TEMPLATE.format(issue_list=issue_list) - - -def pr_to_issue_query(pr_id: int): - with connection.cursor() as cursor: - cursor.execute( - f""" - SELECT pr.repository_id repo_id, - pr.key pr_key, - pr.organization_id org_id, - array_agg(go.group_id ORDER BY go.date_added) issues - FROM sentry_groupowner go - JOIN sentry_pullrequest_commit c ON c.commit_id = (go.context::jsonb->>'commitId')::bigint - JOIN sentry_pull_request pr ON c.pull_request_id = pr.id - WHERE go.type=0 - AND pr.id={pr_id} - GROUP BY repo_id, - pr_key, - org_id - """, - [GroupOwnerType.SUSPECT_COMMIT.value], - ) - return cursor.fetchall() - - -def get_top_5_issues_by_count(issue_list: list[int], project: Project) -> list[dict[str, Any]]: - """Given a list of issue group ids, return a sublist of the top 5 ordered by event count""" - request = SnubaRequest( - dataset=Dataset.Events.value, - app_id="default", - tenant_ids={"organization_id": project.organization_id}, - query=( - Query(Entity("events")) - .set_select([Column("group_id"), Function("count", [], "event_count")]) - .set_groupby([Column("group_id")]) - .set_where( - [ - Condition(Column("project_id"), Op.EQ, project.id), - Condition(Column("group_id"), Op.IN, issue_list), - Condition(Column("timestamp"), Op.GTE, datetime.now() - timedelta(days=30)), - Condition(Column("timestamp"), Op.LT, datetime.now()), - Condition(Column("level"), Op.NEQ, "info"), - ] - ) - .set_orderby([OrderBy(Column("event_count"), Direction.DESC)]) - .set_limit(5) - ), - ) - return raw_snql_query(request, referrer=Referrer.GITHUB_PR_COMMENT_BOT.value)["data"] - - -def get_comment_contents(issue_list: list[int]) -> list[PullRequestIssue]: - """Retrieve the issue information that will be used for comment contents""" - issues = Group.objects.filter(id__in=issue_list).order_by("id").all() - return [ - PullRequestIssue(title=issue.title, subtitle=issue.culprit, url=issue.get_absolute_url()) - for issue in issues - ] - @instrumented_task( name="sentry.integrations.github.tasks.github_comment_workflow", @@ -140,135 +29,10 @@ def get_comment_contents(issue_list: list[int]) -> list[PullRequestIssue]: namespace=integrations_tasks, ), ) -def github_comment_workflow(pullrequest_id: int, project_id: int): - cache_key = DEBOUNCE_PR_COMMENT_CACHE_KEY(pullrequest_id) - - gh_repo_id, pr_key, org_id, issue_list = pr_to_issue_query(pullrequest_id)[0] - - # cap to 1000 issues in which the merge commit is the suspect commit - issue_list = issue_list[:MAX_SUSPECT_COMMITS] - - try: - organization = Organization.objects.get_from_cache(id=org_id) - except Organization.DoesNotExist: - cache.delete(cache_key) - logger.info("github.pr_comment.org_missing") - metrics.incr( - MERGED_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "missing_org"}, - ) - return - - if not OrganizationOption.objects.get_value( - organization=organization, - key="sentry:github_pr_bot", - default=True, - ): - logger.info("github.pr_comment.option_missing", extra={"organization_id": org_id}) - return - - try: - project = Project.objects.get_from_cache(id=project_id) - except 
Project.DoesNotExist: - cache.delete(cache_key) - logger.info("github.pr_comment.project_missing", extra={"organization_id": org_id}) - metrics.incr( - MERGED_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "missing_project"}, - ) - return - - top_5_issues = get_top_5_issues_by_count(issue_list, project) - if not top_5_issues: - logger.info( - "github.pr_comment.no_issues", - extra={"organization_id": org_id, "pr_id": pullrequest_id}, - ) - cache.delete(cache_key) - return - - top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] - - issue_comment_contents = get_comment_contents(top_5_issue_ids) - - try: - repo = Repository.objects.get(id=gh_repo_id) - except Repository.DoesNotExist: - cache.delete(cache_key) - logger.info("github.pr_comment.repo_missing", extra={"organization_id": org_id}) - metrics.incr( - MERGED_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "missing_repo"}, - ) - return - - integration = integration_service.get_integration( - integration_id=repo.integration_id, status=ObjectStatus.ACTIVE +def github_comment_workflow(pullrequest_id: int, project_id: int) -> None: + run_pr_comment_workflow( + integration_name="github", pullrequest_id=pullrequest_id, project_id=project_id ) - if not integration: - cache.delete(cache_key) - logger.info("github.pr_comment.integration_missing", extra={"organization_id": org_id}) - metrics.incr( - MERGED_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "missing_integration"}, - ) - return - - installation = integration.get_installation(organization_id=org_id) - assert isinstance(installation, CommitContextIntegration) - - comment_body = format_comment(issue_comment_contents) - logger.info("github.pr_comment.comment_body", extra={"body": comment_body}) - - top_24_issues = issue_list[:24] # 24 is the P99 for issues-per-PR - - enabled_copilot = features.has("organizations:gen-ai-features", organization) - - comment_data = { - "body": comment_body, - } - if enabled_copilot: - comment_data["actions"] = [ - { - "name": f"Root cause #{i + 1}", - "type": "copilot-chat", - "prompt": f"@sentry root cause issue {str(issue_id)} with PR URL https://github.com/{repo.name}/pull/{str(pr_key)}", - } - for i, issue_id in enumerate(top_24_issues[:3]) - ] - - try: - installation.create_or_update_comment( - repo=repo, - pr_key=pr_key, - comment_data=comment_data, - pullrequest_id=pullrequest_id, - issue_list=top_24_issues, - metrics_base=MERGED_PR_METRICS_BASE, - ) - except ApiError as e: - cache.delete(cache_key) - - if e.json: - if ISSUE_LOCKED_ERROR_MESSAGE in e.json.get("message", ""): - metrics.incr( - MERGED_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "issue_locked_error"}, - ) - return - - elif RATE_LIMITED_MESSAGE in e.json.get("message", ""): - metrics.incr( - MERGED_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "rate_limited_error"}, - ) - return - - metrics.incr( - MERGED_PR_METRICS_BASE.format(integration="github", key="error"), - tags={"type": "api_error"}, - ) - raise @instrumented_task( diff --git a/src/sentry/integrations/github/tasks/utils.py b/src/sentry/integrations/github/tasks/utils.py index e7d6f3aacd6214..bbd766a3787b97 100644 --- a/src/sentry/integrations/github/tasks/utils.py +++ b/src/sentry/integrations/github/tasks/utils.py @@ -1,25 +1,5 @@ -import logging -from dataclasses import dataclass from enum import Enum -logger = logging.getLogger(__name__) - - -@dataclass -class 
PullRequestIssue: - title: str - subtitle: str | None - url: str - affected_users: int | None = None - event_count: int | None = None - function_name: str | None = None - - -@dataclass -class PullRequestFile: - filename: str - patch: str - class GithubAPIErrorType(Enum): RATE_LIMITED = "gh_rate_limited" diff --git a/src/sentry/integrations/github_enterprise/integration.py b/src/sentry/integrations/github_enterprise/integration.py index 68814788b9d556..fab62e56a6461a 100644 --- a/src/sentry/integrations/github_enterprise/integration.py +++ b/src/sentry/integrations/github_enterprise/integration.py @@ -24,8 +24,17 @@ from sentry.integrations.github.utils import get_jwt from sentry.integrations.models.integration import Integration from sentry.integrations.services.repository.model import RpcRepository -from sentry.integrations.source_code_management.commit_context import CommitContextIntegration +from sentry.integrations.source_code_management.commit_context import ( + CommitContextIntegration, + CommitContextOrganizationOptionKeys, + CommitContextReferrerIds, + CommitContextReferrers, + PullRequestFile, + PullRequestIssue, +) from sentry.integrations.source_code_management.repository import RepositoryIntegration +from sentry.models.organization import Organization +from sentry.models.pullrequest import PullRequest from sentry.models.repository import Repository from sentry.organizations.services.organization.model import RpcOrganization from sentry.pipeline import NestedPipelineView, Pipeline, PipelineView @@ -232,6 +241,59 @@ def has_repo_access(self, repo: RpcRepository) -> bool: # TODO: define this, used to migrate repositories return False + # CommitContextIntegration methods + + @property + def commit_context_referrers(self) -> CommitContextReferrers: + raise NotImplementedError + + @property + def commit_context_referrer_ids(self) -> CommitContextReferrerIds: + raise NotImplementedError + + @property + def commit_context_organization_option_keys(self) -> CommitContextOrganizationOptionKeys: + raise NotImplementedError + + def format_pr_comment(self, issue_ids: list[int]) -> str: + raise NotImplementedError + + def build_pr_comment_data( + self, + organization: Organization, + repo: Repository, + pr_key: str, + comment_body: str, + issue_ids: list[int], + ) -> dict[str, Any]: + raise NotImplementedError + + def queue_comment_task(self, pullrequest_id: int, project_id: int) -> None: + raise NotImplementedError + + def on_create_or_update_comment_error(self, api_error: ApiError, metrics_base: str) -> bool: + raise NotImplementedError + + def get_pr_files_safe_for_comment( + self, repo: Repository, pr: PullRequest + ) -> list[dict[str, str]]: + raise NotImplementedError + + def get_pr_files(self, pr_files: list[dict[str, str]]) -> list[PullRequestFile]: + raise NotImplementedError + + def format_open_pr_comment(self, issue_tables: list[str]) -> str: + raise NotImplementedError + + def format_issue_table( + self, + diff_filename: str, + issues: list[PullRequestIssue], + patch_parsers: dict[str, Any], + toggle: bool, + ) -> str: + raise NotImplementedError + class InstallationForm(forms.Form): url = forms.CharField( diff --git a/src/sentry/integrations/gitlab/client.py b/src/sentry/integrations/gitlab/client.py index a87ec18bf0c6c8..cf22e3d7fee720 100644 --- a/src/sentry/integrations/gitlab/client.py +++ b/src/sentry/integrations/gitlab/client.py @@ -18,6 +18,7 @@ SourceLineInfo, ) from sentry.integrations.source_code_management.repository import RepositoryClient +from 
sentry.models.pullrequest import PullRequest from sentry.models.repository import Repository from sentry.shared_integrations.client.proxy import IntegrationProxyClient from sentry.shared_integrations.exceptions import ApiError, ApiUnauthorized @@ -375,3 +376,6 @@ def get_blame_for_files( "org_integration_id": self.org_integration_id, }, ) + + def get_pullrequest_files(self, repo: Repository, pr: PullRequest) -> Any: + raise NotImplementedError diff --git a/src/sentry/integrations/gitlab/integration.py b/src/sentry/integrations/gitlab/integration.py index 902b776704ed6d..1b152228c7543a 100644 --- a/src/sentry/integrations/gitlab/integration.py +++ b/src/sentry/integrations/gitlab/integration.py @@ -20,8 +20,17 @@ IntegrationProvider, ) from sentry.integrations.services.repository.model import RpcRepository -from sentry.integrations.source_code_management.commit_context import CommitContextIntegration +from sentry.integrations.source_code_management.commit_context import ( + CommitContextIntegration, + CommitContextOrganizationOptionKeys, + CommitContextReferrerIds, + CommitContextReferrers, + PullRequestFile, + PullRequestIssue, +) from sentry.integrations.source_code_management.repository import RepositoryIntegration +from sentry.models.organization import Organization +from sentry.models.pullrequest import PullRequest from sentry.models.repository import Repository from sentry.pipeline import NestedPipelineView, Pipeline, PipelineView from sentry.shared_integrations.exceptions import ( @@ -164,6 +173,59 @@ def extract_source_path_from_source_url(self, repo: Repository, url: str) -> str _, _, source_path = url.partition("/") return source_path + # CommitContextIntegration methods + + @property + def commit_context_referrers(self) -> CommitContextReferrers: + raise NotImplementedError + + @property + def commit_context_referrer_ids(self) -> CommitContextReferrerIds: + raise NotImplementedError + + @property + def commit_context_organization_option_keys(self) -> CommitContextOrganizationOptionKeys: + raise NotImplementedError + + def format_pr_comment(self, issue_ids: list[int]) -> str: + raise NotImplementedError + + def build_pr_comment_data( + self, + organization: Organization, + repo: Repository, + pr_key: str, + comment_body: str, + issue_ids: list[int], + ) -> dict[str, Any]: + raise NotImplementedError + + def queue_comment_task(self, pullrequest_id: int, project_id: int) -> None: + raise NotImplementedError + + def on_create_or_update_comment_error(self, api_error: ApiError, metrics_base: str) -> bool: + raise NotImplementedError + + def get_pr_files_safe_for_comment( + self, repo: Repository, pr: PullRequest + ) -> list[dict[str, str]]: + raise NotImplementedError + + def get_pr_files(self, pr_files: list[dict[str, str]]) -> list[PullRequestFile]: + raise NotImplementedError + + def format_open_pr_comment(self, issue_tables: list[str]) -> str: + raise NotImplementedError + + def format_issue_table( + self, + diff_filename: str, + issues: list[PullRequestIssue], + patch_parsers: dict[str, Any], + toggle: bool, + ) -> str: + raise NotImplementedError + # Gitlab only functions def get_group_id(self): diff --git a/src/sentry/integrations/source_code_management/commit_context.py b/src/sentry/integrations/source_code_management/commit_context.py index 1683c2e304b528..fc5a7ff141fbd4 100644 --- a/src/sentry/integrations/source_code_management/commit_context.py +++ b/src/sentry/integrations/source_code_management/commit_context.py @@ -1,19 +1,40 @@ from __future__ import annotations +import 
itertools import logging from abc import ABC, abstractmethod from collections.abc import Sequence from dataclasses import dataclass -from datetime import datetime, timedelta, timezone +from datetime import UTC, datetime, timedelta, timezone from typing import Any import sentry_sdk +from django.db import connection +from django.db.models import Value +from django.db.models.functions import StrIndex from django.utils import timezone as django_timezone +from snuba_sdk import ( + BooleanCondition, + BooleanOp, + Column, + Condition, + Direction, + Entity, + Function, + Op, + OrderBy, + Query, +) +from snuba_sdk import Request as SnubaRequest from sentry import analytics from sentry.auth.exceptions import IdentityNotValid +from sentry.constants import EXTENSION_LANGUAGE_MAP, ObjectStatus from sentry.integrations.gitlab.constants import GITLAB_CLOUD_BASE_URL from sentry.integrations.models.repository_project_path_config import RepositoryProjectPathConfig +from sentry.integrations.services.integration import integration_service +from sentry.integrations.source_code_management.constants import STACKFRAME_COUNT +from sentry.integrations.source_code_management.language_parsers import PATCH_PARSERS from sentry.integrations.source_code_management.metrics import ( CommitContextHaltReason, CommitContextIntegrationInteractionEvent, @@ -22,9 +43,10 @@ from sentry.integrations.types import ExternalProviderEnum from sentry.locks import locks from sentry.models.commit import Commit -from sentry.models.group import Group +from sentry.models.group import Group, GroupStatus from sentry.models.groupowner import GroupOwner from sentry.models.options.organization_option import OrganizationOption +from sentry.models.organization import Organization from sentry.models.project import Project from sentry.models.pullrequest import ( CommentType, @@ -34,18 +56,38 @@ ) from sentry.models.repository import Repository from sentry.shared_integrations.exceptions import ( + ApiError, ApiInvalidRequestError, ApiRateLimitedError, ApiRetryError, ) +from sentry.snuba.dataset import Dataset +from sentry.snuba.referrer import Referrer from sentry.users.models.identity import Identity from sentry.utils import metrics from sentry.utils.cache import cache +from sentry.utils.snuba import raw_snql_query logger = logging.getLogger(__name__) -def _debounce_pr_comment_cache_key(pullrequest_id: int) -> str: +@dataclass(frozen=True, kw_only=True) +class CommitContextReferrers: + pr_comment_bot: Referrer + + +@dataclass(frozen=True, kw_only=True) +class CommitContextReferrerIds: + pr_bot: str + open_pr_bot: str + + +@dataclass(frozen=True, kw_only=True) +class CommitContextOrganizationOptionKeys: + pr_bot: str + + +def debounce_pr_comment_cache_key(pullrequest_id: int) -> str: return f"pr-comment-{pullrequest_id}" @@ -57,9 +99,25 @@ def _pr_comment_log(integration_name: str, suffix: str) -> str: return f"{integration_name}.pr_comment.{suffix}" +def _open_pr_comment_log(integration_name: str, suffix: str) -> str: + return f"{integration_name}.open_pr_comment.{suffix}" + + PR_COMMENT_TASK_TTL = timedelta(minutes=5).total_seconds() PR_COMMENT_WINDOW = 14 # days +PR_MAX_SUSPECT_COMMITS = 1000 + +OPEN_PR_MAX_RECENT_ISSUES = 5000 +# Caps the number of files that can be modified in a PR to leave a comment +OPEN_PR_MAX_FILES_CHANGED = 7 +# Caps the number of lines that can be modified in a PR to leave a comment +OPEN_PR_MAX_LINES_CHANGED = 500 + +# Metrics +MERGED_PR_METRICS_BASE = "{integration}.pr_comment.{key}" +OPEN_PR_METRICS_BASE = 
"{integration}.open_pr_comment.{key}" + @dataclass class SourceLineInfo: @@ -84,6 +142,22 @@ class FileBlameInfo(SourceLineInfo): commit: CommitInfo +@dataclass +class PullRequestIssue: + title: str + subtitle: str | None + url: str + affected_users: int | None = None + event_count: int | None = None + function_name: str | None = None + + +@dataclass +class PullRequestFile: + filename: str + patch: str + + class CommitContextIntegration(ABC): """ Base class for integrations that include commit context features: suspect commits, suspect PR comments @@ -98,6 +172,21 @@ def integration_name(self) -> str: def get_client(self) -> CommitContextClient: raise NotImplementedError + @property + @abstractmethod + def commit_context_referrers(self) -> CommitContextReferrers: + raise NotImplementedError + + @property + @abstractmethod + def commit_context_referrer_ids(self) -> CommitContextReferrerIds: + raise NotImplementedError + + @property + @abstractmethod + def commit_context_organization_option_keys(self) -> CommitContextOrganizationOptionKeys: + raise NotImplementedError + def get_blame_for_files( self, files: Sequence[SourceLineInfo], extra: dict[str, Any] ) -> list[FileBlameInfo]: @@ -157,6 +246,118 @@ def get_commit_context_all_frames( """ return self.get_blame_for_files(files, extra) + def get_projects_and_filenames_from_source_file( + self, + organization: Organization, + repo: Repository, + pr_filename: str, + ) -> tuple[set[Project], set[str]]: + # fetch the code mappings in which the source_root is a substring at the start of pr_filename + code_mappings = ( + RepositoryProjectPathConfig.objects.filter( + organization_id=organization.id, + repository_id=repo.id, + ) + .annotate(substring_match=StrIndex(Value(pr_filename), "source_root")) + .filter(substring_match=1) + ) + + project_list: set[Project] = set() + sentry_filenames = set() + + if len(code_mappings): + for code_mapping in code_mappings: + project_list.add(code_mapping.project) + sentry_filenames.add( + pr_filename.replace(code_mapping.source_root, code_mapping.stack_root, 1) + ) + return project_list, sentry_filenames + + def create_or_update_comment( + self, + repo: Repository, + pr_key: str, + comment_data: dict[str, Any], + pullrequest_id: int, + issue_ids: list[int], + metrics_base: str, + comment_type: int = CommentType.MERGED_PR, + language: str | None = None, + ): + client = self.get_client() + + pr_comment = PullRequestComment.objects.filter( + pull_request__id=pullrequest_id, comment_type=comment_type + ).first() + + interaction_type = ( + SCMIntegrationInteractionType.CREATE_COMMENT + if not pr_comment + else SCMIntegrationInteractionType.UPDATE_COMMENT + ) + + with CommitContextIntegrationInteractionEvent( + interaction_type=interaction_type, + provider_key=self.integration_name, + repository=repo, + pull_request_id=pullrequest_id, + ).capture(): + if pr_comment is None: + resp = client.create_comment( + repo=repo.name, + issue_id=str(pr_key), + data=comment_data, + ) + + current_time = django_timezone.now() + comment = PullRequestComment.objects.create( + external_id=resp.body["id"], + pull_request_id=pullrequest_id, + created_at=current_time, + updated_at=current_time, + group_ids=issue_ids, + comment_type=comment_type, + ) + metrics.incr( + metrics_base.format(integration=self.integration_name, key="comment_created") + ) + + if comment_type == CommentType.OPEN_PR: + analytics.record( + "open_pr_comment.created", + comment_id=comment.id, + org_id=repo.organization_id, + pr_id=pullrequest_id, + language=(language 
or "not found"), + ) + else: + resp = client.update_comment( + repo=repo.name, + issue_id=str(pr_key), + comment_id=pr_comment.external_id, + data=comment_data, + ) + metrics.incr( + metrics_base.format(integration=self.integration_name, key="comment_updated") + ) + pr_comment.updated_at = django_timezone.now() + pr_comment.group_ids = issue_ids + pr_comment.save() + + logger_event = metrics_base.format( + integration=self.integration_name, key="create_or_update_comment" + ) + logger.info( + logger_event, + extra={"new_comment": pr_comment is None, "pr_key": pr_key, "repo": repo.name}, + ) + + @abstractmethod + def on_create_or_update_comment_error(self, api_error: ApiError, metrics_base: str) -> bool: + raise NotImplementedError + + # PR Comment Workflow + def queue_comment_task_if_needed( self, project: Project, @@ -173,7 +374,7 @@ def queue_comment_task_if_needed( ).capture() as lifecycle: if not OrganizationOption.objects.get_value( organization=project.organization, - key="sentry:github_pr_bot", + key=self.commit_context_organization_option_keys.pr_bot, default=True, ): # TODO: remove logger in favor of the log recorded in lifecycle.record_halt @@ -210,7 +411,6 @@ def queue_comment_task_if_needed( scope = sentry_sdk.Scope.get_isolation_scope() scope.set_tag("queue_comment_check.merge_commit_sha", commit.key) scope.set_tag("queue_comment_check.organization_id", commit.organization_id) - from sentry.integrations.github.tasks.pr_comment import github_comment_workflow # client will raise an Exception if the request is not successful try: @@ -275,7 +475,7 @@ def queue_comment_task_if_needed( _debounce_pr_comment_lock_key(pr.id), duration=10, name="queue_comment_task" ) with lock.acquire(): - cache_key = _debounce_pr_comment_cache_key(pullrequest_id=pr.id) + cache_key = debounce_pr_comment_cache_key(pullrequest_id=pr.id) if cache.get(cache_key) is not None: lifecycle.record_halt(CommitContextHaltReason.ALREADY_QUEUED) return @@ -292,88 +492,669 @@ def queue_comment_task_if_needed( cache.set(cache_key, True, PR_COMMENT_TASK_TTL) - github_comment_workflow.delay( - pullrequest_id=pr.id, project_id=group_owner.project_id - ) + self.queue_comment_task(pullrequest_id=pr.id, project_id=group_owner.project_id) - def create_or_update_comment( + @abstractmethod + def queue_comment_task(self, pullrequest_id: int, project_id: int) -> None: + raise NotImplementedError + + @abstractmethod + def format_pr_comment(self, issue_ids: list[int]) -> str: + raise NotImplementedError + + @abstractmethod + def build_pr_comment_data( self, + organization: Organization, repo: Repository, pr_key: str, - comment_data: dict[str, Any], - pullrequest_id: int, - issue_list: list[int], - metrics_base: str, - comment_type: int = CommentType.MERGED_PR, - language: str | None = None, - ): - client = self.get_client() + comment_body: str, + issue_ids: list[int], + ) -> dict[str, Any]: + raise NotImplementedError - pr_comment = PullRequestComment.objects.filter( - pull_request__id=pullrequest_id, comment_type=comment_type - ).first() + def get_issue_ids_from_pr( + self, pr: PullRequest, limit: int = PR_MAX_SUSPECT_COMMITS + ) -> list[int]: + with connection.cursor() as cursor: + cursor.execute( + """ + SELECT go.group_id issue_id + FROM sentry_groupowner go + JOIN sentry_pullrequest_commit c ON c.commit_id = (go.context::jsonb->>'commitId')::bigint + JOIN sentry_pull_request pr ON c.pull_request_id = pr.id + WHERE go.type=0 + AND pr.id=%s + ORDER BY go.date_added + LIMIT %s + """, + params=[pr.id, limit], + ) + return [issue_id 
-        interaction_type = (
-            SCMIntegrationInteractionType.CREATE_COMMENT
-            if not pr_comment
-            else SCMIntegrationInteractionType.UPDATE_COMMENT
+    def get_top_5_issues_by_count(
+        self, issue_ids: list[int], project: Project
+    ) -> list[dict[str, Any]]:
+        """Given a list of issue group ids, return a sublist of the top 5 ordered by event count"""
+        request = SnubaRequest(
+            dataset=Dataset.Events.value,
+            app_id="default",
+            tenant_ids={"organization_id": project.organization_id},
+            query=(
+                Query(Entity("events"))
+                .set_select([Column("group_id"), Function("count", [], "event_count")])
+                .set_groupby([Column("group_id")])
+                .set_where(
+                    [
+                        Condition(Column("project_id"), Op.EQ, project.id),
+                        Condition(Column("group_id"), Op.IN, issue_ids),
+                        Condition(Column("timestamp"), Op.GTE, datetime.now() - timedelta(days=30)),
+                        Condition(Column("timestamp"), Op.LT, datetime.now()),
+                        Condition(Column("level"), Op.NEQ, "info"),
+                    ]
+                )
+                .set_orderby([OrderBy(Column("event_count"), Direction.DESC)])
+                .set_limit(5)
+            ),
         )
+        referrer = self.commit_context_referrers.pr_comment_bot.value
+        return raw_snql_query(request, referrer=referrer)["data"]

-        with CommitContextIntegrationInteractionEvent(
-            interaction_type=interaction_type,
-            provider_key=self.integration_name,
-            repository=repo,
-            pull_request_id=pullrequest_id,
-        ).capture():
-            if pr_comment is None:
-                resp = client.create_comment(
-                    repo=repo.name,
-                    issue_id=str(pr_key),
-                    data=comment_data,
-                )
+    def run_pr_comment_workflow(
+        self, organization: Organization, repo: Repository, pr: PullRequest, project_id: int
+    ) -> None:
+        cache_key = debounce_pr_comment_cache_key(pullrequest_id=pr.id)

-                current_time = django_timezone.now()
-                comment = PullRequestComment.objects.create(
-                    external_id=resp.body["id"],
-                    pull_request_id=pullrequest_id,
-                    created_at=current_time,
-                    updated_at=current_time,
-                    group_ids=issue_list,
-                    comment_type=comment_type,
+        # cap to 1000 issues in which the merge commit is the suspect commit
+        issue_ids = self.get_issue_ids_from_pr(pr, limit=PR_MAX_SUSPECT_COMMITS)
+
+        if not OrganizationOption.objects.get_value(
+            organization=organization,
+            key=self.commit_context_organization_option_keys.pr_bot,
+            default=True,
+        ):
+            logger.info(
+                _pr_comment_log(integration_name=self.integration_name, suffix="option_missing"),
+                extra={"organization_id": organization.id},
+            )
+            return
+
+        try:
+            project = Project.objects.get_from_cache(id=project_id)
+        except Project.DoesNotExist:
+            cache.delete(cache_key)
+            logger.info(
+                _pr_comment_log(integration_name=self.integration_name, suffix="project_missing"),
+                extra={"organization_id": organization.id},
+            )
+            metrics.incr(
+                MERGED_PR_METRICS_BASE.format(integration=self.integration_name, key="error"),
+                tags={"type": "missing_project"},
+            )
+            return
+
+        top_5_issues = self.get_top_5_issues_by_count(issue_ids, project)
+        if not top_5_issues:
+            logger.info(
+                _pr_comment_log(integration_name=self.integration_name, suffix="no_issues"),
+                extra={"organization_id": organization.id, "pr_id": pr.id},
+            )
+            cache.delete(cache_key)
+            return
+
+        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
+
+        comment_body = self.format_pr_comment(issue_ids=top_5_issue_ids)
+        logger.info(
+            _pr_comment_log(integration_name=self.integration_name, suffix="comment_body"),
+            extra={"body": comment_body},
+        )
+
+        top_24_issue_ids = issue_ids[:24]  # 24 is the P99 for issues-per-PR
+
+        comment_data = self.build_pr_comment_data(
+            organization=organization,
+            repo=repo,
+            pr_key=pr.key,
+            comment_body=comment_body,
+            issue_ids=top_24_issue_ids,
+        )
+
+        try:
+            self.create_or_update_comment(
+                repo=repo,
+                pr_key=pr.key,
+                comment_data=comment_data,
+                pullrequest_id=pr.id,
+                issue_ids=top_24_issue_ids,
+                metrics_base=MERGED_PR_METRICS_BASE,
+            )
+        except ApiError as e:
+            cache.delete(cache_key)
+
+            if self.on_create_or_update_comment_error(
+                api_error=e, metrics_base=MERGED_PR_METRICS_BASE
+            ):
+                return
+
+            metrics.incr(
+                MERGED_PR_METRICS_BASE.format(integration=self.integration_name, key="error"),
+                tags={"type": "api_error"},
+            )
+            raise
+
+    # Open PR Comment Workflow
+
+    @abstractmethod
+    def get_pr_files_safe_for_comment(
+        self, repo: Repository, pr: PullRequest
+    ) -> list[dict[str, str]]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_pr_files(self, pr_files: list[dict[str, str]]) -> list[PullRequestFile]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def format_open_pr_comment(self, issue_tables: list[str]) -> str:
+        raise NotImplementedError
+
+    @abstractmethod
+    def format_issue_table(
+        self,
+        diff_filename: str,
+        issues: list[PullRequestIssue],
+        patch_parsers: dict[str, Any],
+        toggle: bool,
+    ) -> str:
+        raise NotImplementedError
+
+    def get_top_5_issues_by_count_for_file(
+        self, projects: list[Project], sentry_filenames: list[str], function_names: list[str]
+    ) -> list[dict[str, Any]]:
+        """
+        Given a list of projects, filenames reverse-codemapped into filenames in Sentry,
+        and function names representing the list of functions changed in a PR file, return a
+        sublist of the top 5 recent unhandled issues ordered by event count.
+        """
+        if not len(projects):
+            return []
+
+        patch_parsers = PATCH_PARSERS
+        # NOTE: if we are testing beta patch parsers, add check here
+
+        # fetches the appropriate parser for formatting the snuba query given the file extension
+        # the extension is never replaced in reverse codemapping
+        language_parser = patch_parsers.get(sentry_filenames[0].split(".")[-1], None)
+
+        if not language_parser:
+            return []
+
+        group_ids = list(
+            Group.objects.filter(
+                first_seen__gte=datetime.now(UTC) - timedelta(days=90),
+                last_seen__gte=datetime.now(UTC) - timedelta(days=14),
+                status=GroupStatus.UNRESOLVED,
+                project__in=projects,
+            )
+            .order_by("-times_seen")
+            .values_list("id", flat=True)
+        )[:OPEN_PR_MAX_RECENT_ISSUES]
+        project_ids = [p.id for p in projects]
+
+        multi_if = language_parser.generate_multi_if(function_names)
+
+        # fetch the count of events for each group_id
+        subquery = (
+            Query(Entity("events"))
+            .set_select(
+                [
+                    Column("title"),
+                    Column("culprit"),
+                    Column("group_id"),
+                    Function("count", [], "event_count"),
+                    Function(
+                        "multiIf",
+                        multi_if,
+                        "function_name",
+                    ),
+                ]
+            )
+            .set_groupby(
+                [
+                    Column("title"),
+                    Column("culprit"),
+                    Column("group_id"),
+                    Column("exception_frames.function"),
+                ]
+            )
+            .set_where(
+                [
+                    Condition(Column("project_id"), Op.IN, project_ids),
+                    Condition(Column("group_id"), Op.IN, group_ids),
+                    Condition(Column("timestamp"), Op.GTE, datetime.now() - timedelta(days=14)),
+                    Condition(Column("timestamp"), Op.LT, datetime.now()),
+                    # NOTE: ideally this would follow suspect commit logic
+                    BooleanCondition(
+                        BooleanOp.OR,
+                        [
+                            BooleanCondition(
+                                BooleanOp.AND,
+                                [
+                                    Condition(
+                                        Function(
+                                            "arrayElement",
+                                            (Column("exception_frames.filename"), i),
+                                        ),
+                                        Op.IN,
+                                        sentry_filenames,
+                                    ),
+                                    language_parser.generate_function_name_conditions(
+                                        function_names, i
+                                    ),
+                                ],
+                            )
+                            for i in range(-STACKFRAME_COUNT, 0)  # first n frames
+                        ],
+                    ),
+                    Condition(Function("notHandled", []), Op.EQ, 1),
+                ]
+            )
+            .set_orderby([OrderBy(Column("event_count"), Direction.DESC)])
+        )
+
+        # filter on the subquery to squash group_ids with the same title and culprit
+        # return the group_id with the greatest count of events
+        query = (
+            Query(subquery)
+            .set_select(
+                [
+                    Column("function_name"),
+                    Function(
+                        "arrayElement",
+                        (Function("groupArray", [Column("group_id")]), 1),
+                        "group_id",
+                    ),
+                    Function(
+                        "arrayElement",
+                        (Function("groupArray", [Column("event_count")]), 1),
+                        "event_count",
+                    ),
+                ]
+            )
+            .set_groupby(
+                [
+                    Column("title"),
+                    Column("culprit"),
+                    Column("function_name"),
+                ]
+            )
+            .set_orderby([OrderBy(Column("event_count"), Direction.DESC)])
+            .set_limit(5)
+        )
+
+        request = SnubaRequest(
+            dataset=Dataset.Events.value,
+            app_id="default",
+            tenant_ids={"organization_id": projects[0].organization_id},
+            query=query,
+        )
+
+        try:
+            referrer = self.commit_context_referrers.pr_comment_bot.value
+            return raw_snql_query(request, referrer=referrer)["data"]
+        except Exception:
+            logger.exception(
+                _open_pr_comment_log(
+                    integration_name=self.integration_name, suffix="snuba_query_error"
+                ),
+                extra={"query": request.to_dict()["query"]},
+            )
+            return []
+
+    def get_issue_table_contents(self, issue_list: list[dict[str, Any]]) -> list[PullRequestIssue]:
+        group_id_to_info = {}
+        for issue in issue_list:
+            group_id = issue["group_id"]
+            group_id_to_info[group_id] = dict(filter(lambda k: k[0] != "group_id", issue.items()))
+
+        issues = Group.objects.filter(id__in=list(group_id_to_info.keys())).all()
+
+        pull_request_issues = [
+            PullRequestIssue(
+                title=issue.title,
+                subtitle=issue.culprit,
+                url=issue.get_absolute_url(),
+                affected_users=issue.count_users_seen(
+                    referrer=Referrer.TAGSTORE_GET_GROUPS_USER_COUNTS_OPEN_PR_COMMENT.value
+                ),
+                event_count=group_id_to_info[issue.id]["event_count"],
+                function_name=group_id_to_info[issue.id]["function_name"],
+            )
+            for issue in issues
+        ]
+        pull_request_issues.sort(key=lambda k: k.event_count or 0, reverse=True)
+
+        return pull_request_issues
+
+    def run_open_pr_comment_workflow(
+        self, organization: Organization, repo: Repository, pr: PullRequest
+    ) -> None:
+        # fetch the files in the PR and determine if it is safe to comment
+        pr_files = self.get_pr_files_safe_for_comment(repo=repo, pr=pr)
+
+        if len(pr_files) == 0:
+            logger.info(
+                _open_pr_comment_log(
+                    integration_name=self.integration_name, suffix="not_safe_for_comment"
+                ),
+                extra={"file_count": len(pr_files)},
+            )
+            metrics.incr(
+                OPEN_PR_METRICS_BASE.format(integration=self.integration_name, key="error"),
+                tags={"type": "unsafe_for_comment"},
+            )
+            return
+
+        pullrequest_files = self.get_pr_files(pr_files)
+
+        issue_table_contents = {}
+        top_issues_per_file = []
+
+        patch_parsers = PATCH_PARSERS
+        # NOTE: if we are testing beta patch parsers, add check here
+
+        file_extensions = set()
+        # fetch issues related to the files
+        for file in pullrequest_files:
+            projects, sentry_filenames = self.get_projects_and_filenames_from_source_file(
+                organization=organization, repo=repo, pr_filename=file.filename
+            )
+            if not len(projects) or not len(sentry_filenames):
+                continue
+
+            file_extension = file.filename.split(".")[-1]
+            logger.info(
+                _open_pr_comment_log(
+                    integration_name=self.integration_name, suffix="file_extension"
+                ),
+                extra={
+                    "organization_id": organization.id,
+                    "repository_id": repo.id,
+                    "extension": file_extension,
+                },
+            )
+
+            language_parser = patch_parsers.get(file.filename.split(".")[-1], None)
+            if not language_parser:
+                logger.info(
+                    _open_pr_comment_log(
+                        integration_name=self.integration_name, suffix="missing_parser"
+                    ),
+                    extra={"extension": file_extension},
                 )
                 metrics.incr(
-                    metrics_base.format(integration=self.integration_name, key="comment_created")
+                    OPEN_PR_METRICS_BASE.format(
+                        integration=self.integration_name, key="missing_parser"
+                    ),
+                    tags={"extension": file_extension},
                 )
+                continue

-                if comment_type == CommentType.OPEN_PR:
-                    analytics.record(
-                        "open_pr_comment.created",
-                        comment_id=comment.id,
-                        org_id=repo.organization_id,
-                        pr_id=pullrequest_id,
-                        language=(language or "not found"),
-                    )
-            else:
-                resp = client.update_comment(
-                    repo=repo.name,
-                    issue_id=str(pr_key),
-                    comment_id=pr_comment.external_id,
-                    data=comment_data,
+            function_names = language_parser.extract_functions_from_patch(file.patch)
+
+            if file_extension in ["js", "jsx"]:
+                logger.info(
+                    _open_pr_comment_log(
+                        integration_name=self.integration_name, suffix="javascript"
+                    ),
+                    extra={
+                        "organization_id": organization.id,
+                        "repository_id": repo.id,
+                        "extension": file_extension,
+                        "has_function_names": bool(function_names),
+                    },
                 )
-                metrics.incr(
-                    metrics_base.format(integration=self.integration_name, key="comment_updated")
+
+            if file_extension == "php":
+                logger.info(
+                    _open_pr_comment_log(integration_name=self.integration_name, suffix="php"),
+                    extra={
+                        "organization_id": organization.id,
+                        "repository_id": repo.id,
+                        "extension": file_extension,
+                        "has_function_names": bool(function_names),
+                    },
                 )
-                pr_comment.updated_at = django_timezone.now()
-                pr_comment.group_ids = issue_list
-                pr_comment.save()
-        logger_event = metrics_base.format(
-            integration=self.integration_name, key="create_or_update_comment"
+            if file_extension == "rb":
+                logger.info(
+                    _open_pr_comment_log(integration_name=self.integration_name, suffix="ruby"),
+                    extra={
+                        "organization_id": organization.id,
+                        "repository_id": repo.id,
+                        "extension": file_extension,
+                        "has_function_names": bool(function_names),
+                    },
+                )
+
+            if not len(function_names):
+                continue
+
+            top_issues = self.get_top_5_issues_by_count_for_file(
+                list(projects), list(sentry_filenames), list(function_names)
             )
+            if not len(top_issues):
+                continue
+
+            top_issues_per_file.append(top_issues)
+            file_extensions.add(file_extension)
+
+            issue_table_contents[file.filename] = self.get_issue_table_contents(top_issues)
+
+        if not len(issue_table_contents):
             logger.info(
-                logger_event,
-                extra={"new_comment": pr_comment is None, "pr_key": pr_key, "repo": repo.name},
+                _open_pr_comment_log(integration_name=self.integration_name, suffix="no_issues")
+            )
+            # don't leave a comment if there are no issues for the files in the PR
+            metrics.incr(
+                OPEN_PR_METRICS_BASE.format(integration=self.integration_name, key="no_issues")
+            )
+            return
+
+        # format issues per file into comment
+        issue_tables = []
+        first_table = True
+        for file in pullrequest_files:
+            pr_filename = file.filename
+            issue_table_content = issue_table_contents.get(pr_filename, None)
+
+            if issue_table_content is None:
+                continue
+
+            if first_table:
+                issue_table = self.format_issue_table(
+                    pr_filename, issue_table_content, patch_parsers, toggle=False
+                )
+                first_table = False
+            else:
+                # toggle all tables but the first one
+                issue_table = self.format_issue_table(
+                    pr_filename, issue_table_content, patch_parsers, toggle=True
+                )
+
+            issue_tables.append(issue_table)
+
+        comment_body = self.format_open_pr_comment(issue_tables)
+
+        # list all issues in the comment
+        issue_list: list[dict[str, Any]] = list(itertools.chain.from_iterable(top_issues_per_file))
+        issue_id_list: list[int] = [issue["group_id"] for issue in issue_list]
+
+        # pick one language from the list of languages in the PR for analytics
+        languages = [
+            EXTENSION_LANGUAGE_MAP[extension]
+            for extension in file_extensions
+            if extension in EXTENSION_LANGUAGE_MAP
+        ]
+        language = languages[0] if len(languages) else "not found"
+
+        try:
+            self.create_or_update_comment(
+                repo=repo,
+                pr_key=pr.key,
+                comment_data={"body": comment_body},
+                pullrequest_id=pr.id,
+                issue_ids=issue_id_list,
+                comment_type=CommentType.OPEN_PR,
+                metrics_base=OPEN_PR_METRICS_BASE,
+                language=language,
+            )
+        except ApiError as e:
+            if self.on_create_or_update_comment_error(
+                api_error=e, metrics_base=OPEN_PR_METRICS_BASE
+            ):
+                return
+
+            metrics.incr(
+                OPEN_PR_METRICS_BASE.format(integration=self.integration_name, key="error"),
+                tags={"type": "api_error"},
             )
+            raise
+
+
+def run_pr_comment_workflow(integration_name: str, pullrequest_id: int, project_id: int) -> None:
+    cache_key = debounce_pr_comment_cache_key(pullrequest_id=pullrequest_id)
+
+    try:
+        pr = PullRequest.objects.get(id=pullrequest_id)
+        assert isinstance(pr, PullRequest)
+    except PullRequest.DoesNotExist:
+        cache.delete(cache_key)
+        logger.info(_pr_comment_log(integration_name=integration_name, suffix="pr_missing"))
+        return
+
+    try:
+        organization = Organization.objects.get_from_cache(id=pr.organization_id)
+        assert isinstance(organization, Organization)
+    except Organization.DoesNotExist:
+        cache.delete(cache_key)
+        logger.info(_pr_comment_log(integration_name=integration_name, suffix="org_missing"))
+        metrics.incr(
+            MERGED_PR_METRICS_BASE.format(integration=integration_name, key="error"),
+            tags={"type": "missing_org"},
+        )
+        return
+
+    try:
+        repo = Repository.objects.get(id=pr.repository_id)
+        assert isinstance(repo, Repository)
+    except Repository.DoesNotExist:
+        cache.delete(cache_key)
+        logger.info(
+            _pr_comment_log(integration_name=integration_name, suffix="repo_missing"),
+            extra={"organization_id": organization.id},
+        )
+        metrics.incr(
+            MERGED_PR_METRICS_BASE.format(integration=integration_name, key="error"),
+            tags={"type": "missing_repo"},
+        )
+        return
+
+    integration = integration_service.get_integration(
+        integration_id=repo.integration_id, status=ObjectStatus.ACTIVE
+    )
+    if not integration:
+        cache.delete(cache_key)
+        logger.info(
+            _pr_comment_log(integration_name=integration_name, suffix="integration_missing"),
+            extra={"organization_id": organization.id},
+        )
+        metrics.incr(
+            MERGED_PR_METRICS_BASE.format(integration=integration_name, key="error"),
+            tags={"type": "missing_integration"},
+        )
+        return
+
+    installation = integration.get_installation(organization_id=organization.id)
+    assert isinstance(installation, CommitContextIntegration)
+
+    installation.run_pr_comment_workflow(
+        organization=organization,
+        repo=repo,
+        pr=pr,
+        project_id=project_id,
+    )
+
+
+def run_open_pr_comment_workflow(integration_name: str, pullrequest_id: int) -> None:
+    logger.info(_open_pr_comment_log(integration_name=integration_name, suffix="start_workflow"))
+
+    # CHECKS
+    # check PR exists to get PR key
+    try:
+        pr = PullRequest.objects.get(id=pullrequest_id)
+        assert isinstance(pr, PullRequest)
+    except PullRequest.DoesNotExist:
+        logger.info(_open_pr_comment_log(integration_name=integration_name, suffix="pr_missing"))
+        metrics.incr(
+            OPEN_PR_METRICS_BASE.format(integration=integration_name, key="error"),
+            tags={"type": "missing_pr"},
+        )
+        return
+
+    # check org option
+    try:
+        organization = Organization.objects.get_from_cache(id=pr.organization_id)
+        assert isinstance(organization, Organization)
+    except Organization.DoesNotExist:
+        logger.exception(
+            _open_pr_comment_log(integration_name=integration_name, suffix="org_missing")
+        )
+        metrics.incr(
+            OPEN_PR_METRICS_BASE.format(integration=integration_name, key="error"),
+            tags={"type": "missing_org"},
+        )
+        return
+
+    # check PR repo exists to get repo name
+    try:
+        repo = Repository.objects.get(id=pr.repository_id)
+        assert isinstance(repo, Repository)
+    except Repository.DoesNotExist:
+        logger.info(
+            _open_pr_comment_log(integration_name=integration_name, suffix="repo_missing"),
+            extra={"organization_id": organization.id},
+        )
+        metrics.incr(
+            OPEN_PR_METRICS_BASE.format(integration=integration_name, key="error"),
+            tags={"type": "missing_repo"},
+        )
+        return
+
+    # check integration exists to hit Github API with client
+    integration = integration_service.get_integration(
+        integration_id=repo.integration_id, status=ObjectStatus.ACTIVE
+    )
+    if not integration:
+        logger.info(
+            _open_pr_comment_log(integration_name=integration_name, suffix="integration_missing"),
+            extra={"organization_id": organization.id},
+        )
+        metrics.incr(
+            OPEN_PR_METRICS_BASE.format(integration=integration_name, key="error"),
+            tags={"type": "missing_integration"},
+        )
+        return
+
+    installation = integration.get_installation(organization_id=organization.id)
+    assert isinstance(installation, CommitContextIntegration)
+
+    installation.run_open_pr_comment_workflow(
+        organization=organization,
+        repo=repo,
+        pr=pr,
+    )


 class CommitContextClient(ABC):
@@ -399,3 +1180,7 @@ def update_comment(
     @abstractmethod
     def get_merge_commit_sha_from_commit(self, repo: Repository, sha: str) -> str | None:
         raise NotImplementedError
+
+    @abstractmethod
+    def get_pullrequest_files(self, repo: Repository, pr: PullRequest) -> Any:
+        raise NotImplementedError
diff --git a/src/sentry/seer/fetch_issues/fetch_issues_given_patches.py b/src/sentry/seer/fetch_issues/fetch_issues_given_patches.py
index fc19e11a3f8135..7a2e69a58d77ca 100644
--- a/src/sentry/seer/fetch_issues/fetch_issues_given_patches.py
+++ b/src/sentry/seer/fetch_issues/fetch_issues_given_patches.py
@@ -11,13 +11,13 @@
 from sentry import eventstore
 from sentry.api.serializers import EventSerializer, serialize
 from sentry.api.serializers.models.event import EventSerializerResponse
-from sentry.integrations.github.tasks.open_pr_comment import (
-    MAX_RECENT_ISSUES,
+from sentry.integrations.models.repository_project_path_config import RepositoryProjectPathConfig
+from sentry.integrations.source_code_management.commit_context import (
     OPEN_PR_MAX_FILES_CHANGED,
     OPEN_PR_MAX_LINES_CHANGED,
+    OPEN_PR_MAX_RECENT_ISSUES,
+    PullRequestFile,
 )
-from sentry.integrations.github.tasks.utils import PullRequestFile
-from sentry.integrations.models.repository_project_path_config import RepositoryProjectPathConfig
 from sentry.models.group import Group, GroupStatus
 from sentry.models.project import Project
 from sentry.models.repository import Repository
@@ -107,7 +107,7 @@ def _get_issues_for_file(
         )
         .order_by("-times_seen")
         .values_list("id", flat=True)
-    )[:MAX_RECENT_ISSUES]
+    )[:OPEN_PR_MAX_RECENT_ISSUES]
     project_ids = [project.id for project in projects]

     multi_if = language_parser.generate_multi_if(function_names)
diff --git a/tests/sentry/integrations/github/tasks/test_open_pr_comment.py b/tests/sentry/integrations/github/tasks/test_open_pr_comment.py
index 1a053bf515ceaf..5223b2973b49d9 100644
--- a/tests/sentry/integrations/github/tasks/test_open_pr_comment.py
+++ b/tests/sentry/integrations/github/tasks/test_open_pr_comment.py
@@ -6,21 +6,14 @@
 from django.utils import timezone

 from sentry.constants import ObjectStatus
-from sentry.integrations.github.tasks.open_pr_comment import (
-    format_issue_table,
-    format_open_pr_comment,
-    get_issue_table_contents,
-    get_pr_files,
-    get_projects_and_filenames_from_source_file,
-    get_top_5_issues_by_count_for_file,
-    open_pr_comment_workflow,
-    safe_for_comment,
-)
-from sentry.integrations.github.tasks.utils import PullRequestFile, PullRequestIssue
+from sentry.integrations.github.tasks.open_pr_comment import open_pr_comment_workflow
 from sentry.integrations.models.integration import Integration
-from sentry.integrations.source_code_management.constants import STACKFRAME_COUNT
+from sentry.integrations.source_code_management.commit_context import (
+    PullRequestFile,
+    PullRequestIssue,
+)
 from sentry.integrations.source_code_management.language_parsers import PATCH_PARSERS
-from sentry.models.group import Group, GroupStatus
+from sentry.models.group import Group
 from sentry.models.pullrequest import CommentType, PullRequest, PullRequestComment
 from sentry.shared_integrations.exceptions import ApiError
 from sentry.testutils.cases import IntegrationTestCase, TestCase
@@ -87,12 +80,15 @@ class TestSafeForComment(GithubCommentTestCase):
     def setUp(self):
         super().setUp()
         self.pr = self.create_pr_issues()
-        self.mock_metrics = patch(
-            "sentry.integrations.github.tasks.open_pr_comment.metrics"
-        ).start()
+
+        metrics_patch = patch("sentry.integrations.github.integration.metrics")
+        self.mock_metrics = metrics_patch.start()
+        self.addCleanup(metrics_patch.stop)
+
         self.gh_path = self.base_url + "/repos/getsentry/sentry/pulls/{pull_number}/files"
-        installation = self.integration.get_installation(organization_id=self.organization.id)
-        self.gh_client = installation.get_client()
+        self.installation_impl = self.integration.get_installation(
+            organization_id=self.organization.id
+        )

     @responses.activate
     def test_simple(self):
@@ -112,7 +108,9 @@ def test_simple(self):
             json=data,
         )

-        pr_files = safe_for_comment(self.gh_client, self.gh_repo, self.pr)
+        pr_files = self.installation_impl.get_pr_files_safe_for_comment(
+            repo=self.gh_repo, pr=self.pr
+        )
         assert pr_files == [
             {"filename": "foo.py", "changes": 100, "status": "modified"},
             {"filename": "bar.js", "changes": 100, "status": "modified"},
@@ -139,7 +137,9 @@ def test_too_many_files(self):
             ],
         )

-        pr_files = safe_for_comment(self.gh_client, self.gh_repo, self.pr)
+        pr_files = self.installation_impl.get_pr_files_safe_for_comment(
+            repo=self.gh_repo, pr=self.pr
+        )
         assert pr_files == []  # not safe
         self.mock_metrics.incr.assert_called_with(
             "github.open_pr_comment.rejected_comment", tags={"reason": "too_many_files"}
@@ -157,7 +157,9 @@ def test_too_many_lines(self):
             ],
         )

-        pr_files = safe_for_comment(self.gh_client, self.gh_repo, self.pr)
+        pr_files = self.installation_impl.get_pr_files_safe_for_comment(
+            repo=self.gh_repo, pr=self.pr
+        )
         assert pr_files == []  # not safe
         self.mock_metrics.incr.assert_called_with(
             "github.open_pr_comment.rejected_comment", tags={"reason": "too_many_lines"}
@@ -182,7 +184,9 @@ def test_too_many_files_and_lines(self):
             ],
         )

-        pr_files = safe_for_comment(self.gh_client, self.gh_repo, self.pr)
+        pr_files = self.installation_impl.get_pr_files_safe_for_comment(
+            repo=self.gh_repo, pr=self.pr
+        )
         assert pr_files == []  # not safe
         self.mock_metrics.incr.assert_any_call(
             "github.open_pr_comment.rejected_comment", tags={"reason": "too_many_lines"}
@@ -200,7 +204,9 @@ def test_error__rate_limited(self):
             },
         )

-        pr_files = safe_for_comment(self.gh_client, self.gh_repo, self.pr)
+        pr_files = self.installation_impl.get_pr_files_safe_for_comment(
+            repo=self.gh_repo, pr=self.pr
+        )
         assert pr_files == []  # not safe
         self.mock_metrics.incr.assert_called_with(
             "github.open_pr_comment.api_error", tags={"type": "gh_rate_limited", "code": 429}
@@ -212,7 +218,9 @@ def test_error__missing_pr(self):
             responses.GET, self.gh_path.format(pull_number=self.pr.key), status=404, json={}
         )

-        pr_files = safe_for_comment(self.gh_client, self.gh_repo, self.pr)
+        pr_files = self.installation_impl.get_pr_files_safe_for_comment(
+            repo=self.gh_repo, pr=self.pr
+        )
         assert pr_files == []  # not safe
         self.mock_metrics.incr.assert_called_with(
             "github.open_pr_comment.api_error",
@@ -225,7 +233,9 @@ def test_error__api_error(self):
             responses.GET, self.gh_path.format(pull_number=self.pr.key), status=400, json={}
         )

-        pr_files = safe_for_comment(self.gh_client, self.gh_repo, self.pr)
+        pr_files = self.installation_impl.get_pr_files_safe_for_comment(
+            repo=self.gh_repo, pr=self.pr
+        )
         assert pr_files == []  # not safe
         self.mock_metrics.incr.assert_called_with(
             "github.open_pr_comment.api_error", tags={"type": "unknown_api_error", "code": 400}
@@ -236,12 +246,16 @@ class TestGetFilenames(GithubCommentTestCase):
     def setUp(self):
         super().setUp()
         self.pr = self.create_pr_issues()
-        self.mock_metrics = patch(
-            "sentry.integrations.source_code_management.commit_context.metrics"
-        ).start()
+
+        metrics_patch = patch("sentry.integrations.source_code_management.commit_context.metrics")
+        self.mock_metrics = metrics_patch.start()
+        self.addCleanup(metrics_patch.stop)
+
         self.gh_path = self.base_url + "/repos/getsentry/sentry/pulls/{pull_number}/files"
-        installation = self.integration.get_installation(organization_id=self.organization.id)
-        self.gh_client = installation.get_client()
+        self.installation_impl = self.integration.get_installation(
+            organization_id=self.organization.id
+        )
+        self.gh_client = self.installation_impl.get_client()

     @responses.activate
     def test_get_pr_files(self):
@@ -250,449 +264,20 @@ def test_get_pr_files(self):
             {"filename": "baz.py", "status": "modified"},
         ]

-        pr_files = get_pr_files(data)
+        pr_files = self.installation_impl.get_pr_files(data)

         assert len(pr_files) == 1
         pr_file = pr_files[0]
         assert pr_file.filename == data[0]["filename"]
         assert pr_file.patch == data[0]["patch"]

-    def test_get_projects_and_filenames_from_source_file(self):
-        projects = [self.create_project() for _ in range(4)]
-
-        source_stack_pairs = [
-            ("", "./"),
-            ("src/sentry", "sentry/"),
-            ("src/", ""),
-            ("src/sentry/", "sentry/"),
-        ]
-        for i, pair in enumerate(source_stack_pairs):
-            source_root, stack_root = pair
-            self.create_code_mapping(
-                project=projects[i],
-                repo=self.gh_repo,
-                source_root=source_root,
-                stack_root=stack_root,
-                default_branch="master",
-            )
-
-        # matching code mapping from a different org
-        other_org_code_mapping = self.create_code_mapping(
-            project=self.another_org_project,
-            repo=self.another_org_repo,
-            source_root="",
-            stack_root="./",
-        )
-        other_org_code_mapping.organization_id = self.another_organization.id
-        other_org_code_mapping.save()
-
-        source_stack_nonmatches = [
-            ("/src/sentry", "sentry"),
-            ("tests/", "tests/"),
-            ("app/", "static/app"),
-            ("tasks/integrations", "tasks"),  # random match in the middle of the string
-        ]
-        for source_root, stack_root in source_stack_nonmatches:
-            self.create_code_mapping(
-                project=self.create_project(),
-                repo=self.gh_repo,
-                source_root=source_root,
-                stack_root=stack_root,
-                default_branch="master",
-            )
-
-        filename = "src/sentry/tasks/integrations/github/open_pr_comment.py"
-        correct_filenames = [
-            "./src/sentry/tasks/integrations/github/open_pr_comment.py",
-            "sentry//tasks/integrations/github/open_pr_comment.py",
-            "sentry/tasks/integrations/github/open_pr_comment.py",
-        ]
-
-        project_list, sentry_filenames = get_projects_and_filenames_from_source_file(
-            self.organization.id, self.gh_repo.id, filename
-        )
-        assert project_list == set(projects)
-        assert sentry_filenames == set(correct_filenames)
-
-    def test_get_projects_and_filenames_from_source_file_filters_repo(self):
-        projects = [self.create_project() for _ in range(3)]
-
-        source_stack_pairs = [
-            ("src/sentry", "sentry/"),
-            ("src/", ""),
-            ("src/sentry/", "sentry/"),
-        ]
-        for i, pair in enumerate(source_stack_pairs):
-            source_root, stack_root = pair
-            self.create_code_mapping(
-                project=projects[i],
-                repo=self.gh_repo,
-                source_root=source_root,
-                stack_root=stack_root,
-                default_branch="master",
-            )
-
-        # other codemapping in different repo, will not match
-        project = self.create_project()
-        repo = self.create_repo(
-            name="getsentry/santry",
-            provider="integrations:github",
-            integration_id=self.integration.id,
-            project=project,
-            url="https://github.com/getsentry/santry",
-        )
-        self.create_code_mapping(
-            project=project,
-            repo=repo,
-            source_root="",
-            stack_root="./",
-            default_branch="master",
-        )
-
-        filename = "src/sentry/tasks/integrations/github/open_pr_comment.py"
-        correct_filenames = [
-            "sentry//tasks/integrations/github/open_pr_comment.py",
-            "sentry/tasks/integrations/github/open_pr_comment.py",
-        ]
-
-        project_list, sentry_filenames = get_projects_and_filenames_from_source_file(
-            self.organization.id, self.gh_repo.id, filename
-        )
-        assert project_list == set(projects)
-        assert sentry_filenames == set(correct_filenames)
-
-
-class TestGetCommentIssues(CreateEventTestCase):
-    def setUp(self):
-        self.group_id = [self._create_event(user_id=str(i)) for i in range(6)][0].group.id
-        self.another_org = self.create_organization()
-        self.another_org_project = self.create_project(organization=self.another_org)
-
-    def test_simple(self):
-        group_id = [
-            self._create_event(function_names=["blue", "planet"], user_id=str(i)) for i in range(7)
-        ][0].group.id
-        top_5_issues = get_top_5_issues_by_count_for_file(
-            [self.project], ["baz.py"], ["world", "planet"]
-        )
-
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        function_names = [issue["function_name"] for issue in top_5_issues]
-        assert top_5_issue_ids == [group_id, self.group_id]
-        assert function_names == ["planet", "world"]
-
-    def test_javascript_simple(self):
-        # should match function name exactly or className.functionName
-        group_id_1 = [
-            self._create_event(
-                function_names=["other.planet", "component.blue"],
-                filenames=["baz.js", "foo.js"],
-                user_id=str(i),
-            )
-            for i in range(7)
-        ][0].group.id
-        group_id_2 = [
-            self._create_event(
-                function_names=["component.blue", "world"],
-                filenames=["foo.js", "baz.js"],
-                user_id=str(i),
-            )
-            for i in range(6)
-        ][0].group.id
-        top_5_issues = get_top_5_issues_by_count_for_file(
-            [self.project], ["baz.js"], ["world", "planet"]
-        )
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        function_names = [issue["function_name"] for issue in top_5_issues]
-        assert top_5_issue_ids == [group_id_1, group_id_2]
-        assert function_names == ["other.planet", "world"]
-
-    def test_php_simple(self):
-        # should match function name exactly or namespace::functionName
-        group_id_1 = [
-            self._create_event(
-                function_names=["namespace/other/test::planet", "test/component::blue"],
-                filenames=["baz.php", "foo.php"],
-                user_id=str(i),
-            )
-            for i in range(7)
-        ][0].group.id
-        group_id_2 = [
-            self._create_event(
-                function_names=["test/component::blue", "world"],
-                filenames=["foo.php", "baz.php"],
-                user_id=str(i),
-            )
-            for i in range(6)
-        ][0].group.id
-        top_5_issues = get_top_5_issues_by_count_for_file(
-            [self.project], ["baz.php"], ["world", "planet"]
-        )
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        function_names = [issue["function_name"] for issue in top_5_issues]
-        assert top_5_issue_ids == [group_id_1, group_id_2]
-        assert function_names == ["namespace/other/test::planet", "world"]
-
-    def test_ruby_simple(self):
-        # should match function name exactly or class.functionName
-        group_id_1 = [
-            self._create_event(
-                function_names=["test.planet", "test/component.blue"],
-                filenames=["baz.rb", "foo.rb"],
-                user_id=str(i),
-            )
-            for i in range(7)
-        ][0].group.id
-        group_id_2 = [
-            self._create_event(
-                function_names=["test/component.blue", "world"],
-                filenames=["foo.rb", "baz.rb"],
-                user_id=str(i),
-            )
-            for i in range(6)
-        ][0].group.id
-        top_5_issues = get_top_5_issues_by_count_for_file(
-            [self.project], ["baz.rb"], ["world", "planet"]
-        )
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        function_names = [issue["function_name"] for issue in top_5_issues]
-        assert top_5_issue_ids == [group_id_1, group_id_2]
-        assert function_names == ["test.planet", "world"]
-
-    def test_filters_resolved_issue(self):
-        group = Group.objects.all()[0]
-        group.resolved_at = timezone.now()
-        group.status = GroupStatus.RESOLVED
-        group.substatus = None
-        group.save()
-
-        top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"])
-        assert len(top_5_issues) == 0
-
-    def test_filters_handled_issue(self):
-        group_id = self._create_event(filenames=["bar.py", "baz.py"], handled=True).group.id
-
-        top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"])
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        assert group_id != self.group_id
-        assert top_5_issue_ids == [self.group_id]
-
-    def test_project_group_id_mismatch(self):
-        # we fetch all group_ids that belong to the projects passed into the function
-        self._create_event(project_id=self.another_org_project.id)
-
-        top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"])
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        assert top_5_issue_ids == [self.group_id]
-
-    def test_filename_mismatch(self):
-        group_id = self._create_event(
-            filenames=["foo.py", "bar.py"],
-        ).group.id
-
-        top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"])
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        assert group_id != self.group_id
-        assert top_5_issue_ids == [self.group_id]
-
-    def test_function_name_mismatch(self):
-        group_id = self._create_event(
-            function_names=["world", "hello"],
-        ).group.id
-
-        top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"])
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        assert group_id != self.group_id
-        assert top_5_issue_ids == [self.group_id]
-
-    def test_not_first_frame(self):
-        group_id = self._create_event(
-            function_names=["world", "hello"], filenames=["baz.py", "bar.py"], culprit="hi"
-        ).group.id
-
-        top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"])
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        function_names = [issue["function_name"] for issue in top_5_issues]
-        assert group_id != self.group_id
-        assert top_5_issue_ids == [self.group_id, group_id]
-        assert function_names == ["world", "world"]
-
-    def test_not_within_frame_limit(self):
-        function_names = ["world"] + ["a" for _ in range(STACKFRAME_COUNT)]
-        filenames = ["baz.py"] + ["foo.py" for _ in range(STACKFRAME_COUNT)]
-        group_id = self._create_event(function_names=function_names, filenames=filenames).group.id
-
-        top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"])
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        assert group_id != self.group_id
-        assert top_5_issue_ids == [self.group_id]
-
-    def test_event_too_old(self):
-        group_id = self._create_event(
-            timestamp=before_now(days=15).isoformat(), filenames=["bar.py", "baz.py"]
-        ).group.id
-
-        top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"])
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        assert group_id != self.group_id
-        assert top_5_issue_ids == [self.group_id]
-
-    def test_squashes_same_title_culprit_issues(self):
-        # both of these have the same title and culprit,
-        # so "squash" them and return the one with greater number of events
-        [
-            self._create_event(
-                filenames=["base.py", "baz.py"],
-                function_names=["wonderful", "world"],
-                user_id=str(i),
-                handled=False,
-            )
-            for i in range(3)
-        ]
-        group_id = [
-            self._create_event(
-                filenames=["bar.py", "baz.py"],
-                function_names=["blue", "planet"],
-                user_id=str(i),
-                handled=False,
-            )
-            for i in range(5)
-        ][0].group_id
-
-        top_5_issues = get_top_5_issues_by_count_for_file(
-            [self.project], ["baz.py"], ["world", "planet"]
-        )
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        function_names = [issue["function_name"] for issue in top_5_issues]
-
-        assert top_5_issue_ids == [self.group_id, group_id]
-        assert function_names == ["world", "planet"]
-
-    def test_fetches_top_five_issues(self):
-        group_id_1 = [
-            self._create_event(
-                filenames=["bar.py", "baz.py"],
-                function_names=["blue", "planet"],
-                user_id=str(i),
-                handled=False,
-            )
-            for i in range(5)
-        ][0].group.id
-        [
-            self._create_event(
-                filenames=["hello.py", "baz.py"],
-                function_names=["green", "planet"],
-                user_id=str(i),
-                handled=True,
-            )
-            for i in range(4)
-        ]
-        group_id_3 = [
-            self._create_event(
-                filenames=["base.py", "baz.py"],
-                function_names=["wonderful", "world"],
-                user_id=str(i),
-                handled=False,
-                culprit="hi",
-            )
-            for i in range(3)
-        ][0].group.id
-        [
-            self._create_event(
-                filenames=["nom.py", "baz.py"],
-                function_names=["jurassic", "world"],
-                user_id=str(i),
-                handled=True,
-            )
-            for i in range(2)
-        ]
-        # 6th issue
-        self._create_event(
-            filenames=["nan.py", "baz.py"], function_names=["my_own", "world"], handled=True
-        )
-        # unrelated issue with same stack trace in different project
-        self._create_event(project_id=self.another_org_project.id)
-
-        top_5_issues = get_top_5_issues_by_count_for_file(
-            [self.project], ["baz.py"], ["world", "planet"]
-        )
-        top_5_issue_ids = [issue["group_id"] for issue in top_5_issues]
-        function_names = [issue["function_name"] for issue in top_5_issues]
-
-        # filters handled issues
-        assert top_5_issue_ids == [self.group_id, group_id_1, group_id_3]
-        assert function_names == ["world", "planet", "world"]
-
-    def test_get_issue_table_contents(self):
-        group_id_1 = [
-            self._create_event(
-                culprit="issue1",
-                filenames=["bar.py", "baz.py"],
-                function_names=["blue", "planet"],
-                user_id=str(i),
-                handled=False,
-            )
-            for i in range(5)
-        ][0].group.id
-        group_id_2 = [
-            self._create_event(
-                culprit="issue2",
-                filenames=["hello.py", "baz.py"],
-                function_names=["green", "planet"],
-                user_id=str(i),
-                handled=False,
-            )
-            for i in range(4)
-        ][0].group.id
-        group_id_3 = [
-            self._create_event(
-                culprit="issue3",
-                filenames=["base.py", "baz.py"],
-                function_names=["wonderful", "world"],
-                user_id=str(i),
-                handled=False,
-            )
-            for i in range(3)
-        ][0].group.id
-        group_id_4 = [
-            self._create_event(
-                culprit="issue4",
-                filenames=["nom.py", "baz.py"],
-                function_names=["jurassic", "world"],
-                user_id=str(i),
-                handled=False,
-            )
-            for i in range(2)
-        ][0].group.id
-
-        top_5_issues = get_top_5_issues_by_count_for_file(
-            [self.project], ["baz.py"], ["world", "planet"]
-        )
-        affected_users = [6, 5, 4, 3, 2]
-        event_count = [issue["event_count"] for issue in top_5_issues]
-        function_names = [issue["function_name"] for issue in top_5_issues]
-
-        comment_table_contents = get_issue_table_contents(top_5_issues)
-        group_ids = [self.group_id, group_id_1, group_id_2, group_id_3, group_id_4]
-
-        for i in range(5):
-            subtitle = "issue" + str(i)
-            assert (
-                PullRequestIssue(
-                    title="Error",
-                    subtitle=subtitle,
-                    url=f"http://testserver/organizations/{self.organization.slug}/issues/{group_ids[i]}/",
-                    affected_users=affected_users[i],
-                    event_count=event_count[i],
-                    function_name=function_names[i],
-                )
-                in comment_table_contents
-            )


 class TestFormatComment(TestCase):
     def setUp(self):
         super().setUp()
+        self.installation_impl = self.integration.get_installation(
+            organization_id=self.organization.id
+        )

     def test_comment_format_python(self):
         file1 = "tests/sentry/tasks/integrations/github/test_open_pr_comment.py"
@@ -722,9 +307,13 @@ def test_comment_format_python(self):
             for i in range(2)
         ]

-        issue_table = format_issue_table(file1, file1_issues, PATCH_PARSERS, toggle=False)
-        toggle_issue_table = format_issue_table(file2, file2_issues, PATCH_PARSERS, toggle=True)
-        comment = format_open_pr_comment([issue_table, toggle_issue_table])
+        issue_table = self.installation_impl.format_issue_table(
+            file1, file1_issues, PATCH_PARSERS, toggle=False
+        )
+        toggle_issue_table = self.installation_impl.format_issue_table(
+            file2, file2_issues, PATCH_PARSERS, toggle=True
+        )
+        comment = self.installation_impl.format_open_pr_comment([issue_table, toggle_issue_table])

         assert (
             comment
@@ -781,9 +370,13 @@ def test_comment_format_javascript(self):
             for i in range(2)
         ]

-        issue_table = format_issue_table(file1, file1_issues, PATCH_PARSERS, toggle=False)
-        toggle_issue_table = format_issue_table(file2, file2_issues, PATCH_PARSERS, toggle=True)
-        comment = format_open_pr_comment([issue_table, toggle_issue_table])
+        issue_table = self.installation_impl.format_issue_table(
+            file1, file1_issues, PATCH_PARSERS, toggle=False
+        )
+        toggle_issue_table = self.installation_impl.format_issue_table(
+            file2, file2_issues, PATCH_PARSERS, toggle=True
+        )
+        comment = self.installation_impl.format_open_pr_comment([issue_table, toggle_issue_table])

         assert (
             comment
@@ -815,20 +408,24 @@ def test_comment_format_missing_language(self):
         file1 = "tests/sentry/tasks/integrations/github/test_open_pr_comment.docx"

-        issue_table = format_issue_table(file1, [], PATCH_PARSERS, toggle=False)
+        issue_table = self.installation_impl.format_issue_table(
+            file1, [], PATCH_PARSERS, toggle=False
+        )

         assert issue_table == ""


-@patch("sentry.integrations.github.tasks.open_pr_comment.get_pr_files")
+@patch("sentry.integrations.github.integration.GitHubIntegration.get_pr_files")
 @patch(
-    "sentry.integrations.github.tasks.open_pr_comment.get_projects_and_filenames_from_source_file"
+    "sentry.integrations.github.integration.GitHubIntegration.get_projects_and_filenames_from_source_file"
 )
 @patch(
     "sentry.integrations.source_code_management.language_parsers.PythonParser.extract_functions_from_patch"
 )
-@patch("sentry.integrations.github.tasks.open_pr_comment.get_top_5_issues_by_count_for_file")
-@patch("sentry.integrations.github.tasks.open_pr_comment.safe_for_comment")
+@patch(
+    "sentry.integrations.github.integration.GitHubIntegration.get_top_5_issues_by_count_for_file"
+)
+@patch("sentry.integrations.github.integration.GitHubIntegration.get_pr_files_safe_for_comment")
 @patch("sentry.integrations.source_code_management.commit_context.metrics")
 class TestOpenPRCommentWorkflow(IntegrationTestCase, CreateEventTestCase):
     base_url = "https://api.github.com"
@@ -977,13 +574,11 @@ def test_comment_workflow_comment_exists(
         assert not mock_analytics.called

     @patch("sentry.analytics.record")
-    @patch("sentry.integrations.github.tasks.open_pr_comment.metrics")
     @responses.activate
     def test_comment_workflow_early_return(
         self,
-        mock_metrics,
         mock_analytics,
-        _,
+        mock_metrics,
         mock_safe_for_comment,
         mock_issues,
         mock_function_names,
@@ -1034,13 +629,13 @@
         assert not mock_analytics.called

     @patch("sentry.analytics.record")
-    @patch("sentry.integrations.github.tasks.open_pr_comment.metrics")
+    @patch("sentry.integrations.github.integration.metrics")
     @responses.activate
     def test_comment_workflow_api_error(
         self,
-        mock_metrics,
+        mock_integration_metrics,
         mock_analytics,
-        _,
+        mock_metrics,
         mock_safe_for_comment,
         mock_issues,
         mock_function_names,
@@ -1083,7 +678,9 @@
         with pytest.raises(ApiError):
             open_pr_comment_workflow(self.pr.id)

-        mock_metrics.incr.assert_called_with("github.open_pr_comment.api_error")
+        mock_metrics.incr.assert_called_with(
+            "github.open_pr_comment.error", tags={"type": "api_error"}
+        )

         pr_2 = PullRequest.objects.create(
             organization_id=self.organization.id,
@@ -1093,7 +690,7 @@

         # does not raise ApiError for locked issue
         open_pr_comment_workflow(pr_2.id)
-        mock_metrics.incr.assert_called_with(
+        mock_integration_metrics.incr.assert_called_with(
             "github.open_pr_comment.error", tags={"type": "issue_locked_error"}
         )
@@ -1105,17 +702,14 @@

         # does not raise ApiError for rate limited error
         open_pr_comment_workflow(pr_3.id)
-
-        mock_metrics.incr.assert_called_with(
+        mock_integration_metrics.incr.assert_called_with(
             "github.open_pr_comment.error", tags={"type": "rate_limited_error"}
         )

         assert not mock_analytics.called

-    @patch("sentry.integrations.github.tasks.open_pr_comment.metrics")
     def test_comment_workflow_missing_pr(
         self,
         mock_metrics,
-        _,
         mock_safe_for_comment,
         mock_issues,
         mock_function_names,
@@ -1131,11 +725,9 @@
             "github.open_pr_comment.error", tags={"type": "missing_pr"}
         )
@patch("sentry.integrations.github.tasks.open_pr_comment.metrics") def test_comment_workflow_missing_org( self, mock_metrics, - _, mock_safe_for_comment, mock_issues, mock_function_names, @@ -1152,11 +744,9 @@ def test_comment_workflow_missing_org( "github.open_pr_comment.error", tags={"type": "missing_org"} ) - @patch("sentry.integrations.github.tasks.open_pr_comment.metrics") def test_comment_workflow_missing_repo( self, mock_metrics, - _, mock_safe_for_comment, mock_issues, mock_function_names, @@ -1173,11 +763,9 @@ def test_comment_workflow_missing_repo( "github.open_pr_comment.error", tags={"type": "missing_repo"} ) - @patch("sentry.integrations.github.tasks.open_pr_comment.metrics") def test_comment_workflow_missing_integration( self, mock_metrics, - _, mock_safe_for_comment, mock_issues, mock_function_names, @@ -1195,11 +783,9 @@ def test_comment_workflow_missing_integration( "github.open_pr_comment.error", tags={"type": "missing_integration"} ) - @patch("sentry.integrations.github.tasks.open_pr_comment.metrics") def test_comment_workflow_not_safe_for_comment( self, mock_metrics, - _, mock_safe_for_comment, mock_issues, mock_function_names, diff --git a/tests/sentry/integrations/github/tasks/test_pr_comment.py b/tests/sentry/integrations/github/tasks/test_pr_comment.py index 54bcee6d5374d0..87b0f0442d4607 100644 --- a/tests/sentry/integrations/github/tasks/test_pr_comment.py +++ b/tests/sentry/integrations/github/tasks/test_pr_comment.py @@ -1,4 +1,3 @@ -import logging from datetime import UTC, datetime, timedelta from unittest.mock import patch @@ -7,21 +6,18 @@ from django.utils import timezone from sentry.constants import ObjectStatus -from sentry.integrations.github.integration import GitHubIntegrationProvider +from sentry.integrations.github.integration import GitHubIntegration, GitHubIntegrationProvider from sentry.integrations.github.tasks.pr_comment import ( - format_comment, - get_comment_contents, - get_top_5_issues_by_count, github_comment_reactions, github_comment_workflow, - pr_to_issue_query, ) -from sentry.integrations.github.tasks.utils import PullRequestIssue from sentry.integrations.models.integration import Integration +from sentry.integrations.source_code_management.commit_context import debounce_pr_comment_cache_key from sentry.models.commit import Commit from sentry.models.group import Group from sentry.models.groupowner import GroupOwner, GroupOwnerType from sentry.models.options.organization_option import OrganizationOption +from sentry.models.organization import Organization from sentry.models.project import Project from sentry.models.pullrequest import ( CommentType, @@ -31,9 +27,9 @@ ) from sentry.models.repository import Repository from sentry.shared_integrations.exceptions import ApiError -from sentry.tasks.commit_context import DEBOUNCE_PR_COMMENT_CACHE_KEY -from sentry.testutils.cases import IntegrationTestCase, SnubaTestCase, TestCase +from sentry.testutils.cases import IntegrationTestCase from sentry.testutils.helpers.datetime import before_now, freeze_time +from sentry.testutils.helpers.integrations import get_installation_of_type from sentry.testutils.silo import assume_test_silo_mode_of from sentry.testutils.skips import requires_snuba from sentry.utils.cache import cache @@ -47,6 +43,11 @@ class GithubCommentTestCase(IntegrationTestCase): def setUp(self): super().setUp() + + self.installation = get_installation_of_type( + GitHubIntegration, self.integration, self.organization.id + ) + self.another_integration = self.create_integration( 
organization=self.organization, external_id="1", provider="gitlab" ) @@ -162,16 +163,15 @@ def create_pr_issues(self, gh_repo=None): return pr -class TestPrToIssueQuery(GithubCommentTestCase): +class TestGetIssueIdsFromPr(GithubCommentTestCase): def test_simple(self): """one pr with one issue""" commit = self.add_commit_to_repo(self.gh_repo, self.user, self.project) pr = self.add_pr_to_commit(commit) groupowner = self.add_groupowner_to_commit(commit, self.project, self.user) - results = pr_to_issue_query(pr.id) - - assert results[0] == (self.gh_repo.id, pr.key, self.organization.id, [groupowner.group_id]) + results = self.installation.get_issue_ids_from_pr(pr, limit=1) + assert results == [groupowner.group_id] def test_multiple_issues(self): """one pr with multiple issues""" @@ -181,14 +181,8 @@ def test_multiple_issues(self): groupowner_2 = self.add_groupowner_to_commit(commit, self.project, self.user) groupowner_3 = self.add_groupowner_to_commit(commit, self.project, self.user) - results = pr_to_issue_query(pr.id) - - assert results[0][0:3] == (self.gh_repo.id, pr.key, self.organization.id) - assert ( - groupowner_1.group_id in results[0][3] - and groupowner_2.group_id in results[0][3] - and groupowner_3.group_id in results[0][3] - ) + results = self.installation.get_issue_ids_from_pr(pr) + assert results == [groupowner_1.group_id, groupowner_2.group_id, groupowner_3.group_id] def test_multiple_prs(self): """multiple eligible PRs with one issue each""" @@ -199,21 +193,11 @@ def test_multiple_prs(self): groupowner_1 = self.add_groupowner_to_commit(commit_1, self.project, self.user) groupowner_2 = self.add_groupowner_to_commit(commit_2, self.project, self.user) - results = pr_to_issue_query(pr_1.id) - assert results[0] == ( - self.gh_repo.id, - pr_1.key, - self.organization.id, - [groupowner_1.group_id], - ) + results = self.installation.get_issue_ids_from_pr(pr_1, limit=1) + assert results == [groupowner_1.group_id] - results = pr_to_issue_query(pr_2.id) - assert results[0] == ( - self.gh_repo.id, - pr_2.key, - self.organization.id, - [groupowner_2.group_id], - ) + results = self.installation.get_issue_ids_from_pr(pr_2, limit=1) + assert results == [groupowner_2.group_id] def test_multiple_commits(self): """Multiple eligible commits with one issue each""" @@ -223,124 +207,12 @@ def test_multiple_commits(self): self.add_branch_commit_to_pr(commit_2, pr) groupowner_1 = self.add_groupowner_to_commit(commit_1, self.project, self.user) groupowner_2 = self.add_groupowner_to_commit(commit_2, self.project, self.user) - results = pr_to_issue_query(pr.id) - assert results[0] == ( - self.gh_repo.id, - pr.key, - self.organization.id, - [groupowner_1.group_id, groupowner_2.group_id], - ) - -class TestTop5IssuesByCount(TestCase, SnubaTestCase): - def test_simple(self): - group1 = [ - self.store_event( - {"fingerprint": ["group-1"], "timestamp": before_now(days=1).isoformat()}, - project_id=self.project.id, - ) - for _ in range(3) - ][0].group.id - group2 = [ - self.store_event( - {"fingerprint": ["group-2"], "timestamp": before_now(days=1).isoformat()}, - project_id=self.project.id, - ) - for _ in range(6) - ][0].group.id - group3 = [ - self.store_event( - {"fingerprint": ["group-3"], "timestamp": before_now(days=1).isoformat()}, - project_id=self.project.id, - ) - for _ in range(4) - ][0].group.id - res = get_top_5_issues_by_count([group1, group2, group3], self.project) - assert [issue["group_id"] for issue in res] == [group2, group3, group1] - - def test_over_5_issues(self): - issue_ids = [ - 
-            self.store_event(
-                {"fingerprint": [f"group-{idx}"], "timestamp": before_now(days=1).isoformat()},
-                project_id=self.project.id,
-            ).group.id
-            for idx in range(6)
-        ]
-        res = get_top_5_issues_by_count(issue_ids, self.project)
-        assert len(res) == 5
-
-    def test_ignore_info_level_issues(self):
-        group1 = [
-            self.store_event(
-                {
-                    "fingerprint": ["group-1"],
-                    "timestamp": before_now(days=1).isoformat(),
-                    "level": logging.INFO,
-                },
-                project_id=self.project.id,
-            )
-            for _ in range(3)
-        ][0].group.id
-        group2 = [
-            self.store_event(
-                {"fingerprint": ["group-2"], "timestamp": before_now(days=1).isoformat()},
-                project_id=self.project.id,
-            )
-            for _ in range(6)
-        ][0].group.id
-        group3 = [
-            self.store_event(
-                {
-                    "fingerprint": ["group-3"],
-                    "timestamp": before_now(days=1).isoformat(),
-                    "level": logging.INFO,
-                },
-                project_id=self.project.id,
-            )
-            for _ in range(4)
-        ][0].group.id
-        res = get_top_5_issues_by_count([group1, group2, group3], self.project)
-        assert [issue["group_id"] for issue in res] == [group2]
-
-    def test_do_not_ignore_other_issues(self):
-        group1 = [
-            self.store_event(
-                {
-                    "fingerprint": ["group-1"],
-                    "timestamp": before_now(days=1).isoformat(),
-                    "level": logging.ERROR,
-                },
-                project_id=self.project.id,
-            )
-            for _ in range(3)
-        ][0].group.id
-        group2 = [
-            self.store_event(
-                {
-                    "fingerprint": ["group-2"],
-                    "timestamp": before_now(days=1).isoformat(),
-                    "level": logging.INFO,
-                },
-                project_id=self.project.id,
-            )
-            for _ in range(6)
-        ][0].group.id
-        group3 = [
-            self.store_event(
-                {
-                    "fingerprint": ["group-3"],
-                    "timestamp": before_now(days=1).isoformat(),
-                    "level": logging.DEBUG,
-                },
-                project_id=self.project.id,
-            )
-            for _ in range(4)
-        ][0].group.id
-        res = get_top_5_issues_by_count([group1, group2, group3], self.project)
-        assert [issue["group_id"] for issue in res] == [group3, group1]
+        results = self.installation.get_issue_ids_from_pr(pr)
+        assert results == [groupowner_1.group_id, groupowner_2.group_id]


-class TestCommentBuilderQueries(GithubCommentTestCase):
+class TestFormatPrComment(GithubCommentTestCase):
     def test_simple(self):
         ev1 = self.store_event(
             data={"message": "issue 1", "culprit": "issue1", "fingerprint": ["group-1"]},
@@ -357,54 +229,16 @@ def test_simple(self):
             project_id=self.project.id,
         )
         assert ev3.group is not None
-        comment_contents = get_comment_contents([ev1.group.id, ev2.group.id, ev3.group.id])
-        assert (
-            PullRequestIssue(
-                title="issue 1",
-                subtitle="issue1",
-                url=f"http://testserver/organizations/{self.organization.slug}/issues/{ev1.group.id}/",
-            )
-            in comment_contents
-        )
-        assert (
-            PullRequestIssue(
-                title="issue 2",
-                subtitle="issue2",
-                url=f"http://testserver/organizations/{self.organization.slug}/issues/{ev2.group.id}/",
-            )
-            in comment_contents
-        )
-        assert (
-            PullRequestIssue(
-                title="issue 3",
-                subtitle="issue3",
-                url=f"http://testserver/organizations/{self.organization.slug}/issues/{ev3.group.id}/",
-            )
-            in comment_contents
-        )
-
-
-class TestFormatComment(TestCase):
-    def test_format_comment(self):
-        issues = [
-            PullRequestIssue(
-                title="TypeError",
-                subtitle="sentry.tasks.auto_source_code_config.derive_code_mappings",
-                url="https://sentry.sentry.io/issues/",
-            ),
-            PullRequestIssue(
-                title="KafkaException",
-                subtitle="query_subscription_consumer_process_message",
-                url="https://sentry.sentry.io/stats/",
-            ),
-        ]
-
-        formatted_comment = format_comment(issues)
-        expected_comment = """## Suspect Issues
+        formatted_comment = self.installation.format_pr_comment(
+            [ev1.group.id, ev2.group.id, ev3.group.id]
+        )
+        expected_comment = f"""## Suspect Issues
 This pull request was deployed and Sentry observed the following issues:

-- ‼️ **TypeError** `sentry.tasks.auto_source_code_config.derive_cod...` [View Issue](https://sentry.sentry.io/issues/?referrer=github-pr-bot)
-- ‼️ **KafkaException** `query_subscription_consumer_process_message` [View Issue](https://sentry.sentry.io/stats/?referrer=github-pr-bot)
+- ‼️ **issue 1** `issue1` [View Issue](http://testserver/organizations/foo/issues/{ev1.group.id}/?referrer=github-pr-bot)
+- ‼️ **issue 2** `issue2` [View Issue](http://testserver/organizations/foo/issues/{ev2.group.id}/?referrer=github-pr-bot)
+- ‼️ **issue 3** `issue3` [View Issue](http://testserver/organizations/foo/issues/{ev3.group.id}/?referrer=github-pr-bot)

 Did you find this useful? React with a 👍 or 👎"""
         assert formatted_comment == expected_comment
@@ -416,10 +250,19 @@ def setUp(self):
         self.user_id = "user_1"
         self.app_id = "app_1"
         self.pr = self.create_pr_issues()
-        self.cache_key = DEBOUNCE_PR_COMMENT_CACHE_KEY(self.pr.id)
+        self.cache_key = debounce_pr_comment_cache_key(self.pr.id)

-    @patch("sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count")
-    @patch("sentry.integrations.source_code_management.commit_context.metrics")
+    patch_get_top_5_issues_by_count = patch(
+        "sentry.integrations.github.integration.GitHubIntegration.get_top_5_issues_by_count"
+    )
+    patch_format_pr_comment = patch(
+        "sentry.integrations.github.integration.GitHubIntegration.format_pr_comment"
+    )
+    patch_integration_metrics = patch("sentry.integrations.github.integration.metrics")
+    patch_metrics = patch("sentry.integrations.source_code_management.commit_context.metrics")
+
+    @patch_get_top_5_issues_by_count
+    @patch_metrics
     @responses.activate
     def test_comment_workflow(self, mock_metrics, mock_issues):
         group_objs = Group.objects.order_by("id").all()
@@ -447,8 +290,8 @@ def test_comment_workflow(self, mock_metrics, mock_issues):
         assert pull_request_comment_query[0].comment_type == CommentType.MERGED_PR
         mock_metrics.incr.assert_called_with("github.pr_comment.comment_created")

-    @patch("sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count")
-    @patch("sentry.integrations.source_code_management.commit_context.metrics")
+    @patch_get_top_5_issues_by_count
+    @patch_metrics
     @responses.activate
     @freeze_time(datetime(2023, 6, 8, 0, 0, 0, tzinfo=UTC))
     def test_comment_workflow_updates_comment(self, mock_metrics, mock_issues):
@@ -490,10 +333,11 @@ def test_comment_workflow_updates_comment(self, mock_metrics, mock_issues):
         assert pull_request_comment.updated_at == timezone.now()
         mock_metrics.incr.assert_called_with("github.pr_comment.comment_updated")

-    @patch("sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count")
-    @patch("sentry.integrations.github.tasks.pr_comment.metrics")
+    @patch_get_top_5_issues_by_count
+    @patch_integration_metrics
+    @patch_metrics
     @responses.activate
-    def test_comment_workflow_api_error(self, mock_metrics, mock_issues):
+    def test_comment_workflow_api_error(self, mock_metrics, mock_integration_metrics, mock_issues):
         cache.set(self.cache_key, True, timedelta(minutes=5).total_seconds())
         mock_issues.return_value = [
             {"group_id": g.id, "event_count": 10} for g in Group.objects.all()
         ]
@@ -526,40 +370,40 @@ def test_comment_workflow_api_error(self, mock_metrics, mock_issues):
         with pytest.raises(ApiError):
             github_comment_workflow(self.pr.id, self.project.id)

-        assert cache.get(self.cache_key) is None
mock_metrics.incr.assert_called_with("github.pr_comment.api_error") + assert cache.get(self.cache_key) is None + mock_metrics.incr.assert_called_with("github.pr_comment.error", tags={"type": "api_error"}) pr_2 = self.create_pr_issues() - cache_key = DEBOUNCE_PR_COMMENT_CACHE_KEY(pr_2.id) + cache_key = debounce_pr_comment_cache_key(pr_2.id) cache.set(cache_key, True, timedelta(minutes=5).total_seconds()) # does not raise ApiError for locked issue github_comment_workflow(pr_2.id, self.project.id) assert cache.get(cache_key) is None - mock_metrics.incr.assert_called_with( + mock_integration_metrics.incr.assert_called_with( "github.pr_comment.error", tags={"type": "issue_locked_error"} ) pr_3 = self.create_pr_issues() - cache_key = DEBOUNCE_PR_COMMENT_CACHE_KEY(pr_3.id) + cache_key = debounce_pr_comment_cache_key(pr_3.id) cache.set(cache_key, True, timedelta(minutes=5).total_seconds()) # does not raise ApiError for rate limited error github_comment_workflow(pr_3.id, self.project.id) assert cache.get(cache_key) is None - mock_metrics.incr.assert_called_with( + mock_integration_metrics.incr.assert_called_with( "github.pr_comment.error", tags={"type": "rate_limited_error"} ) - @patch( - "sentry.integrations.github.tasks.pr_comment.pr_to_issue_query", - return_value=[(0, 0, 0, [])], - ) - @patch("sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count") - @patch("sentry.integrations.github.tasks.pr_comment.metrics") - def test_comment_workflow_missing_org(self, mock_metrics, mock_issues, mock_issue_query): + @patch_get_top_5_issues_by_count + @patch_metrics + @patch("sentry.models.Organization.objects") + def test_comment_workflow_missing_org(self, mock_repository, mock_metrics, mock_issues): # Organization.DoesNotExist should trigger the cache to release the key cache.set(self.cache_key, True, timedelta(minutes=5).total_seconds()) + + mock_repository.get_from_cache.side_effect = Organization.DoesNotExist + github_comment_workflow(self.pr.id, self.project.id) assert not mock_issues.called @@ -568,7 +412,7 @@ def test_comment_workflow_missing_org(self, mock_metrics, mock_issues, mock_issu "github.pr_comment.error", tags={"type": "missing_org"} ) - @patch("sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count") + @patch_get_top_5_issues_by_count def test_comment_workflow_missing_org_option(self, mock_issues): OrganizationOption.objects.set_value( organization=self.organization, key="sentry:github_pr_bot", value=False @@ -577,9 +421,9 @@ def test_comment_workflow_missing_org_option(self, mock_issues): assert not mock_issues.called - @patch("sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count") + @patch_get_top_5_issues_by_count @patch("sentry.models.Project.objects.get_from_cache") - @patch("sentry.integrations.github.tasks.pr_comment.metrics") + @patch_metrics def test_comment_workflow_missing_project(self, mock_metrics, mock_project, mock_issues): # Project.DoesNotExist should trigger the cache to release the key cache.set(self.cache_key, True, timedelta(minutes=5).total_seconds()) @@ -594,14 +438,12 @@ def test_comment_workflow_missing_project(self, mock_metrics, mock_project, mock "github.pr_comment.error", tags={"type": "missing_project"} ) - @patch( - "sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count", - ) + @patch_get_top_5_issues_by_count @patch("sentry.models.Repository.objects") - @patch("sentry.integrations.github.tasks.pr_comment.format_comment") - @patch("sentry.integrations.github.tasks.pr_comment.metrics") + 
@patch_format_pr_comment + @patch_metrics def test_comment_workflow_missing_repo( - self, mock_metrics, mock_format_comment, mock_repository, mock_issues + self, mock_metrics, mock_format_pr_comment, mock_repository, mock_issues ): # Repository.DoesNotExist should trigger the cache to release the key cache.set(self.cache_key, True, timedelta(minutes=5).total_seconds()) @@ -613,20 +455,18 @@ def test_comment_workflow_missing_repo( {"group_id": g.id, "event_count": 10} for g in Group.objects.all() ] - assert mock_issues.called - assert not mock_format_comment.called + assert not mock_issues.called + assert not mock_format_pr_comment.called assert cache.get(self.cache_key) is None mock_metrics.incr.assert_called_with( "github.pr_comment.error", tags={"type": "missing_repo"} ) - @patch( - "sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count", - ) - @patch("sentry.integrations.github.tasks.pr_comment.format_comment") - @patch("sentry.integrations.github.tasks.pr_comment.metrics") + @patch_get_top_5_issues_by_count + @patch_format_pr_comment + @patch_metrics def test_comment_workflow_missing_integration( - self, mock_metrics, mock_format_comment, mock_issues + self, mock_metrics, mock_format_pr_comment, mock_issues ): # missing integration should trigger the cache to release the key cache.set(self.cache_key, True, timedelta(minutes=5).total_seconds()) @@ -635,29 +475,25 @@ def test_comment_workflow_missing_integration( with assume_test_silo_mode_of(Integration): self.integration.update(status=ObjectStatus.DISABLED) - mock_issues.return_value = [ - {"group_id": g.id, "event_count": 10} for g in Group.objects.all() - ] - github_comment_workflow(self.pr.id, self.project.id) - assert mock_issues.called - assert not mock_format_comment.called + assert not mock_issues.called + assert not mock_format_pr_comment.called assert cache.get(self.cache_key) is None mock_metrics.incr.assert_called_with( "github.pr_comment.error", tags={"type": "missing_integration"} ) - @patch("sentry.integrations.github.tasks.pr_comment.get_top_5_issues_by_count") - @patch("sentry.integrations.github.tasks.pr_comment.format_comment") + @patch_get_top_5_issues_by_count + @patch_format_pr_comment @responses.activate - def test_comment_workflow_no_issues(self, mock_format_comment, mock_issues): + def test_comment_workflow_no_issues(self, mock_format_pr_comment, mock_issues): mock_issues.return_value = [] github_comment_workflow(self.pr.id, self.project.id) assert mock_issues.called - assert not mock_format_comment.called + assert not mock_format_pr_comment.called class TestCommentReactionsTask(GithubCommentTestCase): diff --git a/tests/sentry/integrations/source_code_management/test_commit_context.py b/tests/sentry/integrations/source_code_management/test_commit_context.py new file mode 100644 index 00000000000000..d18541eb5e0e32 --- /dev/null +++ b/tests/sentry/integrations/source_code_management/test_commit_context.py @@ -0,0 +1,907 @@ +import logging +from typing import Any +from unittest.mock import Mock, patch + +import pytest +from django.utils import timezone + +from sentry.integrations.gitlab.constants import GITLAB_CLOUD_BASE_URL +from sentry.integrations.source_code_management.commit_context import ( + CommitContextIntegration, + CommitContextOrganizationOptionKeys, + CommitContextReferrerIds, + CommitContextReferrers, + PullRequestFile, + PullRequestIssue, + SourceLineInfo, +) +from sentry.integrations.source_code_management.constants import STACKFRAME_COUNT +from sentry.integrations.types import 
EventLifecycleOutcome +from sentry.models.group import Group, GroupStatus +from sentry.models.organization import Organization +from sentry.models.pullrequest import PullRequest +from sentry.models.repository import Repository +from sentry.shared_integrations.exceptions import ApiError +from sentry.snuba.referrer import Referrer +from sentry.testutils.asserts import assert_failure_metric, assert_halt_metric, assert_slo_metric +from sentry.testutils.cases import SnubaTestCase, TestCase +from sentry.testutils.helpers.datetime import before_now +from sentry.types.referrer_ids import GITHUB_OPEN_PR_BOT_REFERRER, GITHUB_PR_BOT_REFERRER +from sentry.users.models.identity import Identity + + +class MockCommitContextIntegration(CommitContextIntegration): + """Mock implementation for testing""" + + integration_name = "mock_integration" + + def __init__(self): + self.client = Mock() + self.client.base_url = "https://example.com" + + def get_client(self): + return self.client + + commit_context_referrers = CommitContextReferrers( + pr_comment_bot=Referrer.GITHUB_PR_COMMENT_BOT, + ) + commit_context_referrer_ids = CommitContextReferrerIds( + pr_bot=GITHUB_PR_BOT_REFERRER, + open_pr_bot=GITHUB_OPEN_PR_BOT_REFERRER, + ) + commit_context_organization_option_keys = CommitContextOrganizationOptionKeys( + pr_bot="sentry:github_pr_bot", + ) + + def format_pr_comment(self, issue_ids: list[int]) -> str: + raise NotImplementedError + + def build_pr_comment_data( + self, + organization: Organization, + repo: Repository, + pr_key: str, + comment_body: str, + issue_ids: list[int], + ) -> dict[str, Any]: + raise NotImplementedError + + def queue_comment_task(self, pullrequest_id: int, project_id: int) -> None: + raise NotImplementedError + + def on_create_or_update_comment_error(self, api_error: ApiError, metrics_base: str) -> bool: + raise NotImplementedError + + def get_pr_files_safe_for_comment( + self, repo: Repository, pr: PullRequest + ) -> list[dict[str, str]]: + raise NotImplementedError + + def get_pr_files(self, pr_files: list[dict[str, str]]) -> list[PullRequestFile]: + raise NotImplementedError + + def format_open_pr_comment(self, issue_tables: list[str]) -> str: + raise NotImplementedError + + def format_issue_table( + self, + diff_filename: str, + issues: list[PullRequestIssue], + patch_parsers: dict[str, Any], + toggle: bool, + ) -> str: + raise NotImplementedError + + +class TestCommitContextIntegrationSLO(TestCase): + def setUp(self): + super().setUp() + self.integration = MockCommitContextIntegration() + self.repo = Repository.objects.create( + organization_id=self.organization.id, + name="example/repo", + ) + self.source_line = SourceLineInfo( + lineno=10, path="src/file.py", ref="main", repo=self.repo, code_mapping=Mock() + ) + + @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") + def test_get_blame_for_files_success(self, mock_record): + """Test successful blame retrieval records correct lifecycle events""" + self.integration.client.get_blame_for_files.return_value = [] + + result = self.integration.get_blame_for_files([self.source_line], {}) + + assert result == [] + assert_slo_metric(mock_record, EventLifecycleOutcome.SUCCESS) + + @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") + def test_get_blame_for_files_missing_identity(self, mock_record): + """Test missing identity records failure""" + self.integration.get_client = Mock(side_effect=Identity.DoesNotExist()) + + result = self.integration.get_blame_for_files([self.source_line], {}) + + assert 
result == []
+        assert len(mock_record.mock_calls) == 2
+        assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE)
+        assert_failure_metric(mock_record, Identity.DoesNotExist())
+
+    @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
+    def test_get_blame_for_files_invalid_identity(self, mock_record):
+        """Test invalid identity records failure"""
+        from sentry.auth.exceptions import IdentityNotValid
+
+        self.integration.client.get_blame_for_files = Mock(side_effect=IdentityNotValid())
+
+        result = self.integration.get_blame_for_files([self.source_line], {})
+
+        assert result == []
+        assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE)
+        assert_failure_metric(mock_record, IdentityNotValid())
+
+    @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
+    def test_get_blame_for_files_rate_limited(self, mock_record):
+        """Test rate limited requests record halt"""
+        from sentry.shared_integrations.exceptions import ApiRateLimitedError
+
+        self.integration.client.get_blame_for_files = Mock(
+            side_effect=ApiRateLimitedError(text="Rate limited")
+        )
+
+        result = self.integration.get_blame_for_files([self.source_line], {})
+
+        assert result == []
+        assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED)
+        assert_halt_metric(mock_record, ApiRateLimitedError(text="Rate limited"))
+
+    @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
+    def test_get_blame_for_files_invalid_request(self, mock_record):
+        """Test invalid request records failure"""
+        from sentry.shared_integrations.exceptions import ApiInvalidRequestError
+
+        self.integration.client.get_blame_for_files = Mock(
+            side_effect=ApiInvalidRequestError(text="Invalid request")
+        )
+
+        with pytest.raises(ApiInvalidRequestError):
+            self.integration.get_blame_for_files([self.source_line], {})
+
+        assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE)
+        assert_failure_metric(mock_record, ApiInvalidRequestError(text="Invalid request"))
+
+    @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
+    def test_get_blame_for_files_invalid_request_gitlab(self, mock_record):
+        """Test invalid request for GitLab records halt"""
+        from sentry.shared_integrations.exceptions import ApiInvalidRequestError
+
+        class MockGitlabIntegration(MockCommitContextIntegration):
+            integration_name = "gitlab"
+
+        self.integration = MockGitlabIntegration()
+
+        self.integration.client.get_blame_for_files = Mock(
+            side_effect=ApiInvalidRequestError(text="Invalid request")
+        )
+
+        result = self.integration.get_blame_for_files([self.source_line], {})
+
+        assert result == []
+        assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED)
+        assert_halt_metric(mock_record, ApiInvalidRequestError(text="Invalid request"))
+
+    @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
+    def test_get_blame_for_files_retry_error(self, mock_record):
+        """Test retry error for GitLab Self-hosted records halt"""
+        from sentry.shared_integrations.exceptions import ApiRetryError
+
+        # Because this is GitLab Self-hosted, this should be halt
+        class MockGitlabIntegration(MockCommitContextIntegration):
+            integration_name = "gitlab"
+            base_url = "https://bufo-bot.gitlab.com"
+
+            def __init__(self):
+                super().__init__()
+                self.client.base_url = self.base_url
+
+        self.integration = MockGitlabIntegration()
+
+        self.integration.client.get_blame_for_files = Mock(
+            side_effect=ApiRetryError(text="Host error")
+        )
+
+        result = self.integration.get_blame_for_files([self.source_line], {})
+
+        assert result == []
+        assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED)
+        assert_halt_metric(mock_record, ApiRetryError(text="Host error"))
+
+    @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
+    def test_get_blame_for_files_retry_error_gitlab(self, mock_record):
+        """Test retry error for GitLab SaaS records failure"""
+        from sentry.shared_integrations.exceptions import ApiRetryError
+
+        # Because this is GitLab SaaS, this should be failure
+        class MockGitlabIntegration(MockCommitContextIntegration):
+            integration_name = "gitlab"
+            base_url = GITLAB_CLOUD_BASE_URL
+
+            def __init__(self):
+                super().__init__()
+                self.client.base_url = self.base_url
+
+        self.integration = MockGitlabIntegration()
+
+        self.integration.client.get_blame_for_files = Mock(
+            side_effect=ApiRetryError(text="Host error")
+        )
+
+        with pytest.raises(ApiRetryError):
+            self.integration.get_blame_for_files([self.source_line], {})
+
+        assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE)
+        assert_failure_metric(mock_record, ApiRetryError(text="Host error"))
+
+    @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
+    def test_get_commit_context_all_frames(self, mock_record):
+        """Test get_commit_context_all_frames records correct lifecycle events"""
+        self.integration.client.get_blame_for_files.return_value = []
+
+        result = self.integration.get_commit_context_all_frames([self.source_line], {})
+
+        assert result == []
+        assert len(mock_record.mock_calls) == 2
+        assert_slo_metric(mock_record, EventLifecycleOutcome.SUCCESS)
+
+
+class TestTop5IssuesByCount(TestCase, SnubaTestCase):
+    def setUp(self):
+        super().setUp()
+        self.integration_impl = MockCommitContextIntegration()
+
+    def test_simple(self):
+        group1 = [
+            self.store_event(
+                {"fingerprint": ["group-1"], "timestamp": before_now(days=1).isoformat()},
+                project_id=self.project.id,
+            )
+            for _ in range(3)
+        ][0].group.id
+        group2 = [
+            self.store_event(
+                {"fingerprint": ["group-2"], "timestamp": before_now(days=1).isoformat()},
+                project_id=self.project.id,
+            )
+            for _ in range(6)
+        ][0].group.id
+        group3 = [
+            self.store_event(
+                {"fingerprint": ["group-3"], "timestamp": before_now(days=1).isoformat()},
+                project_id=self.project.id,
+            )
+            for _ in range(4)
+        ][0].group.id
+        res = self.integration_impl.get_top_5_issues_by_count(
+            [group1, group2, group3], self.project
+        )
+        assert [issue["group_id"] for issue in res] == [group2, group3, group1]
+
+    def test_over_5_issues(self):
+        issue_ids = [
+            self.store_event(
+                {"fingerprint": [f"group-{idx}"], "timestamp": before_now(days=1).isoformat()},
+                project_id=self.project.id,
+            ).group.id
+            for idx in range(6)
+        ]
+        res = self.integration_impl.get_top_5_issues_by_count(issue_ids, self.project)
+        assert len(res) == 5
+
+    def test_ignore_info_level_issues(self):
+        group1 = [
+            self.store_event(
+                {
+                    "fingerprint": ["group-1"],
+                    "timestamp": before_now(days=1).isoformat(),
+                    "level": logging.INFO,
+                },
+                project_id=self.project.id,
+            )
+            for _ in range(3)
+        ][0].group.id
+        group2 = [
+            self.store_event(
+                {"fingerprint": ["group-2"], "timestamp": before_now(days=1).isoformat()},
+                project_id=self.project.id,
+            )
+            for _ in range(6)
+        ][0].group.id
+        group3 = [
+            self.store_event(
+                {
+                    "fingerprint": ["group-3"],
+                    "timestamp": before_now(days=1).isoformat(),
+                    "level": logging.INFO,
+                },
+                project_id=self.project.id,
+            )
+            for _ in range(4)
+        ][0].group.id
+        res = self.integration_impl.get_top_5_issues_by_count(
+            [group1, group2, group3], self.project
+        )
+        assert
[issue["group_id"] for issue in res] == [group2] + + def test_do_not_ignore_other_issues(self): + group1 = [ + self.store_event( + { + "fingerprint": ["group-1"], + "timestamp": before_now(days=1).isoformat(), + "level": logging.ERROR, + }, + project_id=self.project.id, + ) + for _ in range(3) + ][0].group.id + group2 = [ + self.store_event( + { + "fingerprint": ["group-2"], + "timestamp": before_now(days=1).isoformat(), + "level": logging.INFO, + }, + project_id=self.project.id, + ) + for _ in range(6) + ][0].group.id + group3 = [ + self.store_event( + { + "fingerprint": ["group-3"], + "timestamp": before_now(days=1).isoformat(), + "level": logging.DEBUG, + }, + project_id=self.project.id, + ) + for _ in range(4) + ][0].group.id + res = self.integration_impl.get_top_5_issues_by_count( + [group1, group2, group3], self.project + ) + assert [issue["group_id"] for issue in res] == [group3, group1] + + +class CreateEventTestCase(TestCase): + def _create_event( + self, + culprit=None, + timestamp=None, + filenames=None, + function_names=None, + project_id=None, + user_id=None, + handled=False, + ): + if culprit is None: + culprit = "issue0" + if timestamp is None: + timestamp = before_now(seconds=5).isoformat() + if filenames is None: + filenames = ["foo.py", "baz.py"] + if function_names is None: + function_names = ["hello", "world"] + if project_id is None: + project_id = self.project.id + + assert len(function_names) == len(filenames) + + frames = [] + for i, filename in enumerate(filenames): + frames.append({"filename": filename, "function": function_names[i]}) + + return self.store_event( + data={ + "message": "hello!", + "culprit": culprit, + "platform": "python", + "timestamp": timestamp, + "exception": { + "values": [ + { + "type": "Error", + "stacktrace": { + "frames": frames, + }, + "mechanism": {"handled": handled, "type": "generic"}, + }, + ] + }, + "user": {"id": user_id}, + }, + project_id=project_id, + ) + + +class TestGetCommentIssues(CreateEventTestCase): + def setUp(self): + self.group_id = [self._create_event(user_id=str(i)) for i in range(6)][0].group.id + self.another_org = self.create_organization() + self.another_org_project = self.create_project(organization=self.another_org) + + self.installation_impl = MockCommitContextIntegration() + + def test_simple(self): + group_id = [ + self._create_event(function_names=["blue", "planet"], user_id=str(i)) for i in range(7) + ][0].group.id + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world", "planet"] + ) + + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + function_names = [issue["function_name"] for issue in top_5_issues] + assert top_5_issue_ids == [group_id, self.group_id] + assert function_names == ["planet", "world"] + + def test_javascript_simple(self): + # should match function name exactly or className.functionName + group_id_1 = [ + self._create_event( + function_names=["other.planet", "component.blue"], + filenames=["baz.js", "foo.js"], + user_id=str(i), + ) + for i in range(7) + ][0].group.id + group_id_2 = [ + self._create_event( + function_names=["component.blue", "world"], + filenames=["foo.js", "baz.js"], + user_id=str(i), + ) + for i in range(6) + ][0].group.id + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.js"], function_names=["world", "planet"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + 
function_names = [issue["function_name"] for issue in top_5_issues] + assert top_5_issue_ids == [group_id_1, group_id_2] + assert function_names == ["other.planet", "world"] + + def test_php_simple(self): + # should match function name exactly or namespace::functionName + group_id_1 = [ + self._create_event( + function_names=["namespace/other/test::planet", "test/component::blue"], + filenames=["baz.php", "foo.php"], + user_id=str(i), + ) + for i in range(7) + ][0].group.id + group_id_2 = [ + self._create_event( + function_names=["test/component::blue", "world"], + filenames=["foo.php", "baz.php"], + user_id=str(i), + ) + for i in range(6) + ][0].group.id + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], + sentry_filenames=["baz.php"], + function_names=["world", "planet"], + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + function_names = [issue["function_name"] for issue in top_5_issues] + assert top_5_issue_ids == [group_id_1, group_id_2] + assert function_names == ["namespace/other/test::planet", "world"] + + def test_ruby_simple(self): + # should match function name exactly or class.functionName + group_id_1 = [ + self._create_event( + function_names=["test.planet", "test/component.blue"], + filenames=["baz.rb", "foo.rb"], + user_id=str(i), + ) + for i in range(7) + ][0].group.id + group_id_2 = [ + self._create_event( + function_names=["test/component.blue", "world"], + filenames=["foo.rb", "baz.rb"], + user_id=str(i), + ) + for i in range(6) + ][0].group.id + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.rb"], function_names=["world", "planet"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + function_names = [issue["function_name"] for issue in top_5_issues] + assert top_5_issue_ids == [group_id_1, group_id_2] + assert function_names == ["test.planet", "world"] + + def test_filters_resolved_issue(self): + group = Group.objects.all()[0] + group.resolved_at = timezone.now() + group.status = GroupStatus.RESOLVED + group.substatus = None + group.save() + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world"] + ) + assert len(top_5_issues) == 0 + + def test_filters_handled_issue(self): + group_id = self._create_event(filenames=["bar.py", "baz.py"], handled=True).group.id + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + assert group_id != self.group_id + assert top_5_issue_ids == [self.group_id] + + def test_project_group_id_mismatch(self): + # we fetch all group_ids that belong to the projects passed into the function + self._create_event(project_id=self.another_org_project.id) + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + assert top_5_issue_ids == [self.group_id] + + def test_filename_mismatch(self): + group_id = self._create_event( + filenames=["foo.py", "bar.py"], + ).group.id + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world"] + ) + 
top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + assert group_id != self.group_id + assert top_5_issue_ids == [self.group_id] + + def test_function_name_mismatch(self): + group_id = self._create_event( + function_names=["world", "hello"], + ).group.id + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + assert group_id != self.group_id + assert top_5_issue_ids == [self.group_id] + + def test_not_first_frame(self): + group_id = self._create_event( + function_names=["world", "hello"], filenames=["baz.py", "bar.py"], culprit="hi" + ).group.id + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + function_names = [issue["function_name"] for issue in top_5_issues] + assert group_id != self.group_id + assert top_5_issue_ids == [self.group_id, group_id] + assert function_names == ["world", "world"] + + def test_not_within_frame_limit(self): + function_names = ["world"] + ["a" for _ in range(STACKFRAME_COUNT)] + filenames = ["baz.py"] + ["foo.py" for _ in range(STACKFRAME_COUNT)] + group_id = self._create_event(function_names=function_names, filenames=filenames).group.id + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + assert group_id != self.group_id + assert top_5_issue_ids == [self.group_id] + + def test_event_too_old(self): + group_id = self._create_event( + timestamp=before_now(days=15).isoformat(), filenames=["bar.py", "baz.py"] + ).group.id + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + assert group_id != self.group_id + assert top_5_issue_ids == [self.group_id] + + def test_squashes_same_title_culprit_issues(self): + # both of these have the same title and culprit, + # so "squash" them and return the one with greater number of events + [ + self._create_event( + filenames=["base.py", "baz.py"], + function_names=["wonderful", "world"], + user_id=str(i), + handled=False, + ) + for i in range(3) + ] + group_id = [ + self._create_event( + filenames=["bar.py", "baz.py"], + function_names=["blue", "planet"], + user_id=str(i), + handled=False, + ) + for i in range(5) + ][0].group_id + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world", "planet"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + function_names = [issue["function_name"] for issue in top_5_issues] + + assert top_5_issue_ids == [self.group_id, group_id] + assert function_names == ["world", "planet"] + + def test_fetches_top_five_issues(self): + group_id_1 = [ + self._create_event( + filenames=["bar.py", "baz.py"], + function_names=["blue", "planet"], + user_id=str(i), + handled=False, + ) + for i in range(5) + ][0].group.id + [ + self._create_event( + filenames=["hello.py", "baz.py"], + function_names=["green", "planet"], + user_id=str(i), + handled=True, + ) + for i in range(4) + ] + 
group_id_3 = [ + self._create_event( + filenames=["base.py", "baz.py"], + function_names=["wonderful", "world"], + user_id=str(i), + handled=False, + culprit="hi", + ) + for i in range(3) + ][0].group.id + [ + self._create_event( + filenames=["nom.py", "baz.py"], + function_names=["jurassic", "world"], + user_id=str(i), + handled=True, + ) + for i in range(2) + ] + # 6th issue + self._create_event( + filenames=["nan.py", "baz.py"], function_names=["my_own", "world"], handled=True + ) + # unrelated issue with same stack trace in different project + self._create_event(project_id=self.another_org_project.id) + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world", "planet"] + ) + top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] + function_names = [issue["function_name"] for issue in top_5_issues] + + # filters handled issues + assert top_5_issue_ids == [self.group_id, group_id_1, group_id_3] + assert function_names == ["world", "planet", "world"] + + def test_get_issue_table_contents(self): + group_id_1 = [ + self._create_event( + culprit="issue1", + filenames=["bar.py", "baz.py"], + function_names=["blue", "planet"], + user_id=str(i), + handled=False, + ) + for i in range(5) + ][0].group.id + group_id_2 = [ + self._create_event( + culprit="issue2", + filenames=["hello.py", "baz.py"], + function_names=["green", "planet"], + user_id=str(i), + handled=False, + ) + for i in range(4) + ][0].group.id + group_id_3 = [ + self._create_event( + culprit="issue3", + filenames=["base.py", "baz.py"], + function_names=["wonderful", "world"], + user_id=str(i), + handled=False, + ) + for i in range(3) + ][0].group.id + group_id_4 = [ + self._create_event( + culprit="issue4", + filenames=["nom.py", "baz.py"], + function_names=["jurassic", "world"], + user_id=str(i), + handled=False, + ) + for i in range(2) + ][0].group.id + + top_5_issues = self.installation_impl.get_top_5_issues_by_count_for_file( + projects=[self.project], sentry_filenames=["baz.py"], function_names=["world", "planet"] + ) + affected_users = [6, 5, 4, 3, 2] + event_count = [issue["event_count"] for issue in top_5_issues] + function_names = [issue["function_name"] for issue in top_5_issues] + + comment_table_contents = self.installation_impl.get_issue_table_contents(top_5_issues) + group_ids = [self.group_id, group_id_1, group_id_2, group_id_3, group_id_4] + + for i in range(5): + subtitle = "issue" + str(i) + assert ( + PullRequestIssue( + title="Error", + subtitle=subtitle, + url=f"http://testserver/organizations/{self.organization.slug}/issues/{group_ids[i]}/", + affected_users=affected_users[i], + event_count=event_count[i], + function_name=function_names[i], + ) + in comment_table_contents + ) + + +class TestGetProjectsAndFilenamesFromSourceFile(TestCase): + def setUp(self): + super().setUp() + + self.repo = self.create_repo( + name="getsentry/sentry", + provider="integrations:mock_integration", + integration_id=self.integration.id, + project=self.project, + url="https://github.com/getsentry/sentry", + ) + + self.another_org = self.create_organization() + self.another_org_project = self.create_project(organization=self.another_org) + self.another_org_integration = self.create_integration( + organization=self.another_org, external_id="1", provider="mock_integration" + ) + self.another_org_repo = self.create_repo( + name="getsentry/sentree", + provider="integrations:mock_integration", + 
integration_id=self.another_org_integration.id, + project=self.another_org_project, + url="https://github.com/getsentry/sentree", + ) + + self.integration_impl = MockCommitContextIntegration() + + def test_get_projects_and_filenames_from_source_file(self): + projects = [self.create_project() for _ in range(4)] + + source_stack_pairs = [ + ("", "./"), + ("src/sentry", "sentry/"), + ("src/", ""), + ("src/sentry/", "sentry/"), + ] + for i, pair in enumerate(source_stack_pairs): + source_root, stack_root = pair + self.create_code_mapping( + project=projects[i], + repo=self.repo, + source_root=source_root, + stack_root=stack_root, + default_branch="master", + ) + + # matching code mapping from a different org + other_org_code_mapping = self.create_code_mapping( + project=self.another_org_project, + repo=self.another_org_repo, + source_root="", + stack_root="./", + ) + other_org_code_mapping.organization_id = self.another_org.id + other_org_code_mapping.save() + + source_stack_nonmatches = [ + ("/src/sentry", "sentry"), + ("tests/", "tests/"), + ("app/", "static/app"), + ("tasks/integrations", "tasks"), # random match in the middle of the string + ] + for source_root, stack_root in source_stack_nonmatches: + self.create_code_mapping( + project=self.create_project(), + repo=self.repo, + source_root=source_root, + stack_root=stack_root, + default_branch="master", + ) + + filename = "src/sentry/tasks/integrations/github/open_pr_comment.py" + correct_filenames = [ + "./src/sentry/tasks/integrations/github/open_pr_comment.py", + "sentry//tasks/integrations/github/open_pr_comment.py", + "sentry/tasks/integrations/github/open_pr_comment.py", + ] + + project_list, sentry_filenames = ( + self.integration_impl.get_projects_and_filenames_from_source_file( + organization=self.organization, repo=self.repo, pr_filename=filename + ) + ) + assert project_list == set(projects) + assert sentry_filenames == set(correct_filenames) + + def test_get_projects_and_filenames_from_source_file_filters_repo(self): + projects = [self.create_project() for _ in range(3)] + + source_stack_pairs = [ + ("src/sentry", "sentry/"), + ("src/", ""), + ("src/sentry/", "sentry/"), + ] + for i, pair in enumerate(source_stack_pairs): + source_root, stack_root = pair + self.create_code_mapping( + project=projects[i], + repo=self.repo, + source_root=source_root, + stack_root=stack_root, + default_branch="master", + ) + + # other codemapping in different repo, will not match + project = self.create_project() + repo = self.create_repo( + name="getsentry/santry", + provider="integrations:github", + integration_id=self.integration.id, + project=project, + url="https://github.com/getsentry/santry", + ) + self.create_code_mapping( + project=project, + repo=repo, + source_root="", + stack_root="./", + default_branch="master", + ) + + filename = "src/sentry/tasks/integrations/github/open_pr_comment.py" + correct_filenames = [ + "sentry//tasks/integrations/github/open_pr_comment.py", + "sentry/tasks/integrations/github/open_pr_comment.py", + ] + + project_list, sentry_filenames = ( + self.integration_impl.get_projects_and_filenames_from_source_file( + organization=self.organization, repo=self.repo, pr_filename=filename + ) + ) + assert project_list == set(projects) + assert sentry_filenames == set(correct_filenames) diff --git a/tests/sentry/integrations/source_code_management/test_commit_context_slo.py b/tests/sentry/integrations/source_code_management/test_commit_context_slo.py deleted file mode 100644 index 255844727eba9b..00000000000000 --- 
a/tests/sentry/integrations/source_code_management/test_commit_context_slo.py +++ /dev/null @@ -1,188 +0,0 @@ -from unittest.mock import Mock, patch - -import pytest - -from sentry.integrations.gitlab.constants import GITLAB_CLOUD_BASE_URL -from sentry.integrations.source_code_management.commit_context import ( - CommitContextIntegration, - SourceLineInfo, -) -from sentry.integrations.types import EventLifecycleOutcome -from sentry.models.repository import Repository -from sentry.testutils.asserts import assert_failure_metric, assert_halt_metric, assert_slo_metric -from sentry.testutils.cases import TestCase -from sentry.users.models.identity import Identity - - -class MockCommitContextIntegration(CommitContextIntegration): - """Mock implementation for testing""" - - integration_name = "mock_integration" - - def __init__(self): - self.client = Mock() - self.client.base_url = "https://example.com" - - def get_client(self): - return self.client - - -class CommitContextIntegrationTest(TestCase): - def setUp(self): - super().setUp() - self.integration = MockCommitContextIntegration() - self.repo = Repository.objects.create( - organization_id=self.organization.id, - name="example/repo", - ) - self.source_line = SourceLineInfo( - lineno=10, path="src/file.py", ref="main", repo=self.repo, code_mapping=Mock() - ) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def test_get_blame_for_files_success(self, mock_record): - """Test successful blame retrieval records correct lifecycle events""" - self.integration.client.get_blame_for_files.return_value = [] - - result = self.integration.get_blame_for_files([self.source_line], {}) - - assert result == [] - assert_slo_metric(mock_record, EventLifecycleOutcome.SUCCESS) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def test_get_blame_for_files_missing_identity(self, mock_record): - """Test missing identity records failure""" - self.integration.get_client = Mock(side_effect=Identity.DoesNotExist()) - - result = self.integration.get_blame_for_files([self.source_line], {}) - - assert result == [] - assert len(mock_record.mock_calls) == 2 - assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE) - assert_failure_metric(mock_record, Identity.DoesNotExist()) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def test_get_blame_for_files_invalid_identity(self, mock_record): - """Test invalid identity records failure""" - from sentry.auth.exceptions import IdentityNotValid - - self.integration.client.get_blame_for_files = Mock(side_effect=IdentityNotValid()) - - result = self.integration.get_blame_for_files([self.source_line], {}) - - assert result == [] - assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE) - assert_failure_metric(mock_record, IdentityNotValid()) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def test_get_blame_for_files_rate_limited(self, mock_record): - """Test rate limited requests record halt""" - from sentry.shared_integrations.exceptions import ApiRateLimitedError - - self.integration.client.get_blame_for_files = Mock( - side_effect=ApiRateLimitedError(text="Rate limited") - ) - - result = self.integration.get_blame_for_files([self.source_line], {}) - - assert result == [] - assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED) - assert_halt_metric(mock_record, ApiRateLimitedError(text="Rate limited")) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def 
test_get_blame_for_files_invalid_request(self, mock_record): - """Test invalid request records failure""" - from sentry.shared_integrations.exceptions import ApiInvalidRequestError - - self.integration.client.get_blame_for_files = Mock( - side_effect=ApiInvalidRequestError(text="Invalid request") - ) - - with pytest.raises(ApiInvalidRequestError): - self.integration.get_blame_for_files([self.source_line], {}) - - assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE) - assert_failure_metric(mock_record, ApiInvalidRequestError(text="Invalid request")) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def test_get_blame_for_files_invalid_request_gitlab(self, mock_record): - """Test invalid request for GitLab records halt""" - from sentry.shared_integrations.exceptions import ApiInvalidRequestError - - class MockGitlabIntegration(MockCommitContextIntegration): - integration_name = "gitlab" - - self.integration = MockGitlabIntegration() - - self.integration.client.get_blame_for_files = Mock( - side_effect=ApiInvalidRequestError(text="Invalid request") - ) - - result = self.integration.get_blame_for_files([self.source_line], {}) - - assert result == [] - assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED) - assert_halt_metric(mock_record, ApiInvalidRequestError(text="Invalid request")) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def test_get_blame_for_files_retry_error(self, mock_record): - """Test retry error for Gitlab Self-hosted records halt""" - from sentry.shared_integrations.exceptions import ApiRetryError - - # Because this is Gitlab Self-hosted, this should be halt - class MockGitlabIntegration(MockCommitContextIntegration): - integration_name = "gitlab" - base_url = "https://bufo-bot.gitlab.com" - - def __init__(self): - super().__init__() - self.client.base_url = self.base_url - - self.integration = MockGitlabIntegration() - - self.integration.client.get_blame_for_files = Mock( - side_effect=ApiRetryError(text="Host error") - ) - - result = self.integration.get_blame_for_files([self.source_line], {}) - - assert result == [] - assert_slo_metric(mock_record, EventLifecycleOutcome.HALTED) - assert_halt_metric(mock_record, ApiRetryError(text="Host error")) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def test_get_blame_for_files_retry_error_gitlab(self, mock_record): - """Test retry error for GitLab saas records failure""" - from sentry.shared_integrations.exceptions import ApiRetryError - - # Because this is Gitlab SAAS, this should be failure - class MockGitlabIntegration(MockCommitContextIntegration): - integration_name = "gitlab" - base_url = GITLAB_CLOUD_BASE_URL - - def __init__(self): - super().__init__() - self.client.base_url = self.base_url - - self.integration = MockGitlabIntegration() - - self.integration.client.get_blame_for_files = Mock( - side_effect=ApiRetryError(text="Host error") - ) - - with pytest.raises(ApiRetryError): - self.integration.get_blame_for_files([self.source_line], {}) - - assert_slo_metric(mock_record, EventLifecycleOutcome.FAILURE) - assert_failure_metric(mock_record, ApiRetryError(text="Host error")) - - @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") - def test_get_commit_context_all_frames(self, mock_record): - """Test get_commit_context_all_frames records correct lifecycle events""" - self.integration.client.get_blame_for_files.return_value = [] - - result = 
self.integration.get_commit_context_all_frames([self.source_line], {}) - - assert result == [] - assert len(mock_record.mock_calls) == 2 - assert_slo_metric(mock_record, EventLifecycleOutcome.SUCCESS)
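
Editor's note: the comment-workflow tests in this diff lean on two `unittest.mock` idioms worth calling out. `patch(...)` objects are declared once as class attributes (`patch_get_top_5_issues_by_count`, `patch_metrics`, and so on) and reused as decorators across tests, and stacked `@patch` decorators inject their mocks bottom-up, which is why the mock for `sentry.models.Organization.objects` arrives as the first argument of `test_comment_workflow_missing_org`. Below is a minimal, self-contained sketch of both idioms; it assumes nothing from this codebase and patches stdlib `json` attributes purely so it runs standalone.

    from unittest import TestCase, main
    from unittest.mock import patch


    class PatchDecoratorDemo(TestCase):
        # Declare each patcher once and reuse it across tests, mirroring the
        # class-attribute patchers in the diff above. The targets here are
        # stdlib functions chosen only to keep the sketch self-contained.
        patch_loads = patch("json.loads")
        patch_dumps = patch("json.dumps")

        @patch_loads
        @patch_dumps
        def test_mocks_are_injected_bottom_up(self, mock_dumps, mock_loads):
            import json

            # The decorator nearest the function (@patch_dumps) supplies the
            # first mock argument; @patch_loads supplies the second.
            json.dumps({"a": 1})
            json.loads("{}")
            mock_dumps.assert_called_once_with({"a": 1})
            mock_loads.assert_called_once_with("{}")


    if __name__ == "__main__":
        main()

Declaring the patchers once keeps the long dotted patch targets in a single place, so relocating a helper such as `get_top_5_issues_by_count` only requires updating one string instead of a decorator on every test.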