feat(statistical-detectors): Integrate detector with breakpoint detection #56384

Merged

Changes from all commits
24 changes: 23 additions & 1 deletion src/sentry/seer/utils.py
@@ -1,9 +1,31 @@
from typing import List, TypedDict

from django.conf import settings
from urllib3 import Retry

from sentry.net.http import connection_from_url
from sentry.utils import json


class BreakpointData(TypedDict):
project: str
# For legacy reasons, the group name is always
# transaction even when working with functions.
transaction: str
aggregate_range_1: float
aggregate_range_2: float
unweighted_t_value: float
unweighted_p_value: float
trend_percentage: float
absolute_percentage_change: float
trend_difference: float
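    # unix timestamp (in seconds) of the detected change point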
breakpoint: int


class BreakpointResponse(TypedDict):
data: List[BreakpointData]


seer_connection_pool = connection_from_url(
settings.ANOMALY_DETECTION_URL,
retries=Retry(
@@ -14,7 +36,7 @@
)


-def detect_breakpoints(breakpoint_request):
def detect_breakpoints(breakpoint_request) -> BreakpointResponse:
response = seer_connection_pool.urlopen(
"POST",
"/trends/breakpoint-detector",
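For context, a minimal sketch of how the newly typed helper might be consumed. The request payload mirrors the shape built in _detect_function_change_points later in this PR; all identifiers and values here are illustrative, not taken from the diff:

from sentry.seer.utils import detect_breakpoints

# Hypothetical payload -- keys are "<project_id>,<fingerprint>" and all
# timestamps are unix seconds, per the request built later in this PR.
request = {
    "data": {
        "1,12345": {
            "data": [[1687320000, [{"count": 1.0}]], [1687323600, [{"count": 2.0}]]],
            "data_start": 1686114600,  # 14 days before data_end
            "data_end": 1687324200,
            "request_start": 1687237800,  # last 24 hours of the window
            "request_end": 1687324200,
        }
    },
    "sort": "-trend_percentage()",
    "trendFunction": "p95()",
}

response = detect_breakpoints(request)  # -> BreakpointResponse
for entry in response["data"]:  # each entry is a BreakpointData
    print(entry["project"], entry["transaction"], entry["breakpoint"])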
3 changes: 3 additions & 0 deletions src/sentry/snuba/referrer.py
@@ -341,6 +341,9 @@ class Referrer(Enum):
API_PROFILING_FUNCTION_TRENDS_TOP_EVENTS = "api.profiling.function-trends.top-events"
API_PROFILING_FUNCTION_TRENDS_STATS = "api.profiling.function-trends.stats"
API_PROFILING_FUNCTIONS_STATISTICAL_DETECTOR = "api.profiling.functions.statistical-detector"
API_PROFILING_FUNCTIONS_STATISTICAL_DETECTOR_STATS = (
"api.profiling.functions.statistical-detector.stats"
)
API_PROJECT_EVENTS = "api.project-events"
API_RELEASES_RELEASE_DETAILS_CHART = "api.releases.release-details-chart"
API_REPLAY_DETAILS_PAGE = "api.replay.details-page"
150 changes: 133 additions & 17 deletions src/sentry/tasks/statistical_detectors.py
@@ -23,8 +23,13 @@
)

from sentry import options
from sentry.api.serializers.snuba import SnubaTSResultSerializer
from sentry.constants import ObjectStatus
from sentry.models.project import Project
from sentry.search.events.builder import ProfileTopFunctionsTimeseriesQueryBuilder
from sentry.search.events.fields import get_function_alias
from sentry.search.events.types import QueryBuilderConfig
from sentry.seer.utils import BreakpointData, detect_breakpoints
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.snuba import functions
@@ -153,20 +158,36 @@ def detect_function_trends(project_ids: List[int], start: datetime, *args, **kwargs
max_retries=0,
)
def detect_function_change_points(
-    functions: List[Tuple[int, str | int]], start: datetime, *args, **kwargs
functions_list: List[Tuple[int, int]], start: datetime, *args, **kwargs
) -> None:
-    for project_id, function_id in functions:
breakpoint_count = 0

breakpoints = _detect_function_change_points(functions_list, start)

for entry in breakpoints:
breakpoint_count += 1

with sentry_sdk.push_scope() as scope:
scope.set_tag("regressed_project_id", project_id)
scope.set_tag("regressed_function_id", function_id)
scope.set_tag("regressed_project_id", entry["project"])
            # the seer service was originally written for transactions,
            # so the payload key is named "transaction" even for functions
scope.set_tag("regressed_function_id", entry["transaction"])
scope.set_context(
"statistical_detectors",
{
**entry,
"timestamp": start.isoformat(),
"breakpoint": datetime.fromtimestamp(entry["breakpoint"]),
},
)
sentry_sdk.capture_message("Potential Regression")

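    # sample_rate=1.0: this counter is low volume, so record every increment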
metrics.incr(
"statistical_detectors.breakpoint.functions",
amount=breakpoint_count,
sample_rate=1.0,
)
Contributor:

Do you think it also makes sense to record a percentage of timeseries that actually had breakpoints?

Member Author:

I think we can derive the percentage by dividing this metric by the existing metrics, so just having this count will suffice.

Contributor:

👍

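As a minimal sketch of the derivation described above (the checked-timeseries counter is hypothetical, not a real metric key):

def breakpoint_rate(breakpoint_count: int, timeseries_checked: int) -> float:
    # Hypothetical helper: timeseries_checked stands in for the
    # pre-existing counter the author refers to above.
    return breakpoint_count / max(timeseries_checked, 1)  # guard against zero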


def _detect_function_trends(
project_ids: List[int], start: datetime
@@ -238,6 +259,38 @@ def _detect_function_trends(
)


def _detect_function_change_points(
functions_list: List[Tuple[int, int]],
start: datetime,
) -> Generator[BreakpointData, None, None]:
serializer = SnubaTSResultSerializer(None, None, None)

trend_function = "p95()"

for chunk in chunked(query_functions_timeseries(functions_list, start, trend_function), 10):
data = {}
for project_id, fingerprint, timeseries in chunk:
serialized = serializer.serialize(timeseries, get_function_alias(trend_function))
data[f"{project_id},{fingerprint}"] = {
"data": serialized["data"],
"data_start": serialized["start"],
"data_end": serialized["end"],
                # only treat the last 24 hours as the window to check for a regression
"request_start": serialized["end"] - 24 * 60 * 60,
"request_end": serialized["end"],
}

request = {
"data": data,
"sort": "-trend_percentage()",
"trendFunction": trend_function,
}

breakpoints = detect_breakpoints(request)["data"]

yield from breakpoints


def all_function_payloads(
project_ids: List[int],
start: datetime,
@@ -363,7 +416,18 @@ def query_transactions(


def query_functions(projects: List[Project], start: datetime) -> List[DetectorPayload]:
-    params = _get_function_query_params(projects, start)
    # The functions dataset only supports 1 hour granularity,
    # so we always look back at the last full hour that just elapsed.
    # And since the timestamps are truncated to the start of the hour,
    # we only need to query 1 minute of data.
start = start - timedelta(hours=1)
start = start.replace(minute=0, second=0, microsecond=0)
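    # e.g. a task start of 14:37:22 yields the [13:00:00, 13:01:00) window queried below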
params: Dict[str, Any] = {
"start": start,
"end": start + timedelta(minutes=1),
"project_id": [project.id for project in projects],
"project_objects": projects,
}

# TODOs: handle any errors
query_results = functions.query(
@@ -397,17 +461,69 @@ def query_functions(projects: List[Project], start: datetime) -> List[DetectorPayload]:
]


-def _get_function_query_params(projects: List[Project], start: datetime) -> Dict[str, Any]:
-    # The functions dataset only supports 1 hour granularity.
-    # So we always look back at the last full hour that just elapsed.
-    # And since the timestamps are truncated to the start of the hour
-    # we just need to query for the 1 minute of data.
-    start = start - timedelta(hours=1)
-    start = start.replace(minute=0, second=0, microsecond=0)
-
-    return {
-        "start": start,
-        "end": start + timedelta(minutes=1),
-        "project_id": [project.id for project in projects],
def query_functions_timeseries(
functions_list: List[Tuple[int, int]],
start: datetime,
agg_function: str,
) -> Generator[Tuple[int, int, Any], None, None]:
project_ids = [project_id for project_id, _ in functions_list]
projects = Project.objects.filter(id__in=project_ids)

# take the last 14 days as our window
end = start.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)
params: Dict[str, Any] = {
"start": end - timedelta(days=14),
"end": end,
"project_id": project_ids,
"project_objects": projects,
}
interval = 3600 # 1 hour

# make sure that each chunk can fit in the 10,000 row limit
# imposed by snuba
for functions_chunk in chunked(functions_list, 25):
chunk: List[Dict[str, Any]] = [
{
"project.id": project_id,
"fingerprint": fingerprint,
}
for project_id, fingerprint in functions_chunk
]

builder = ProfileTopFunctionsTimeseriesQueryBuilder(
dataset=Dataset.Functions,
params=params,
interval=interval,
top_events=chunk,
other=False,
query="is_application:1",
selected_columns=["project.id", "fingerprint"],
timeseries_columns=[agg_function],
config=QueryBuilderConfig(
skip_tag_resolution=True,
),
)
raw_results = raw_snql_query(
builder.get_snql_query(),
referrer=Referrer.API_PROFILING_FUNCTIONS_STATISTICAL_DETECTOR_STATS.value,
)

results = functions.format_top_events_timeseries_results(
raw_results,
builder,
params,
interval,
top_events={"data": chunk},
result_key_order=["project.id", "fingerprint"],
)

for project_id, fingerprint in functions_chunk:
key = f"{project_id},{fingerprint}"
if key not in results:
                logger.warning(
                    "Missing timeseries for project: %s function: %s",
                    project_id,
                    fingerprint,
                )
continue
yield project_id, fingerprint, results[key]
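As a quick sanity check of the chunk sizing in query_functions_timeseries above (a standalone sketch; the numbers follow from the 14-day window and 1-hour interval):

# Each (project, fingerprint) pair contributes one row per hourly bucket.
rows_per_series = 14 * 24        # 336 buckets over the 14 day window
series_per_chunk = 25
assert series_per_chunk * rows_per_series == 8400  # under Snuba's 10,000 row limit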
61 changes: 53 additions & 8 deletions tests/sentry/tasks/test_statistical_detectors.py
@@ -10,6 +10,7 @@
from sentry.snuba.metrics.naming_layer.mri import TransactionMRI
from sentry.statistical_detectors.detector import DetectorPayload
from sentry.tasks.statistical_detectors import (
detect_function_change_points,
detect_function_trends,
detect_transaction_trends,
query_functions,
@@ -260,24 +261,68 @@ def test_detect_function_trends(
assert detect_function_change_points.delay.called


@mock.patch("sentry.tasks.statistical_detectors.detect_breakpoints")
@mock.patch("sentry.tasks.statistical_detectors.raw_snql_query")
@django_db_all
def test_detect_function_change_points(
mock_raw_snql_query, mock_detect_breakpoints, timestamp, project
):
start_of_hour = timestamp.replace(minute=0, second=0, microsecond=0, tzinfo=timezone.utc)

fingerprint = 12345

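    # 14 days of hourly p95 data where the most recent 8 hours jump from 1 to 2,
    # simulating a regression for the breakpoint detector to flag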
mock_raw_snql_query.return_value = {
"data": [
{
"time": (start_of_hour - timedelta(days=day, hours=hour)).isoformat(),
"project.id": project.id,
"fingerprint": fingerprint,
"p95": 2 if day < 1 and hour < 8 else 1,
}
for day in reversed(range(14))
for hour in reversed(range(24))
]
}

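    # canned breakpoint detection response; fields follow the BreakpointData shape (plus "change")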
mock_detect_breakpoints.return_value = {
"data": [
{
"absolute_percentage_change": 5.0,
"aggregate_range_1": 100000000.0,
"aggregate_range_2": 500000000.0,
"breakpoint": 1687323600,
"change": "regression",
"project": str(project.id),
"transaction": str(fingerprint),
"trend_difference": 400000000.0,
"trend_percentage": 5.0,
"unweighted_p_value": 0.0,
"unweighted_t_value": -float("inf"),
},
]
}

detect_function_change_points([(project.id, fingerprint)], timestamp)


@region_silo_test(stable=True)
-class FunctionsQueryTest(ProfilesSnubaTestCase):
class FunctionsTasksTest(ProfilesSnubaTestCase):
def setUp(self):
super().setUp()

self.now = before_now(minutes=10)
self.hour_ago = (self.now - timedelta(hours=1)).replace(
minute=0, second=0, microsecond=0, tzinfo=timezone.utc
)

-    @mock.patch("sentry.tasks.statistical_detectors.FUNCTIONS_PER_PROJECT", 1)
-    def test_functions_query(self):
-        projects = [
self.projects = [
self.create_project(organization=self.organization, teams=[self.team], name="Foo"),
self.create_project(organization=self.organization, teams=[self.team], name="Bar"),
]

-        for project in projects:
@mock.patch("sentry.tasks.statistical_detectors.FUNCTIONS_PER_PROJECT", 1)
def test_functions_query(self):

for project in self.projects:
self.store_functions(
[
{
Expand Down Expand Up @@ -310,7 +355,7 @@ def test_functions_query(self):
timestamp=self.hour_ago,
)

-        results = query_functions(projects, self.now)
results = query_functions(self.projects, self.now)
assert results == [
DetectorPayload(
project_id=project.id,
@@ -319,7 +364,7 @@
value=pytest.approx(100), # type: ignore[arg-type]
timestamp=self.hour_ago,
)
-            for project in projects
for project in self.projects
]

