feat(statistical-detectors): Integrate detector with breakpoint detection #56384

Merged

Changes from all commits
24 changes: 23 additions & 1 deletion src/sentry/seer/utils.py
@@ -1,9 +1,31 @@
from typing import List, TypedDict

from django.conf import settings
from urllib3 import Retry

from sentry.net.http import connection_from_url
from sentry.utils import json


class BreakpointData(TypedDict):
project: str
# For legacy reasons, the group name is always
# transaction even when working with functions.
transaction: str
aggregate_range_1: float
aggregate_range_2: float
unweighted_t_value: float
unweighted_p_value: float
trend_percentage: float
absolute_percentage_change: float
trend_difference: float
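    # unix timestamp (in seconds) of the detected change point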
breakpoint: int


class BreakpointResponse(TypedDict):
data: List[BreakpointData]


seer_connection_pool = connection_from_url(
settings.ANOMALY_DETECTION_URL,
retries=Retry(
@@ -14,7 +36,7 @@
)


-def detect_breakpoints(breakpoint_request):
def detect_breakpoints(breakpoint_request) -> BreakpointResponse:
response = seer_connection_pool.urlopen(
"POST",
"/trends/breakpoint-detector",
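For context, a minimal sketch of how the newly typed helper might be consumed. The request payload mirrors the shape built in _detect_function_change_points later in this PR; all identifiers and values here are illustrative, not taken from the diff:

from sentry.seer.utils import detect_breakpoints

# Hypothetical payload -- keys are "<project_id>,<fingerprint>" and all
# timestamps are unix seconds, per the request built later in this PR.
request = {
    "data": {
        "1,12345": {
            "data": [[1687320000, [{"count": 1.0}]], [1687323600, [{"count": 2.0}]]],
            "data_start": 1686114600,  # 14 days before data_end
            "data_end": 1687324200,
            "request_start": 1687237800,  # last 24 hours of the window
            "request_end": 1687324200,
        }
    },
    "sort": "-trend_percentage()",
    "trendFunction": "p95()",
}

response = detect_breakpoints(request)  # -> BreakpointResponse
for entry in response["data"]:  # each entry is a BreakpointData
    print(entry["project"], entry["transaction"], entry["breakpoint"])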
3 changes: 3 additions & 0 deletions src/sentry/snuba/referrer.py
@@ -341,6 +341,9 @@ class Referrer(Enum):
API_PROFILING_FUNCTION_TRENDS_TOP_EVENTS = "api.profiling.function-trends.top-events"
API_PROFILING_FUNCTION_TRENDS_STATS = "api.profiling.function-trends.stats"
API_PROFILING_FUNCTIONS_STATISTICAL_DETECTOR = "api.profiling.functions.statistical-detector"
API_PROFILING_FUNCTIONS_STATISTICAL_DETECTOR_STATS = (
"api.profiling.functions.statistical-detector.stats"
)
API_PROJECT_EVENTS = "api.project-events"
API_RELEASES_RELEASE_DETAILS_CHART = "api.releases.release-details-chart"
API_REPLAY_DETAILS_PAGE = "api.replay.details-page"
150 changes: 133 additions & 17 deletions src/sentry/tasks/statistical_detectors.py
@@ -23,8 +23,13 @@
)

from sentry import options
from sentry.api.serializers.snuba import SnubaTSResultSerializer
from sentry.constants import ObjectStatus
from sentry.models.project import Project
from sentry.search.events.builder import ProfileTopFunctionsTimeseriesQueryBuilder
from sentry.search.events.fields import get_function_alias
from sentry.search.events.types import QueryBuilderConfig
from sentry.seer.utils import BreakpointData, detect_breakpoints
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.snuba import functions
@@ -153,20 +158,36 @@ def detect_function_trends(project_ids: List[int], start: datetime, *args, **kwargs
max_retries=0,
)
def detect_function_change_points(
-    functions: List[Tuple[int, str | int]], start: datetime, *args, **kwargs
functions_list: List[Tuple[int, int]], start: datetime, *args, **kwargs
) -> None:
-    for project_id, function_id in functions:
breakpoint_count = 0

breakpoints = _detect_function_change_points(functions_list, start)

for entry in breakpoints:
breakpoint_count += 1

with sentry_sdk.push_scope() as scope:
scope.set_tag("regressed_project_id", project_id)
scope.set_tag("regressed_function_id", function_id)
scope.set_tag("regressed_project_id", entry["project"])
            # the seer service was originally written for transactions,
            # so the payload key is named "transaction" even for functions
scope.set_tag("regressed_function_id", entry["transaction"])
scope.set_context(
"statistical_detectors",
{
**entry,
"timestamp": start.isoformat(),
"breakpoint": datetime.fromtimestamp(entry["breakpoint"]),
},
)
sentry_sdk.capture_message("Potential Regression")

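    # sample_rate=1.0: this counter is low volume, so record every increment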
metrics.incr(
"statistical_detectors.breakpoint.functions",
amount=breakpoint_count,
sample_rate=1.0,
)
Contributor:

Do you think it also makes sense to record a percentage of timeseries that actually had breakpoints?

Member Author:

I think we can derive the percentage by dividing this metric by the existing metrics, so just having this count will suffice.

Contributor:

👍

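As a minimal sketch of the derivation described above (the checked-timeseries counter is hypothetical, not a real metric key):

def breakpoint_rate(breakpoint_count: int, timeseries_checked: int) -> float:
    # Hypothetical helper: timeseries_checked stands in for the
    # pre-existing counter the author refers to above.
    return breakpoint_count / max(timeseries_checked, 1)  # guard against zero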


def _detect_function_trends(
project_ids: List[int], start: datetime
@@ -238,6 +259,38 @@ def _detect_function_trends(
)


def _detect_function_change_points(
functions_list: List[Tuple[int, int]],
start: datetime,
) -> Generator[BreakpointData, None, None]:
serializer = SnubaTSResultSerializer(None, None, None)

trend_function = "p95()"

for chunk in chunked(query_functions_timeseries(functions_list, start, trend_function), 10):
data = {}
for project_id, fingerprint, timeseries in chunk:
serialized = serializer.serialize(timeseries, get_function_alias(trend_function))
data[f"{project_id},{fingerprint}"] = {
"data": serialized["data"],
"data_start": serialized["start"],
"data_end": serialized["end"],
                # only treat the last 24 hours as the window to check for a regression
"request_start": serialized["end"] - 24 * 60 * 60,
"request_end": serialized["end"],
}

request = {
"data": data,
"sort": "-trend_percentage()",
"trendFunction": trend_function,
}

breakpoints = detect_breakpoints(request)["data"]

yield from breakpoints


def all_function_payloads(
project_ids: List[int],
start: datetime,
@@ -363,7 +416,18 @@ def query_transactions(


def query_functions(projects: List[Project], start: datetime) -> List[DetectorPayload]:
-    params = _get_function_query_params(projects, start)
    # The functions dataset only supports 1 hour granularity,
    # so we always look back at the last full hour that just elapsed.
    # And since the timestamps are truncated to the start of the hour,
    # we only need to query 1 minute of data.
start = start - timedelta(hours=1)
start = start.replace(minute=0, second=0, microsecond=0)
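    # e.g. a task start of 14:37:22 yields the [13:00:00, 13:01:00) window queried below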
params: Dict[str, Any] = {
"start": start,
"end": start + timedelta(minutes=1),
"project_id": [project.id for project in projects],
"project_objects": projects,
}

# TODOs: handle any errors
query_results = functions.query(
@@ -397,17 +461,69 @@ def query_functions(projects: List[Project], start: datetime) -> List[DetectorPayload]:
]


-def _get_function_query_params(projects: List[Project], start: datetime) -> Dict[str, Any]:
-    # The functions dataset only supports 1 hour granularity.
-    # So we always look back at the last full hour that just elapsed.
-    # And since the timestamps are truncated to the start of the hour
-    # we just need to query for the 1 minute of data.
-    start = start - timedelta(hours=1)
-    start = start.replace(minute=0, second=0, microsecond=0)
-
-    return {
-        "start": start,
-        "end": start + timedelta(minutes=1),
-        "project_id": [project.id for project in projects],
def query_functions_timeseries(
functions_list: List[Tuple[int, int]],
start: datetime,
agg_function: str,
) -> Generator[Tuple[int, int, Any], None, None]:
project_ids = [project_id for project_id, _ in functions_list]
projects = Project.objects.filter(id__in=project_ids)

# take the last 14 days as our window
end = start.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)
params: Dict[str, Any] = {
"start": end - timedelta(days=14),
"end": end,
"project_id": project_ids,
"project_objects": projects,
}
interval = 3600 # 1 hour

# make sure that each chunk can fit in the 10,000 row limit
# imposed by snuba
for functions_chunk in chunked(functions_list, 25):
chunk: List[Dict[str, Any]] = [
{
"project.id": project_id,
"fingerprint": fingerprint,
}
for project_id, fingerprint in functions_chunk
]

builder = ProfileTopFunctionsTimeseriesQueryBuilder(
dataset=Dataset.Functions,
params=params,
interval=interval,
top_events=chunk,
other=False,
query="is_application:1",
selected_columns=["project.id", "fingerprint"],
timeseries_columns=[agg_function],
config=QueryBuilderConfig(
skip_tag_resolution=True,
),
)
raw_results = raw_snql_query(
builder.get_snql_query(),
referrer=Referrer.API_PROFILING_FUNCTIONS_STATISTICAL_DETECTOR_STATS.value,
)

results = functions.format_top_events_timeseries_results(
raw_results,
builder,
params,
interval,
top_events={"data": chunk},
result_key_order=["project.id", "fingerprint"],
)

for project_id, fingerprint in functions_chunk:
key = f"{project_id},{fingerprint}"
if key not in results:
                logger.warning(
                    "Missing timeseries for project: %s function: %s",
                    project_id,
                    fingerprint,
                )
continue
yield project_id, fingerprint, results[key]
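As a quick sanity check of the chunk sizing in query_functions_timeseries above (a standalone sketch; the numbers follow from the 14-day window and 1-hour interval):

# Each (project, fingerprint) pair contributes one row per hourly bucket.
rows_per_series = 14 * 24        # 336 buckets over the 14 day window
series_per_chunk = 25
assert series_per_chunk * rows_per_series == 8400  # under Snuba's 10,000 row limit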
61 changes: 53 additions & 8 deletions tests/sentry/tasks/test_statistical_detectors.py
@@ -10,6 +10,7 @@
from sentry.snuba.metrics.naming_layer.mri import TransactionMRI
from sentry.statistical_detectors.detector import DetectorPayload
from sentry.tasks.statistical_detectors import (
detect_function_change_points,
detect_function_trends,
detect_transaction_trends,
query_functions,
@@ -260,24 +261,68 @@ def test_detect_function_trends(
assert detect_function_change_points.delay.called


@mock.patch("sentry.tasks.statistical_detectors.detect_breakpoints")
@mock.patch("sentry.tasks.statistical_detectors.raw_snql_query")
@django_db_all
def test_detect_function_change_points(
mock_raw_snql_query, mock_detect_breakpoints, timestamp, project
):
start_of_hour = timestamp.replace(minute=0, second=0, microsecond=0, tzinfo=timezone.utc)

fingerprint = 12345

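    # 14 days of hourly p95 data where the most recent 8 hours jump from 1 to 2,
    # simulating a regression for the breakpoint detector to flag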
mock_raw_snql_query.return_value = {
"data": [
{
"time": (start_of_hour - timedelta(days=day, hours=hour)).isoformat(),
"project.id": project.id,
"fingerprint": fingerprint,
"p95": 2 if day < 1 and hour < 8 else 1,
}
for day in reversed(range(14))
for hour in reversed(range(24))
]
}

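    # canned breakpoint detection response; fields follow the BreakpointData shape (plus "change")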
mock_detect_breakpoints.return_value = {
"data": [
{
"absolute_percentage_change": 5.0,
"aggregate_range_1": 100000000.0,
"aggregate_range_2": 500000000.0,
"breakpoint": 1687323600,
"change": "regression",
"project": str(project.id),
"transaction": str(fingerprint),
"trend_difference": 400000000.0,
"trend_percentage": 5.0,
"unweighted_p_value": 0.0,
"unweighted_t_value": -float("inf"),
},
]
}

detect_function_change_points([(project.id, fingerprint)], timestamp)


@region_silo_test(stable=True)
-class FunctionsQueryTest(ProfilesSnubaTestCase):
class FunctionsTasksTest(ProfilesSnubaTestCase):
def setUp(self):
super().setUp()

self.now = before_now(minutes=10)
self.hour_ago = (self.now - timedelta(hours=1)).replace(
minute=0, second=0, microsecond=0, tzinfo=timezone.utc
)

-    @mock.patch("sentry.tasks.statistical_detectors.FUNCTIONS_PER_PROJECT", 1)
-    def test_functions_query(self):
-        projects = [
self.projects = [
self.create_project(organization=self.organization, teams=[self.team], name="Foo"),
self.create_project(organization=self.organization, teams=[self.team], name="Bar"),
]

-        for project in projects:
@mock.patch("sentry.tasks.statistical_detectors.FUNCTIONS_PER_PROJECT", 1)
def test_functions_query(self):

for project in self.projects:
self.store_functions(
[
{
Expand Down Expand Up @@ -310,7 +355,7 @@ def test_functions_query(self):
timestamp=self.hour_ago,
)

-        results = query_functions(projects, self.now)
results = query_functions(self.projects, self.now)
assert results == [
DetectorPayload(
project_id=project.id,
@@ -319,7 +364,7 @@
value=pytest.approx(100), # type: ignore[arg-type]
timestamp=self.hour_ago,
)
-            for project in projects
for project in self.projects
]

