Skip to content

Commit a816d8d

Browse files
authored
ref(discover2) Test out sampling on tag values (#16387)
The facets endpoint is still not fast. I want to see what kind of results can be obtained if we aggressively sample tag values. If this yields acceptable performance, I want to try scaling the sampling rate based on the data volume so that we can better balance accuracy and performance.
1 parent 7ee75b9 commit a816d8d

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

src/sentry/snuba/discover.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -447,15 +447,14 @@ def get_facets(query, params, limit=10, referrer=None):
447447
# Resolve the public aliases into the discover dataset names.
448448
snuba_args, translated_columns = resolve_discover_aliases(snuba_args)
449449

450-
# Force sampling for multi-project results as we don't need accuracy
451-
# with that much data.
452-
sample = len(snuba_filter.filter_keys["project_id"]) > 2
453-
454450
# Exclude tracing tags as they are noisy and generally not helpful.
455451
excluded_tags = ["tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span"]]
456452

457-
# Get the most frequent tag keys, enable sampling
458-
# as we don't need accuracy here.
453+
# Sampling keys for multi-project results as we don't need accuracy
454+
# with that much data.
455+
sample = len(snuba_filter.filter_keys["project_id"]) > 2
456+
457+
# Get the most frequent tag keys
459458
key_names = raw_query(
460459
aggregations=[["count", None, "count"]],
461460
start=snuba_args.get("start"),
@@ -474,6 +473,11 @@ def get_facets(query, params, limit=10, referrer=None):
474473
if not top_tags:
475474
return []
476475

476+
# TODO(mark) Make the sampling rate scale based on the result size and scaling factor in
477+
# sentry.options.
478+
# To test the lowest acceptable sampling rate, we use turbo mode.
479+
turbo_values = key_names["data"][0]["count"] > 10000
480+
477481
fetch_projects = False
478482
if len(params.get("project_id", [])) > 1:
479483
if len(top_tags) == limit:
@@ -492,6 +496,7 @@ def get_facets(query, params, limit=10, referrer=None):
492496
orderby="-count",
493497
dataset=Dataset.Discover,
494498
referrer=referrer,
499+
turbo=turbo_values,
495500
)
496501
results.extend(
497502
[FacetResult("project", r["project_id"], r["count"]) for r in project_values["data"]]
@@ -513,6 +518,7 @@ def get_facets(query, params, limit=10, referrer=None):
513518
limit=TOP_VALUES_DEFAULT_LIMIT,
514519
dataset=Dataset.Discover,
515520
referrer=referrer,
521+
turbo=turbo_values,
516522
)
517523
results.extend([FacetResult(tag_name, r[tag], int(r["count"])) for r in tag_values["data"]])
518524

0 commit comments

Comments
 (0)