Skip to content

Commit dadb76a

Browse files
markstoryNisanthan Nanthakumar
authored and
Nisanthan Nanthakumar
committed
ref(perf) Attempt to improve performance of facets endpoint (#16359)
* Fetch tag values individually. This lets Snuba use the promoted columns more effectively, as they perform better.
* Only get 10 facets by default.
* Use a HAVING clause instead of WHERE to exclude tracing tags, as it performs better.
* Don't use fields that unpack into arrayJoin() expressions where possible.
* Enable sampling at lower project thresholds, as we don't need full accuracy.
1 parent 1bf55bf commit dadb76a

File tree

1 file changed

+20
-38
lines changed

1 file changed

+20
-38
lines changed

src/sentry/snuba/discover.py

+20-38
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from sentry import eventstore
1717

1818
from sentry.models import Project, ProjectStatus
19+
from sentry.tagstore.base import TOP_VALUES_DEFAULT_LIMIT
1920
from sentry.utils.snuba import (
2021
Dataset,
2122
SnubaTSResult,
@@ -419,7 +420,7 @@ def get_pagination_ids(event, query, params, reference_event=None, referrer=None
419420
)
420421

421422

422-
def get_facets(query, params, limit=20, referrer=None):
423+
def get_facets(query, params, limit=10, referrer=None):
423424
"""
424425
High-level API for getting 'facet map' results.
425426
@@ -446,12 +447,12 @@ def get_facets(query, params, limit=20, referrer=None):
446447
# Resolve the public aliases into the discover dataset names.
447448
snuba_args, translated_columns = resolve_discover_aliases(snuba_args)
448449

449-
# Force sampling for more than 9 projects. 9 was chosen arbitrarily.
450-
sample = len(snuba_filter.filter_keys["project_id"]) > 9
450+
# Force sampling for results spanning more than 2 projects as we don't need accuracy
451+
# with that much data.
452+
sample = len(snuba_filter.filter_keys["project_id"]) > 2
451453

452454
# Exclude tracing tags as they are noisy and generally not helpful.
453-
conditions = snuba_args.get("conditions", [])
454-
conditions.append(["tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span"]])
455+
excluded_tags = ["tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span"]]
455456

456457
# Get the most frequent tag keys, enable sampling
457458
# as we don't need accuracy here.
@@ -463,6 +464,7 @@ def get_facets(query, params, limit=20, referrer=None):
463464
filter_keys=snuba_args.get("filter_keys"),
464465
orderby=["-count", "tags_key"],
465466
groupby="tags_key",
467+
having=[excluded_tags],
466468
dataset=Dataset.Discover,
467469
limit=limit,
468470
referrer=referrer,
@@ -481,7 +483,7 @@ def get_facets(query, params, limit=20, referrer=None):
481483
results = []
482484
if fetch_projects:
483485
project_values = raw_query(
484-
aggregations=[["uniq", "event_id", "count"]],
486+
aggregations=[["count", None, "count"]],
485487
start=snuba_args.get("start"),
486488
end=snuba_args.get("end"),
487489
conditions=snuba_args.get("conditions"),
@@ -495,43 +497,23 @@ def get_facets(query, params, limit=20, referrer=None):
495497
[FacetResult("project", r["project_id"], r["count"]) for r in project_values["data"]]
496498
)
497499

498-
# Environment is a special case because of the "" value which is stored as null
499-
# in the environment column but not in the tag arrays.
500-
if "environment" in top_tags:
501-
top_tags.remove("environment")
502-
environment_values = raw_query(
503-
aggregations=[["uniq", "event_id", "count"]],
500+
# Get tag counts for our top tags. Fetching them individually
501+
# allows snuba to leverage promoted tags better and enables us to get
502+
# the value count we want.
503+
for tag_name in top_tags:
504+
tag = u"tags[{}]".format(tag_name)
505+
tag_values = raw_query(
506+
aggregations=[["count", None, "count"]],
507+
conditions=snuba_args.get("conditions"),
504508
start=snuba_args.get("start"),
505509
end=snuba_args.get("end"),
506-
conditions=snuba_args.get("conditions"),
507510
filter_keys=snuba_args.get("filter_keys"),
508-
groupby="environment",
509-
orderby=["-count", "environment"],
511+
orderby=["-count"],
512+
groupby=[tag],
513+
limit=TOP_VALUES_DEFAULT_LIMIT,
510514
dataset=Dataset.Discover,
511515
referrer=referrer,
512516
)
513-
results.extend(
514-
[
515-
FacetResult("environment", r["environment"], r["count"])
516-
for r in environment_values["data"]
517-
]
518-
)
519-
520-
# Get tag counts for our top tags.
521-
conditions.append(["tags_key", "IN", top_tags])
522-
tag_values = raw_query(
523-
aggregations=[["count", None, "count"]],
524-
conditions=conditions,
525-
start=snuba_args.get("start"),
526-
end=snuba_args.get("end"),
527-
filter_keys=snuba_args.get("filter_keys"),
528-
orderby=["tags_key", "-count"],
529-
groupby=["tags_key", "tags_value"],
530-
dataset=Dataset.Discover,
531-
referrer=referrer,
532-
)
533-
results.extend(
534-
[FacetResult(r["tags_key"], r["tags_value"], int(r["count"])) for r in tag_values["data"]]
535-
)
517+
results.extend([FacetResult(tag_name, r[tag], int(r["count"])) for r in tag_values["data"]])
536518

537519
return results

0 commit comments

Comments
 (0)