
Commit 6d79b5c

markstory authored and Nisanthan Nanthakumar committed
ref: Remove now unused discover query code (#16306)
Remove the automatic dataset detection logic from sentry; we have switched over to the snuba-based implementation of this. With dataset selection gone from sentry, we can also remove some (but not all) of the supporting logic.
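
The practical effect at call sites: queries that previously relied on automatic detection must now name their dataset explicitly. A minimal sketch of the new calling convention (argument values are hypothetical; the keyword names are taken from the calls visible in this commit's diff):

from sentry.utils import snuba

# Before this commit, omitting `dataset` made dataset_query() guess the
# dataset by scanning conditions and columns with detect_dataset().
# After it, the caller must name the dataset explicitly.
result = snuba.dataset_query(
    selected_columns=["event_id", "project_id"],
    conditions=[["type", "=", "error"]],
    limit=100,
    referrer="example.explicit_dataset",  # hypothetical referrer
    dataset=snuba.Dataset.Events,  # now required; None raises ValueError
)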
1 parent da68bab commit 6d79b5c

File tree

7 files changed (+20, -468 lines)


src/sentry/api/bases/organization_events.py

Lines changed: 0 additions & 10 deletions
@@ -5,9 +5,7 @@
 from sentry.api.bases import OrganizationEndpoint, OrganizationEventsError
 from sentry.api.event_search import get_filter, InvalidSearchQuery
 from sentry.models.project import Project
-from sentry.snuba.dataset import Dataset
 from sentry.snuba.discover import ReferenceEvent
-from sentry.utils import snuba
 
 
 class OrganizationEventsEndpointBase(OrganizationEndpoint):
@@ -70,12 +68,4 @@ def get_snuba_query_args_legacy(self, request, organization):
             "filter_keys": _filter.filter_keys,
         }
 
-        # 'legacy' endpoints cannot access transactions dataset.
-        # as they often have assumptions about which columns are returned.
-        dataset = snuba.detect_dataset(snuba_args)
-        if dataset != Dataset.Events:
-            raise OrganizationEventsError(
-                "Invalid query. You cannot reference non-events data in this endpoint."
-            )
-
         return snuba_args

src/sentry/eventstore/snuba/backend.py

Lines changed: 5 additions & 1 deletion
@@ -68,6 +68,7 @@ def get_events(
             limit=limit,
             offset=offset,
             referrer=referrer,
+            dataset=snuba.Dataset.Events,
         )
 
         if "error" not in result:
@@ -188,6 +189,9 @@ def __get_event_id_from_filter(self, filter=None, orderby=None):
         columns = [Columns.EVENT_ID.value.alias, Columns.PROJECT_ID.value.alias]
 
         try:
+            # This query uses the discover dataset to enable
+            # getting events across both errors and transactions, which is
+            # required when doing pagination in discover
             result = snuba.dataset_query(
                 selected_columns=columns,
                 conditions=filter.conditions,
@@ -197,7 +201,7 @@
                 limit=1,
                 referrer="eventstore.get_next_or_prev_event_id",
                 orderby=orderby,
-                dataset=snuba.detect_dataset({"conditions": filter.conditions}),
+                dataset=snuba.Dataset.Discover,
            )
         except (snuba.QueryOutsideRetentionError, snuba.QueryOutsideGroupActivityError):
             # This can happen when the date conditions for paging

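For the prev/next event lookup above, the dataset is now pinned to Discover so a single query can span both errors and transactions. A standalone sketch of the same shape of call (filter values are hypothetical; `filter_keys`, `start`, and `end` are assumed to be standard snuba query kwargs not visible in this hunk):

from datetime import datetime, timedelta

from sentry.utils import snuba

now = datetime.utcnow()
result = snuba.dataset_query(
    selected_columns=["event_id", "project_id"],
    conditions=[],                      # no event.type filter: both kinds
    filter_keys={"project_id": [1]},    # hypothetical project id
    start=now - timedelta(days=1),      # assumed kwarg
    end=now,                            # assumed kwarg
    limit=1,
    referrer="eventstore.get_next_or_prev_event_id",
    orderby=["-timestamp"],             # hypothetical ordering
    dataset=snuba.Dataset.Discover,     # spans errors and transactions
)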
src/sentry/utils/snuba.py

Lines changed: 13 additions & 120 deletions
@@ -292,88 +292,6 @@ def get_snuba_column_name(name, dataset=Dataset.Events):
     return DATASETS[dataset].get(name, u"tags[{}]".format(name))
 
 
-def detect_dataset(query_args, aliased_conditions=False):
-    """
-    Determine the dataset to use based on the conditions, selected_columns,
-    groupby clauses.
-
-    This function operates on the end user field aliases and not the internal column
-    names that have been converted using the field mappings.
-
-    The aliased_conditions parameter switches column detection between
-    the public aliases and the internal names. When query conditions
-    have been pre-parsed by api.event_search set aliased_conditions=True
-    as we need to look for internal names.
-
-    :deprecated: This method and the automatic dataset resolution is deprecated.
-    You should use sentry.snuba.discover instead.
-    """
-    if query_args.get("dataset", None):
-        return query_args["dataset"]
-
-    dataset = Dataset.Events
-    transaction_fields = set(DATASETS[Dataset.Transactions].keys()) - set(
-        DATASETS[Dataset.Events].keys()
-    )
-    condition_fieldset = transaction_fields
-
-    if aliased_conditions:
-        # Release and user are also excluded as they are present on both
-        # datasets and don't trigger usage of transactions.
-        condition_fieldset = (
-            set(DATASET_FIELDS[Dataset.Transactions])
-            - set(DATASET_FIELDS[Dataset.Events])
-            - set(["release", "user"])
-        )
-
-    for condition in query_args.get("conditions") or []:
-        if isinstance(condition[0], six.string_types) and condition[0] in condition_fieldset:
-            return Dataset.Transactions
-        if condition == ["event.type", "=", "transaction"] or condition == [
-            "type",
-            "=",
-            "transaction",
-        ]:
-            return Dataset.Transactions
-
-        if condition == ["event.type", "!=", "transaction"] or condition == [
-            "type",
-            "!=",
-            "transaction",
-        ]:
-            return Dataset.Events
-
-    for field in query_args.get("selected_columns") or []:
-        if isinstance(field, six.string_types) and field in transaction_fields:
-            return Dataset.Transactions
-
-    for field in query_args.get("aggregations") or []:
-        if len(field) != 3:
-            continue
-        # Check field or fields
-        if isinstance(field[1], six.string_types) and field[1] in transaction_fields:
-            return Dataset.Transactions
-        if isinstance(field[1], (list, tuple)):
-            is_transaction = [column for column in field[1] if column in transaction_fields]
-            if is_transaction:
-                return Dataset.Transactions
-        # Check for transaction only field aliases
-        if isinstance(field[2], six.string_types) and field[2] in (
-            "apdex",
-            "impact",
-            "p75",
-            "p95",
-            "p99",
-        ):
-            return Dataset.Transactions
-
-    for field in query_args.get("groupby") or []:
-        if field in transaction_fields:
-            return Dataset.Transactions
-
-    return dataset
-
-
 def get_function_index(column_expr, depth=0):
     """
     If column_expr list contains a function, returns the index of its function name
@@ -411,7 +329,7 @@ def get_function_index(column_expr, depth=0):
     return None
 
 
-def parse_columns_in_functions(col, context=None, index=None, dataset=Dataset.Events):
+def parse_columns_in_functions(col, context=None, index=None):
     """
     Checks expressions for arguments that should be considered a column while
     ignoring strings that represent clickhouse function names
@@ -434,23 +352,23 @@ def parse_columns_in_functions(col, context=None, index=None, dataset=Dataset.Events):
     if function_name_index > 0:
         for i in six.moves.xrange(0, function_name_index):
             if context is not None:
-                context[i] = get_snuba_column_name(col[i], dataset)
+                context[i] = get_snuba_column_name(col[i])
 
         args = col[function_name_index + 1]
 
         # check for nested functions in args
         if get_function_index(args):
             # look for columns
-            return parse_columns_in_functions(args, args, dataset=dataset)
+            return parse_columns_in_functions(args, args)
 
         # check each argument for column names
         else:
             for (i, arg) in enumerate(args):
-                parse_columns_in_functions(arg, args, i, dataset=dataset)
+                parse_columns_in_functions(arg, args, i)
     else:
         # probably a column name
         if context is not None and index is not None:
-            context[index] = get_snuba_column_name(col, dataset)
+            context[index] = get_snuba_column_name(col)
 
 
 def get_arrayjoin(column):
@@ -459,23 +377,6 @@ def get_arrayjoin(column):
         return match.groups()[0]
 
 
-def valid_orderby(orderby, custom_fields=None, dataset=Dataset.Events):
-    """
-    Check if a field can be used in sorting. We don't allow
-    sorting on fields that would be aliased as tag[foo] because those
-    fields are unlikely to be selected.
-    """
-    if custom_fields is None:
-        custom_fields = []
-    fields = orderby if isinstance(orderby, (list, tuple)) else [orderby]
-    mapping = DATASETS[dataset]
-    for field in fields:
-        field = field.lstrip("-")
-        if field not in mapping and field not in custom_fields:
-            return False
-    return True
-
-
 def transform_aliases_and_query(**kwargs):
     """
     Convert aliases in selected_columns, groupby, aggregation, conditions,
@@ -499,7 +400,7 @@ def transform_aliases_and_query(**kwargs):
     arrayjoin = kwargs.get("arrayjoin")
     orderby = kwargs.get("orderby")
     having = kwargs.get("having", [])
-    dataset = detect_dataset(kwargs)
+    dataset = Dataset.Events
 
     if selected_columns:
         for (idx, col) in enumerate(selected_columns):
@@ -511,14 +412,14 @@
                 translated_columns[col[2]] = col[2]
                 derived_columns.add(col[2])
             else:
-                name = get_snuba_column_name(col, dataset)
+                name = get_snuba_column_name(col)
                 selected_columns[idx] = name
                 translated_columns[name] = col
 
     if groupby:
         for (idx, col) in enumerate(groupby):
             if col not in derived_columns:
-                name = get_snuba_column_name(col, dataset)
+                name = get_snuba_column_name(col)
             else:
                 name = col
 
@@ -528,12 +429,12 @@
     for aggregation in aggregations or []:
         derived_columns.add(aggregation[2])
         if isinstance(aggregation[1], six.string_types):
-            aggregation[1] = get_snuba_column_name(aggregation[1], dataset)
+            aggregation[1] = get_snuba_column_name(aggregation[1])
         elif isinstance(aggregation[1], (set, tuple, list)):
-            aggregation[1] = [get_snuba_column_name(col, dataset) for col in aggregation[1]]
+            aggregation[1] = [get_snuba_column_name(col) for col in aggregation[1]]
 
     for col in filter_keys.keys():
-        name = get_snuba_column_name(col, dataset)
+        name = get_snuba_column_name(col)
         filter_keys[name] = filter_keys.pop(col)
 
     if conditions:
@@ -558,7 +459,7 @@
             translated_orderby.append(
                 u"{}{}".format(
                     "-" if field_with_order.startswith("-") else "",
-                    field if field in derived_columns else get_snuba_column_name(field, dataset),
+                    field if field in derived_columns else get_snuba_column_name(field),
                 )
             )
 
@@ -1052,15 +953,7 @@ def dataset_query(
     You should use sentry.snuba.discover instead.
     """
     if dataset is None:
-        dataset = detect_dataset(
-            dict(
-                dataset=dataset,
-                aggregations=aggregations,
-                conditions=conditions,
-                selected_columns=selected_columns,
-                groupby=groupby,
-            )
-        )
+        raise ValueError("A dataset is required, and is no longer automatically detected.")
 
     derived_columns = []
     if selected_columns:

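Since `dataset_query` no longer falls back to detection, omitting the dataset is now a hard error rather than a silent guess. A quick sketch of the new failure mode (hypothetical arguments):

from sentry.utils import snuba

try:
    # `dataset` defaults to None; detect_dataset() used to kick in here.
    snuba.dataset_query(
        selected_columns=["event_id"],
        conditions=[["type", "=", "error"]],
        referrer="example.missing_dataset",  # hypothetical referrer
    )
except ValueError as exc:
    # "A dataset is required, and is no longer automatically detected."
    print(exc)
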
tests/sentry/eventstore/test_base.py

Lines changed: 2 additions & 1 deletion
@@ -2,6 +2,7 @@
 
 import logging
 import mock
+import pytest
 import six
 
 from sentry import eventstore
@@ -71,8 +72,8 @@ def setUp(self):
 
         self.transaction_event = self.store_event(data=event_data, project_id=self.project.id)
 
+    @pytest.mark.skip(reason="There is no longer a difference in underlying dataset.")
     def test_logs_differences(self):
-
         logger = logging.getLogger("sentry.eventstore")
 
         with mock.patch.object(logger, "info") as mock_logger:
