Skip to content

SQL: Verify GROUP BY ordering on grouped columns #30585

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -211,12 +211,13 @@ static Collection<Failure> verify(LogicalPlan plan) {

/**
* Check validity of Aggregate/GroupBy.
* This rule is needed for two reasons:
* This rule is needed for multiple reasons:
* 1. a user might specify an invalid aggregate (SELECT foo GROUP BY bar)
* 2. the order/having might contain a non-grouped attribute. This is typically
* caught by the Analyzer however if wrapped in a function (ABS()) it gets resolved
* (because the expression gets resolved little by little without being pushed down,
* without the Analyzer modifying anything.
* 3. composite agg (used for GROUP BY) allows ordering only on the group keys
*/
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
Map<String, Function> resolvedFunctions, Set<LogicalPlan> groupingFailures) {
Expand All @@ -225,7 +226,7 @@ && checkGroupByOrder(p, localFailures, groupingFailures, resolvedFunctions)
&& checkGroupByHaving(p, localFailures, groupingFailures, resolvedFunctions);
}

// check whether an orderBy failed
// check whether an orderBy failed or if it occurs on a non-key
private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailures,
Set<LogicalPlan> groupingFailures, Map<String, Function> functions) {
if (p instanceof OrderBy) {
Expand All @@ -234,7 +235,23 @@ private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailur
Aggregate a = (Aggregate) o.child();

Map<Expression, Node<?>> missing = new LinkedHashMap<>();
o.order().forEach(oe -> oe.collectFirstChildren(c -> checkGroupMatch(c, oe, a.groupings(), missing, functions)));
o.order().forEach(oe -> {
Expression e = oe.child();
// cannot order by aggregates (not supported by composite)
if (Functions.isAggregate(e)) {
missing.put(e, oe);
return;
}

// make sure to compare attributes directly
if (Expressions.anyMatch(a.groupings(),
g -> e.semanticEquals(e instanceof Attribute ? Expressions.attribute(g) : g))) {
return;
}

// nothing matched, cannot group by it
missing.put(e, oe);
});

if (!missing.isEmpty()) {
String plural = missing.size() > 1 ? "s" : StringUtils.EMPTY;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public void testGroupByOrderByNonGrouped() {
}

public void testGroupByOrderByScalarOverNonGrouped() {
assertEquals("1:50: Cannot order by non-grouped column [date], expected [text]",
assertEquals("1:50: Cannot order by non-grouped column [YEAR(date [UTC])], expected [text]",
verify("SELECT MAX(int) FROM test GROUP BY text ORDER BY YEAR(date)"));
}

Expand Down Expand Up @@ -144,4 +144,19 @@ public void testUnsupportedType() {
assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
verify("SELECT unsupported FROM test"));
}
}

public void testGroupByOrderByNonKey() {
assertEquals("1:52: Cannot order by non-grouped column [a], expected [bool]",
verify("SELECT AVG(int) a FROM test GROUP BY bool ORDER BY a"));
}

public void testGroupByOrderByFunctionOverKey() {
assertEquals("1:44: Cannot order by non-grouped column [MAX(int)], expected [int]",
verify("SELECT int FROM test GROUP BY int ORDER BY MAX(int)"));
}

public void testGroupByOrderByScore() {
assertEquals("1:44: Cannot order by non-grouped column [SCORE()], expected [int]",
verify("SELECT int FROM test GROUP BY int ORDER BY SCORE()"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,18 @@ public void testMultiGroupBy() {
assertEquals("1:32: Currently, only a single expression can be used with GROUP BY; please select one of [bool, keyword]",
verify("SELECT bool FROM test GROUP BY bool, keyword"));
}

//
// TODO potential improvements
//
// regarding resolution
// public void testGroupByOrderByKeyAlias() {
// assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
// verify("SELECT int i FROM test GROUP BY int ORDER BY i"));
// }
//
// public void testGroupByAlias() {
// assertEquals("1:8: Cannot use field [unsupported] type [ip_range] as is unsupported",
// verify("SELECT int i FROM test GROUP BY i ORDER BY int"));
// }
}