Skip to content

SQL: HAVING clause should accept only aggregates #31872

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,11 @@ static Collection<Failure> verify(LogicalPlan plan) {
* Check validity of Aggregate/GroupBy.
* This rule is needed for multiple reasons:
* 1. a user might specify an invalid aggregate (SELECT foo GROUP BY bar)
* 2. the order/having might contain a non-grouped attribute. This is typically
* 2. the ORDER BY/HAVING might contain a non-grouped attribute. This is typically
* caught by the Analyzer however if wrapped in a function (ABS()) it gets resolved
* (because the expression gets resolved little by little without being pushed down,
* without the Analyzer modifying anything.
* 2a. HAVING also requires an Aggregate function
* 3. composite agg (used for GROUP BY) allows ordering only on the group keys
*/
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
Expand Down Expand Up @@ -244,7 +245,7 @@ private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailur
}

// make sure to compare attributes directly
if (Expressions.anyMatch(a.groupings(),
if (Expressions.anyMatch(a.groupings(),
g -> e.semanticEquals(e instanceof Attribute ? Expressions.attribute(g) : g))) {
return;
}
Expand Down Expand Up @@ -278,13 +279,14 @@ private static boolean checkGroupByHaving(LogicalPlan p, Set<Failure> localFailu

Map<Expression, Node<?>> missing = new LinkedHashMap<>();
Expression condition = f.condition();
condition.collectFirstChildren(c -> checkGroupMatch(c, condition, a.groupings(), missing, functions));
// variation of checkGroupMatch customized for HAVING, which requires just aggregations
condition.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, condition, missing, functions));

if (!missing.isEmpty()) {
String plural = missing.size() > 1 ? "s" : StringUtils.EMPTY;
localFailures.add(fail(condition, "Cannot filter by non-grouped column" + plural + " %s, expected %s",
Expressions.names(missing.keySet()),
Expressions.names(a.groupings())));
localFailures.add(
fail(condition, "Cannot filter HAVING on non-aggregate" + plural + " %s; consider using WHERE instead",
Expressions.names(missing.keySet())));
groupingFailures.add(a);
return false;
}
Expand All @@ -294,6 +296,57 @@ private static boolean checkGroupByHaving(LogicalPlan p, Set<Failure> localFailu
}


private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node<?> source,
Map<Expression, Node<?>> missing, Map<String, Function> functions) {

// resolve FunctionAttribute to backing functions
if (e instanceof FunctionAttribute) {
FunctionAttribute fa = (FunctionAttribute) e;
Function function = functions.get(fa.functionId());
// TODO: this should be handled by a different rule
if (function == null) {
return false;
}
e = function;
}

// scalar functions can be a binary tree
// first test the function against the grouping
// and if that fails, start unpacking hoping to find matches
if (e instanceof ScalarFunction) {
ScalarFunction sf = (ScalarFunction) e;

// unwrap function to find the base
for (Expression arg : sf.arguments()) {
arg.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, source, missing, functions));
}
return true;

} else if (e instanceof Score) {
// Score can't be used for having
missing.put(e, source);
return true;
}

// skip literals / foldable
if (e.foldable()) {
return true;
}
// skip aggs (allowed to refer to non-group columns)
if (Functions.isAggregate(e)) {
return true;
}

// left without leaves which have to match; that's a failure since everything should be based on an agg
if (e instanceof Attribute) {
missing.put(e, source);
return true;
}

return false;
}


// check whether plain columns specified in an agg are mentioned in the group-by
private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures,
Set<LogicalPlan> groupingFailures, Map<String, Function> functions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,14 @@ public void testGroupByOrderByScore() {
assertEquals("1:44: Cannot order by non-grouped column [SCORE()], expected [int]",
verify("SELECT int FROM test GROUP BY int ORDER BY SCORE()"));
}

public void testHavingOnColumn() {
assertEquals("1:42: Cannot filter HAVING on non-aggregate [int]; consider using WHERE instead",
verify("SELECT int FROM test GROUP BY int HAVING int > 2"));
}

public void testHavingOnScalar() {
assertEquals("1:42: Cannot filter HAVING on non-aggregate [int]; consider using WHERE instead",
verify("SELECT int FROM test GROUP BY int HAVING 2 < ABS(int)"));
}
}