Skip to content

Commit d2cd896

Browse files
committed
SQL: HAVING clause should accept only aggregates (#31872)
Improve Verifier to allow HAVING clauses only on aggregates Close #31726 (cherry picked from commit 5440fb4)
1 parent c209f56 commit d2cd896

File tree

2 files changed

+69
-6
lines changed

2 files changed

+69
-6
lines changed

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java

+59-6
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,11 @@ static Collection<Failure> verify(LogicalPlan plan) {
213213
* Check validity of Aggregate/GroupBy.
214214
* This rule is needed for multiple reasons:
215215
* 1. a user might specify an invalid aggregate (SELECT foo GROUP BY bar)
216-
* 2. the order/having might contain a non-grouped attribute. This is typically
216+
* 2. the ORDER BY/HAVING might contain a non-grouped attribute. This is typically
217217
* caught by the Analyzer however if wrapped in a function (ABS()) it gets resolved
218218
* (because the expression gets resolved little by little without being pushed down,
219219
* without the Analyzer modifying anything.
220+
* 2a. HAVING also requires an Aggregate function
220221
* 3. composite agg (used for GROUP BY) allows ordering only on the group keys
221222
*/
222223
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
@@ -244,7 +245,7 @@ private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailur
244245
}
245246

246247
// make sure to compare attributes directly
247-
if (Expressions.anyMatch(a.groupings(),
248+
if (Expressions.anyMatch(a.groupings(),
248249
g -> e.semanticEquals(e instanceof Attribute ? Expressions.attribute(g) : g))) {
249250
return;
250251
}
@@ -278,13 +279,14 @@ private static boolean checkGroupByHaving(LogicalPlan p, Set<Failure> localFailu
278279

279280
Map<Expression, Node<?>> missing = new LinkedHashMap<>();
280281
Expression condition = f.condition();
281-
condition.collectFirstChildren(c -> checkGroupMatch(c, condition, a.groupings(), missing, functions));
282+
// variation of checkGroupMatch customized for HAVING, which requires just aggregations
283+
condition.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, condition, missing, functions));
282284

283285
if (!missing.isEmpty()) {
284286
String plural = missing.size() > 1 ? "s" : StringUtils.EMPTY;
285-
localFailures.add(fail(condition, "Cannot filter by non-grouped column" + plural + " %s, expected %s",
286-
Expressions.names(missing.keySet()),
287-
Expressions.names(a.groupings())));
287+
localFailures.add(
288+
fail(condition, "Cannot filter HAVING on non-aggregate" + plural + " %s; consider using WHERE instead",
289+
Expressions.names(missing.keySet())));
288290
groupingFailures.add(a);
289291
return false;
290292
}
@@ -294,6 +296,57 @@ private static boolean checkGroupByHaving(LogicalPlan p, Set<Failure> localFailu
294296
}
295297

296298

299+
private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node<?> source,
300+
Map<Expression, Node<?>> missing, Map<String, Function> functions) {
301+
302+
// resolve FunctionAttribute to backing functions
303+
if (e instanceof FunctionAttribute) {
304+
FunctionAttribute fa = (FunctionAttribute) e;
305+
Function function = functions.get(fa.functionId());
306+
// TODO: this should be handled by a different rule
307+
if (function == null) {
308+
return false;
309+
}
310+
e = function;
311+
}
312+
313+
// scalar functions can be a binary tree
314+
// first test the function against the grouping
315+
// and if that fails, start unpacking hoping to find matches
316+
if (e instanceof ScalarFunction) {
317+
ScalarFunction sf = (ScalarFunction) e;
318+
319+
// unwrap function to find the base
320+
for (Expression arg : sf.arguments()) {
321+
arg.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, source, missing, functions));
322+
}
323+
return true;
324+
325+
} else if (e instanceof Score) {
326+
// Score can't be used for having
327+
missing.put(e, source);
328+
return true;
329+
}
330+
331+
// skip literals / foldable
332+
if (e.foldable()) {
333+
return true;
334+
}
335+
// skip aggs (allowed to refer to non-group columns)
336+
if (Functions.isAggregate(e)) {
337+
return true;
338+
}
339+
340+
// left without leaves which have to match; that's a failure since everything should be based on an agg
341+
if (e instanceof Attribute) {
342+
missing.put(e, source);
343+
return true;
344+
}
345+
346+
return false;
347+
}
348+
349+
297350
// check whether plain columns specified in an agg are mentioned in the group-by
298351
private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures,
299352
Set<LogicalPlan> groupingFailures, Map<String, Function> functions) {

x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java

+10
Original file line numberDiff line numberDiff line change
@@ -159,4 +159,14 @@ public void testGroupByOrderByScore() {
159159
assertEquals("1:44: Cannot order by non-grouped column [SCORE()], expected [int]",
160160
verify("SELECT int FROM test GROUP BY int ORDER BY SCORE()"));
161161
}
162+
163+
public void testHavingOnColumn() {
164+
assertEquals("1:42: Cannot filter HAVING on non-aggregate [int]; consider using WHERE instead",
165+
verify("SELECT int FROM test GROUP BY int HAVING int > 2"));
166+
}
167+
168+
public void testHavingOnScalar() {
169+
assertEquals("1:42: Cannot filter HAVING on non-aggregate [int]; consider using WHERE instead",
170+
verify("SELECT int FROM test GROUP BY int HAVING 2 < ABS(int)"));
171+
}
162172
}

0 commit comments

Comments
 (0)