Skip to content

Commit a460a19

Browse files
committed
SQL: Remove restriction for single column grouping
For historical reasons SQL restricts GROUP BY to only one field. This commit removes the restriction and improves the test suite with multi group by tests. Close elastic#31793
1 parent f40581c commit a460a19

File tree

5 files changed

+139
-70
lines changed

5 files changed

+139
-70
lines changed

docs/reference/sql/language/syntax/select.asciidoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,13 @@ And grouping by column expression (typically used along-side an alias):
177177
include-tagged::{sql-specs}/docs.csv-spec[groupByExpression]
178178
----
179179

180+
Or a mixture of the above:
181+
["source","sql",subs="attributes,callouts,macros"]
182+
----
183+
include-tagged::{sql-specs}/docs.csv-spec[groupByMulti]
184+
----
185+
186+
180187
When a `GROUP BY` clause is used in a `SELECT`, _all_ output expressions must be either aggregate functions or expressions used for grouping or derivatives of (otherwise there would be more than one possible value to return for each ungrouped column).
181188

182189
To wit:

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/Verifier.java

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
*/
66
package org.elasticsearch.xpack.sql.planner;
77

8-
import org.elasticsearch.xpack.sql.expression.Expressions;
9-
import org.elasticsearch.xpack.sql.plan.physical.AggregateExec;
108
import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan;
119
import org.elasticsearch.xpack.sql.plan.physical.Unexecutable;
1210
import org.elasticsearch.xpack.sql.plan.physical.UnplannedExec;
@@ -71,23 +69,11 @@ static List<Failure> verifyMappingPlan(PhysicalPlan plan) {
7169
failures.add(fail(e, "Unresolved expression"));
7270
}
7371
});
74-
75-
if (p instanceof AggregateExec) {
76-
forbidMultiFieldGroupBy((AggregateExec) p, failures);
77-
}
7872
});
7973

8074
return failures;
8175
}
8276

83-
private static void forbidMultiFieldGroupBy(AggregateExec a, List<Failure> failures) {
84-
if (a.groupings().size() > 1) {
85-
failures.add(fail(a.groupings().get(0), "Currently, only a single expression can be used with GROUP BY; please select one of "
86-
+ Expressions.names(a.groupings())));
87-
}
88-
}
89-
90-
9177
static List<Failure> verifyExecutingPlan(PhysicalPlan plan) {
9278
List<Failure> failures = new ArrayList<>();
9379

x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/VerifierErrorMessagesTests.java

Lines changed: 0 additions & 52 deletions
This file was deleted.

x-pack/qa/sql/src/main/resources/agg.sql-spec

Lines changed: 111 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,39 @@ SELECT emp_no * 2 AS e FROM test_emp GROUP BY e ORDER BY e;
4848
groupByModScalar
4949
SELECT (emp_no % 3) + 1 AS e FROM test_emp GROUP BY e ORDER BY e;
5050

51+
// multiple group by
52+
groupByMultiOnText
53+
SELECT gender g, languages l FROM "test_emp" GROUP BY gender, languages ORDER BY gender ASC, languages ASC;
54+
groupByMultiOnTextWithWhereClause
55+
SELECT gender g, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender ASC, languages ASC;
56+
groupByMultiOnTextWithWhereAndLimit
57+
SELECT gender g, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages LIMIT 1;
58+
groupByMultiOnTextOnAlias
59+
SELECT gender g, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY g, l ORDER BY gender ASC, languages ASC;
60+
groupByMultiOnTextOnAliasOrderDesc
61+
SELECT gender g, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY g, l ORDER BY g, l ASC;
62+
63+
groupByMultiOnDate
64+
SELECT birth_date b, languages l FROM "test_emp" GROUP BY birth_date, languages ORDER BY birth_date DESC, languages;
65+
groupByMultiOnDateWithWhereClause
66+
SELECT birth_date b, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY birth_date, languages ORDER BY birth_date DESC, languages;
67+
groupByMultiOnDateWithWhereAndLimit
68+
SELECT birth_date b, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY birth_date, languages ORDER BY birth_date DESC, languages LIMIT 1;
69+
groupByMultiOnDateOnAlias
70+
SELECT birth_date b, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY b, l ORDER BY birth_date DESC, languages;
71+
72+
groupByMultiAddScalar
73+
SELECT emp_no + 1 AS e, languages + 5 AS l FROM test_emp GROUP BY e, l ORDER BY e, l;
74+
groupByMultiMinScalarDesc
75+
SELECT emp_no - 1 AS e, languages - 5 AS l FROM test_emp GROUP BY e, l ORDER BY e DESC, l;
76+
groupByMultiAddScalarDesc
77+
SELECT emp_no % 2 AS e, languages % 10 AS l FROM test_emp GROUP BY e, l ORDER BY e DESC, l;
78+
groupByMultiMulScalar
79+
SELECT emp_no * 2 AS e, languages * 2 AS l FROM test_emp GROUP BY e, l ORDER BY e, l;
80+
groupByMultiModScalar
81+
SELECT (emp_no % 3) + 1 AS e, (languages % 3) + 1 AS l FROM test_emp GROUP BY e, l ORDER BY e, l;
82+
83+
5184
//
5285
// Aggregate Functions
5386
//
@@ -76,13 +109,21 @@ countDistinct
76109
SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
77110
// end::countDistinct
78111

112+
aggCountAliasAndWhereClauseMultiGroupBy
113+
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages;
114+
aggCountAliasAndWhereClauseAndLimitMultiGroupBy
115+
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages LIMIT 1;
116+
aggCountAliasWithCastAndFilterMultiGroupBy
117+
SELECT gender g, languages l, CAST(COUNT(*) AS INT) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages;
118+
aggCountWithAliasMultiGroupBy
119+
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY gender, languages;
120+
79121

80122
// Conditional COUNT
81123
aggCountAndHaving
82124
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING COUNT(*) > 10 ORDER BY gender;
83125
aggCountOnColumnAndHaving
84126
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING COUNT(gender) > 10 ORDER BY gender;
85-
// NOT supported yet since Having introduces a new agg
86127
aggCountOnColumnAndWildcardAndHaving
87128
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING COUNT(gender) > 10 ORDER BY gender;
88129
aggCountAndHavingOnAlias
@@ -97,15 +138,41 @@ aggCountOnColumnAndHavingBetween
97138
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender;
98139
aggCountOnColumnAndHavingBetweenWithLimit
99140
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender LIMIT 1;
100-
101141
aggCountOnColumnAndHavingOnAliasAndFunction
102142
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(gender) < 70 ORDER BY gender;
103-
// NOT supported yet since Having introduces a new agg
104143
aggCountOnColumnAndHavingOnAliasAndFunctionWildcard -> COUNT(*/1) vs COUNT(gender)
105144
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender;
106145
aggCountOnColumnAndHavingOnAliasAndFunctionConstant
107146
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender;
108147

148+
aggCountAndHavingMultiGroupBy
149+
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING COUNT(*) > 10 ORDER BY gender, l;
150+
aggCountOnColumnAndHavingMultiGroupBy
151+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING COUNT(gender) > 10 ORDER BY gender, languages;
152+
aggCountOnSecondColumnAndHavingMultiGroupBy
153+
SELECT gender g, languages l, COUNT(languages) c FROM "test_emp" GROUP BY g, l HAVING COUNT(gender) > 10 ORDER BY gender, languages;
154+
aggCountOnColumnAndWildcardAndHavingMultiGroupBy
155+
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING COUNT(gender) > 10 ORDER BY gender, languages;
156+
aggCountAndHavingOnAliasMultiGroupBy
157+
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING c > 10 ORDER BY gender, languages;
158+
aggCountOnColumnAndHavingOnAliasMultiGroupBy
159+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 ORDER BY gender, languages;
160+
aggCountOnColumnAndMultipleHavingMultiGroupBy
161+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND c < 70 ORDER BY gender, languages;
162+
aggCountOnColumnAndMultipleHavingWithLimitMultiGroupBy
163+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND c < 70 ORDER BY gender, languages LIMIT 1;
164+
aggCountOnColumnAndHavingBetweenMultiGroupBy
165+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c BETWEEN 10 AND 70 ORDER BY gender, languages;
166+
aggCountOnColumnAndHavingBetweenWithLimitMultiGroupBy
167+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c BETWEEN 10 AND 70 ORDER BY gender, languages LIMIT 1;
168+
169+
aggCountOnColumnAndHavingOnAliasAndFunctionMultiGroupBy
170+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(gender) < 70 ORDER BY gender, languages;
171+
aggCountOnColumnAndHavingOnAliasAndFunctionWildcardMultiGroupBy -> COUNT(*/1) vs COUNT(gender)
172+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender, languages;
173+
aggCountOnColumnAndHavingOnAliasAndFunctionConstantMultiGroupBy
174+
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender, languages;
175+
109176

110177
// MIN
111178
aggMinImplicit
@@ -145,6 +212,22 @@ SELECT gender g, MIN(emp_no) m FROM "test_emp" GROUP BY g HAVING m BETWEEN 10 AN
145212
aggMinWithMultipleHavingOnAliasAndFunction
146213
SELECT gender g, MIN(emp_no) m FROM "test_emp" GROUP BY g HAVING m > 10 AND MIN(emp_no) < 99999 ORDER BY gender;
147214

215+
aggMinWithHavingGroupMultiGroupBy
216+
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING MIN(emp_no) > 10 ORDER BY gender, languages;
217+
aggMinWithHavingOnAliasMultiGroupBy
218+
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m > 10 ORDER BY gender, languages;
219+
aggMinWithMultipleHavingMultiGroupBy
220+
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m > 10 AND m < 99999 ORDER BY gender, languages;
221+
aggMinWithMultipleHavingBetweenMultiGroupBy
222+
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m BETWEEN 10 AND 99999 ORDER BY gender, languages;
223+
aggMinWithMultipleHavingWithLimitMultiGroupBy
224+
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m > 10 AND m < 99999 ORDER BY g, l LIMIT 1;
225+
aggMinWithMultipleHavingBetweenMultiGroupBy
226+
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m BETWEEN 10 AND 99999 ORDER BY g, l LIMIT 1;
227+
aggMinWithMultipleHavingOnAliasAndFunctionMultiGroupBy
228+
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m > 10 AND MIN(emp_no) < 99999 ORDER BY gender, languages;
229+
230+
148231
// MAX
149232
aggMaxImplicit
150233
// tag::max
@@ -253,6 +336,8 @@ SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a FROM "test_emp" GROUP BY g HAVING
253336
//
254337
aggGroupByOnScalarWithHaving
255338
SELECT emp_no + 1 AS e FROM test_emp GROUP BY e HAVING AVG(salary) BETWEEN 1 AND 10010 ORDER BY e;
339+
aggMultiGroupByOnScalarWithHaving
340+
SELECT emp_no + 1 AS e, languages % 10 AS l FROM test_emp GROUP BY e, l HAVING AVG(salary) BETWEEN 1 AND 10010 ORDER BY e, l;
256341

257342
//
258343
// Mixture of Aggs that triggers promotion of aggs to stats
@@ -272,12 +357,34 @@ SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_
272357
aggHavingScalarOnAggFunctionsWithoutAliasesInAndNotInGroupBy
273358
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY languages HAVING MAX(salary) % MIN(salary) + AVG(salary) > 3000 ORDER BY languages;
274359

360+
aggMultiGroupByMultiIncludingScalarFunction
361+
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages ORDER BY gender, languages;
362+
aggMultiGroupByHavingWithAggNotInGroupBy
363+
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING AVG(salary) > 30000 ORDER BY gender, languages;
364+
aggMultiGroupByHavingWithAliasOnScalarFromGroupBy
365+
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING d BETWEEN 50 AND 10000 AND AVG(salary) > 30000 ORDER BY gender, languages;
366+
aggMultiGroupByHavingWithScalarFunctionBasedOnAliasFromGroupBy
367+
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING ma % mi > 1 AND AVG(salary) > 30000 ORDER BY gender, languages;
368+
aggMultiGroupByHavingWithMultipleScalarFunctionsBasedOnAliasFromGroupBy
369+
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING d - ma % mi > 0 AND AVG(salary) > 30000 ORDER BY gender, languages;
370+
aggMultiGroupByHavingWithMultipleScalarFunctionsBasedOnAliasFromGroupByAndAggNotInGroupBy
371+
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING ROUND(d - ABS(ma % mi)) + AVG(salary) > 0 AND AVG(salary) > 30000 ORDER BY gender, languages;
372+
aggMultiGroupByHavingScalarOnAggFunctionsWithoutAliasesInAndNotInGroupBy
373+
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING MAX(salary) % MIN(salary) + AVG(salary) > 3000 ORDER BY gender, languages;
374+
375+
275376
//
276377
// Mixture of aggs that get promoted plus filtering on one of them
277378
//
278379
aggMultiWithHaving
279380
SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING max > 74600 ORDER BY gender;
280381

382+
aggMultiGroupByMultiWithHaving
383+
SELECT MIN(salary) min, MAX(salary) max, gender g, languages l, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g, languages HAVING max > 74600 ORDER BY gender, languages;
384+
385+
281386
// filter on count (which is a special agg)
282387
aggMultiWithHavingOnCount
283-
SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING c > 40 ORDER BY gender;
388+
SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING c > 40 ORDER BY gender;
389+
aggMultiGroupByMultiWithHavingOnCount
390+
SELECT MIN(salary) min, MAX(salary) max, gender g, languages l, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g, languages HAVING c > 40 ORDER BY gender, languages;

x-pack/qa/sql/src/main/resources/docs.csv-spec

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,27 @@ SELECT languages + 1 AS l FROM emp GROUP BY l;
456456
// end::groupByExpression
457457
;
458458

459+
groupByMulti
460+
// tag::groupByMulti
461+
SELECT gender AS g, languages + 1 AS l, COUNT(*) AS c FROM emp GROUP BY gender, l;
462+
463+
g | l | c
464+
---------------+---------------+---------------
465+
F |2 |4
466+
F |3 |8
467+
F |4 |7
468+
F |5 |7
469+
F |6 |11
470+
M |2 |12
471+
M |3 |12
472+
M |4 |15
473+
M |5 |11
474+
M |6 |13
475+
476+
// end::groupByMulti
477+
;
478+
479+
459480
groupByAndAgg
460481
// tag::groupByAndAgg
461482
SELECT gender AS g, COUNT(*) AS c FROM emp GROUP BY gender;

0 commit comments

Comments
 (0)