Skip to content

SQL: Remove restriction for single column grouping #31818

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 6, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/reference/sql/language/syntax/select.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,13 @@ And grouping by column expression (typically used along-side an alias):
include-tagged::{sql-specs}/docs.csv-spec[groupByExpression]
----

Or a mixture of the above:
["source","sql",subs="attributes,callouts,macros"]
----
include-tagged::{sql-specs}/docs.csv-spec[groupByMulti]
----


When a `GROUP BY` clause is used in a `SELECT`, _all_ output expressions must be either aggregate functions or expressions used for grouping or derivatives of (otherwise there would be more than one possible value to return for each ungrouped column).

To wit:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
*/
package org.elasticsearch.xpack.sql.planner;

import org.elasticsearch.xpack.sql.expression.Expressions;
import org.elasticsearch.xpack.sql.plan.physical.AggregateExec;
import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan;
import org.elasticsearch.xpack.sql.plan.physical.Unexecutable;
import org.elasticsearch.xpack.sql.plan.physical.UnplannedExec;
Expand Down Expand Up @@ -71,23 +69,11 @@ static List<Failure> verifyMappingPlan(PhysicalPlan plan) {
failures.add(fail(e, "Unresolved expression"));
}
});

if (p instanceof AggregateExec) {
forbidMultiFieldGroupBy((AggregateExec) p, failures);
}
});

return failures;
}

private static void forbidMultiFieldGroupBy(AggregateExec a, List<Failure> failures) {
if (a.groupings().size() > 1) {
failures.add(fail(a.groupings().get(0), "Currently, only a single expression can be used with GROUP BY; please select one of "
+ Expressions.names(a.groupings())));
}
}


static List<Failure> verifyExecutingPlan(PhysicalPlan plan) {
List<Failure> failures = new ArrayList<>();

Expand Down

This file was deleted.

115 changes: 111 additions & 4 deletions x-pack/qa/sql/src/main/resources/agg.sql-spec
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,39 @@ SELECT emp_no * 2 AS e FROM test_emp GROUP BY e ORDER BY e;
groupByModScalar
SELECT (emp_no % 3) + 1 AS e FROM test_emp GROUP BY e ORDER BY e;

// multiple group by
groupByMultiOnText
SELECT gender g, languages l FROM "test_emp" GROUP BY gender, languages ORDER BY gender ASC, languages ASC;
groupByMultiOnTextWithWhereClause
SELECT gender g, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender ASC, languages ASC;
groupByMultiOnTextWithWhereAndLimit
SELECT gender g, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages LIMIT 1;
groupByMultiOnTextOnAlias
SELECT gender g, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY g, l ORDER BY gender ASC, languages ASC;
groupByMultiOnTextOnAliasOrderDesc
SELECT gender g, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY g, l ORDER BY g, l ASC;

groupByMultiOnDate
SELECT birth_date b, languages l FROM "test_emp" GROUP BY birth_date, languages ORDER BY birth_date DESC, languages;
groupByMultiOnDateWithWhereClause
SELECT birth_date b, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY birth_date, languages ORDER BY birth_date DESC, languages;
groupByMultiOnDateWithWhereAndLimit
SELECT birth_date b, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY birth_date, languages ORDER BY birth_date DESC, languages LIMIT 1;
groupByMultiOnDateOnAlias
SELECT birth_date b, languages l FROM "test_emp" WHERE emp_no < 10020 GROUP BY b, l ORDER BY birth_date DESC, languages;

groupByMultiAddScalar
SELECT emp_no + 1 AS e, languages + 5 AS l FROM test_emp GROUP BY e, l ORDER BY e, l;
groupByMultiMinScalarDesc
SELECT emp_no - 1 AS e, languages - 5 AS l FROM test_emp GROUP BY e, l ORDER BY e DESC, l;
groupByMultiAddScalarDesc
SELECT emp_no % 2 AS e, languages % 10 AS l FROM test_emp GROUP BY e, l ORDER BY e DESC, l;
groupByMultiMulScalar
SELECT emp_no * 2 AS e, languages * 2 AS l FROM test_emp GROUP BY e, l ORDER BY e, l;
groupByMultiModScalar
SELECT (emp_no % 3) + 1 AS e, (languages % 3) + 1 AS l FROM test_emp GROUP BY e, l ORDER BY e, l;


//
// Aggregate Functions
//
Expand Down Expand Up @@ -76,13 +109,21 @@ countDistinct
SELECT COUNT(DISTINCT hire_date) AS count FROM test_emp;
// end::countDistinct

aggCountAliasAndWhereClauseMultiGroupBy
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages;
aggCountAliasAndWhereClauseAndLimitMultiGroupBy
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages LIMIT 1;
aggCountAliasWithCastAndFilterMultiGroupBy
SELECT gender g, languages l, CAST(COUNT(*) AS INT) c FROM "test_emp" WHERE emp_no < 10020 GROUP BY gender, languages ORDER BY gender, languages;
aggCountWithAliasMultiGroupBy
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY gender, languages;


// Conditional COUNT
aggCountAndHaving
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING COUNT(*) > 10 ORDER BY gender;
aggCountOnColumnAndHaving
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING COUNT(gender) > 10 ORDER BY gender;
// NOT supported yet since Having introduces a new agg
aggCountOnColumnAndWildcardAndHaving
SELECT gender g, COUNT(*) c FROM "test_emp" GROUP BY g HAVING COUNT(gender) > 10 ORDER BY gender;
aggCountAndHavingOnAlias
Expand All @@ -97,15 +138,41 @@ aggCountOnColumnAndHavingBetween
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender;
aggCountOnColumnAndHavingBetweenWithLimit
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c BETWEEN 10 AND 70 ORDER BY gender LIMIT 1;

aggCountOnColumnAndHavingOnAliasAndFunction
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(gender) < 70 ORDER BY gender;
// NOT supported yet since Having introduces a new agg
aggCountOnColumnAndHavingOnAliasAndFunctionWildcard -> COUNT(*/1) vs COUNT(gender)
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender;
aggCountOnColumnAndHavingOnAliasAndFunctionConstant
SELECT gender g, COUNT(gender) c FROM "test_emp" GROUP BY g HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender;

aggCountAndHavingMultiGroupBy
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING COUNT(*) > 10 ORDER BY gender, l;
aggCountOnColumnAndHavingMultiGroupBy
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING COUNT(gender) > 10 ORDER BY gender, languages;
aggCountOnSecondColumnAndHavingMultiGroupBy
SELECT gender g, languages l, COUNT(languages) c FROM "test_emp" GROUP BY g, l HAVING COUNT(gender) > 10 ORDER BY gender, languages;
aggCountOnColumnAndWildcardAndHavingMultiGroupBy
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING COUNT(gender) > 10 ORDER BY gender, languages;
aggCountAndHavingOnAliasMultiGroupBy
SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l HAVING c > 10 ORDER BY gender, languages;
aggCountOnColumnAndHavingOnAliasMultiGroupBy
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 ORDER BY gender, languages;
aggCountOnColumnAndMultipleHavingMultiGroupBy
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND c < 70 ORDER BY gender, languages;
aggCountOnColumnAndMultipleHavingWithLimitMultiGroupBy
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND c < 70 ORDER BY gender, languages LIMIT 1;
aggCountOnColumnAndHavingBetweenMultiGroupBy
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c BETWEEN 10 AND 70 ORDER BY gender, languages;
aggCountOnColumnAndHavingBetweenWithLimitMultiGroupBy
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c BETWEEN 10 AND 70 ORDER BY gender, languages LIMIT 1;

aggCountOnColumnAndHavingOnAliasAndFunctionMultiGroupBy
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(gender) < 70 ORDER BY gender, languages;
aggCountOnColumnAndHavingOnAliasAndFunctionWildcardMultiGroupBy -> COUNT(*/1) vs COUNT(gender)
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(*) < 70 ORDER BY gender, languages;
aggCountOnColumnAndHavingOnAliasAndFunctionConstantMultiGroupBy
SELECT gender g, languages l, COUNT(gender) c FROM "test_emp" GROUP BY g, l HAVING c > 10 AND COUNT(1) < 70 ORDER BY gender, languages;


// MIN
aggMinImplicit
Expand Down Expand Up @@ -145,6 +212,22 @@ SELECT gender g, MIN(emp_no) m FROM "test_emp" GROUP BY g HAVING m BETWEEN 10 AN
aggMinWithMultipleHavingOnAliasAndFunction
SELECT gender g, MIN(emp_no) m FROM "test_emp" GROUP BY g HAVING m > 10 AND MIN(emp_no) < 99999 ORDER BY gender;

aggMinWithHavingGroupMultiGroupBy
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING MIN(emp_no) > 10 ORDER BY gender, languages;
aggMinWithHavingOnAliasMultiGroupBy
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m > 10 ORDER BY gender, languages;
aggMinWithMultipleHavingMultiGroupBy
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m > 10 AND m < 99999 ORDER BY gender, languages;
aggMinWithMultipleHavingBetweenMultiGroupBy
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m BETWEEN 10 AND 99999 ORDER BY gender, languages;
aggMinWithMultipleHavingWithLimitMultiGroupBy
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m > 10 AND m < 99999 ORDER BY g, l LIMIT 1;
aggMinWithMultipleHavingBetweenMultiGroupBy
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m BETWEEN 10 AND 99999 ORDER BY g, l LIMIT 1;
aggMinWithMultipleHavingOnAliasAndFunctionMultiGroupBy
SELECT gender g, languages l, MIN(emp_no) m FROM "test_emp" GROUP BY g, l HAVING m > 10 AND MIN(emp_no) < 99999 ORDER BY gender, languages;


// MAX
aggMaxImplicit
// tag::max
Expand Down Expand Up @@ -253,6 +336,8 @@ SELECT gender g, CAST(AVG(emp_no) AS FLOAT) a FROM "test_emp" GROUP BY g HAVING
//
aggGroupByOnScalarWithHaving
SELECT emp_no + 1 AS e FROM test_emp GROUP BY e HAVING AVG(salary) BETWEEN 1 AND 10010 ORDER BY e;
aggMultiGroupByOnScalarWithHaving
SELECT emp_no + 1 AS e, languages % 10 AS l FROM test_emp GROUP BY e, l HAVING AVG(salary) BETWEEN 1 AND 10010 ORDER BY e, l;

//
// Mixture of Aggs that triggers promotion of aggs to stats
Expand All @@ -272,12 +357,34 @@ SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_
aggHavingScalarOnAggFunctionsWithoutAliasesInAndNotInGroupBy
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY languages HAVING MAX(salary) % MIN(salary) + AVG(salary) > 3000 ORDER BY languages;

aggMultiGroupByMultiIncludingScalarFunction
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages ORDER BY gender, languages;
aggMultiGroupByHavingWithAggNotInGroupBy
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING AVG(salary) > 30000 ORDER BY gender, languages;
aggMultiGroupByHavingWithAliasOnScalarFromGroupBy
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING d BETWEEN 50 AND 10000 AND AVG(salary) > 30000 ORDER BY gender, languages;
aggMultiGroupByHavingWithScalarFunctionBasedOnAliasFromGroupBy
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING ma % mi > 1 AND AVG(salary) > 30000 ORDER BY gender, languages;
aggMultiGroupByHavingWithMultipleScalarFunctionsBasedOnAliasFromGroupBy
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING d - ma % mi > 0 AND AVG(salary) > 30000 ORDER BY gender, languages;
aggMultiGroupByHavingWithMultipleScalarFunctionsBasedOnAliasFromGroupByAndAggNotInGroupBy
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING ROUND(d - ABS(ma % mi)) + AVG(salary) > 0 AND AVG(salary) > 30000 ORDER BY gender, languages;
aggMultiGroupByHavingScalarOnAggFunctionsWithoutAliasesInAndNotInGroupBy
SELECT MIN(salary) mi, MAX(salary) ma, MAX(salary) - MIN(salary) AS d FROM test_emp GROUP BY gender, languages HAVING MAX(salary) % MIN(salary) + AVG(salary) > 3000 ORDER BY gender, languages;


//
// Mixture of aggs that get promoted plus filtering on one of them
//
aggMultiWithHaving
SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING max > 74600 ORDER BY gender;

aggMultiGroupByMultiWithHaving
SELECT MIN(salary) min, MAX(salary) max, gender g, languages l, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g, languages HAVING max > 74600 ORDER BY gender, languages;


// filter on count (which is a special agg)
aggMultiWithHavingOnCount
SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING c > 40 ORDER BY gender;
SELECT MIN(salary) min, MAX(salary) max, gender g, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g HAVING c > 40 ORDER BY gender;
aggMultiGroupByMultiWithHavingOnCount
SELECT MIN(salary) min, MAX(salary) max, gender g, languages l, COUNT(*) c FROM "test_emp" WHERE languages > 0 GROUP BY g, languages HAVING c > 40 ORDER BY gender, languages;
21 changes: 21 additions & 0 deletions x-pack/qa/sql/src/main/resources/docs.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,27 @@ SELECT languages + 1 AS l FROM emp GROUP BY l;
// end::groupByExpression
;

groupByMulti
// tag::groupByMulti
SELECT gender AS g, languages + 1 AS l, COUNT(*) AS c FROM emp GROUP BY gender, l;

g | l | c
---------------+---------------+---------------
F |2 |4
F |3 |8
F |4 |7
F |5 |7
F |6 |11
M |2 |12
M |3 |12
M |4 |15
M |5 |11
M |6 |13

// end::groupByMulti
;


groupByAndAgg
// tag::groupByAndAgg
SELECT gender AS g, COUNT(*) AS c FROM emp GROUP BY gender;
Expand Down