Skip to content

Commit 67704b0

Browse files
author
Andras Palinkas
authored
SQL: Enable the InnerAggregates inside PIVOT (#65792)
* Remove the limitation of not being able to use `InnerAggregate`s (aggregations using extended and matrix stats) inside `PIVOT`.
* The limitation was introduced as part of the original `PIVOT` implementation in #46489, but it could be lifted after #49693.
* Test that a `PIVOT` results in the same query as the equivalent `GROUP BY`. This should hold across all the `AggregateFunction`s we have.
1 parent 9b47889 commit 67704b0

File tree

4 files changed

+79
-95
lines changed

4 files changed

+79
-95
lines changed

x-pack/plugin/sql/qa/server/src/main/resources/pivot.csv-spec

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,51 @@ null |10044 |Mingsen |F |1994-05-21
201201
// end::sumWithoutSubquery
202202
;
203203

204+
sumWithInnerAggregateSumOfSquares
205+
schema::birth_date:ts|emp_no:i|first_name:s|gender:s|hire_date:ts|last_name:s|1:d|2:d
206+
SELECT * FROM test_emp PIVOT (SUM_OF_SQUARES(salary) FOR languages IN (1, 2)) LIMIT 5;
207+
208+
birth_date | emp_no | first_name | gender | hire_date | last_name | 1 | 2
209+
---------------------+---------------+---------------+---------------+------------------------+---------------+---------------+---------------
210+
null |10041 |Uri |F |1989-11-12T00:00:00.000Z|Lenart |3.182652225E9 |null
211+
null |10043 |Yishay |M |1990-10-20T00:00:00.000Z|Tzvieli |1.179304281E9 |null
212+
null |10044 |Mingsen |F |1994-05-21T00:00:00.000Z|Casley |1.578313984E9 |null
213+
1952-04-19T00:00:00Z |10009 |Sumant |F |1985-02-18T00:00:00.000Z|Peac |4.378998276E9 |null
214+
1953-01-07T00:00:00Z |10067 |Claudi |M |1987-03-04T00:00:00.000Z|Stavenow |null |2.708577936E9
215+
;
216+
217+
sumWithInnerAggregateSumOfSquaresRound
218+
schema::birth_date:ts|emp_no:i|first_name:s|gender:s|hire_date:ts|last_name:s|1:d|2:d
219+
SELECT * FROM test_emp PIVOT (ROUND(SUM_OF_SQUARES(salary)/1E6, 2) FOR languages IN (1, 2)) LIMIT 5;
220+
221+
birth_date | emp_no | first_name | gender | hire_date | last_name | 1 | 2
222+
---------------------+---------------+---------------+---------------+------------------------+---------------+---------------+---------------
223+
null |10041 |Uri |F |1989-11-12T00:00:00.000Z|Lenart |3182.65 |null
224+
null |10043 |Yishay |M |1990-10-20T00:00:00.000Z|Tzvieli |1179.30 |null
225+
null |10044 |Mingsen |F |1994-05-21T00:00:00.000Z|Casley |1578.31 |null
226+
1952-04-19T00:00:00Z |10009 |Sumant |F |1985-02-18T00:00:00.000Z|Peac |4379.00 |null
227+
1953-01-07T00:00:00Z |10067 |Claudi |M |1987-03-04T00:00:00.000Z|Stavenow |null |2708.58
228+
;
229+
230+
sumWithInnerAggregateKurtosis
231+
schema::client_port:i|'OK':d|'Error':d
232+
SELECT * FROM (SELECT client_port, status, bytes_in FROM logs WHERE client_port IS NULL) PIVOT (KURTOSIS(bytes_in) FOR status IN ('OK', 'Error')) LIMIT 10;
233+
234+
client_port | 'OK' | 'Error'
235+
---------------+------------------+---------------
236+
null |2.0016153277578916|NaN
237+
;
238+
239+
sumWithInnerAggregateKurtosisRound
240+
schema::client_port:i|'OK':d|'Error':d
241+
SELECT * FROM (SELECT client_port, status, bytes_in FROM logs WHERE client_port IS NULL) PIVOT (ROUND(KURTOSIS(bytes_in), 3) FOR status IN ('OK', 'Error')) LIMIT 10;
242+
243+
client_port | 'OK' | 'Error'
244+
---------------+------------------+---------------
245+
null |2.002 |-0.0
246+
;
247+
248+
204249
averageWithOneValueAndMath
205250
schema::languages:bt|'F':d
206251
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (ROUND(AVG(salary) / 2) FOR gender IN ('F'));
@@ -214,3 +259,4 @@ null |31070.0
214259
4 |24646.0
215260
5 |23353.0
216261
;
262+

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/Verifier.java

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
package org.elasticsearch.xpack.sql.planner;
77

88
import org.elasticsearch.xpack.ql.common.Failure;
9-
import org.elasticsearch.xpack.ql.expression.function.aggregate.InnerAggregate;
109
import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan;
11-
import org.elasticsearch.xpack.sql.plan.physical.PivotExec;
1210
import org.elasticsearch.xpack.sql.plan.physical.Unexecutable;
1311
import org.elasticsearch.xpack.sql.plan.physical.UnplannedExec;
1412

@@ -32,22 +30,10 @@ static List<Failure> verifyMappingPlan(PhysicalPlan plan) {
3230
}
3331
});
3432
});
35-
// verify Pivot
36-
checkInnerAggsPivot(plan, failures);
3733

3834
return failures;
3935
}
4036

41-
private static void checkInnerAggsPivot(PhysicalPlan plan, List<Failure> failures) {
42-
plan.forEachDown(p -> {
43-
p.pivot().aggregates().forEach(agg -> agg.forEachDown(e -> {
44-
if (e instanceof InnerAggregate) {
45-
failures.add(fail(e, "Aggregation [{}] not supported (yet) by PIVOT", e.sourceText()));
46-
}
47-
}));
48-
}, PivotExec.class);
49-
}
50-
5137
static List<Failure> verifyExecutingPlan(PhysicalPlan plan) {
5238
List<Failure> failures = new ArrayList<>();
5339

x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/PostOptimizerVerifierTests.java

Lines changed: 0 additions & 77 deletions
This file was deleted.

x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryFolderTests.java

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import org.elasticsearch.test.ESTestCase;
99
import org.elasticsearch.xpack.ql.expression.Expressions;
1010
import org.elasticsearch.xpack.ql.expression.ReferenceAttribute;
11+
import org.elasticsearch.xpack.ql.expression.function.aggregate.AggregateFunction;
1112
import org.elasticsearch.xpack.ql.index.EsIndex;
1213
import org.elasticsearch.xpack.ql.index.IndexResolution;
1314
import org.elasticsearch.xpack.ql.type.EsField;
@@ -20,16 +21,19 @@
2021
import org.elasticsearch.xpack.sql.plan.physical.EsQueryExec;
2122
import org.elasticsearch.xpack.sql.plan.physical.LocalExec;
2223
import org.elasticsearch.xpack.sql.plan.physical.PhysicalPlan;
24+
import org.elasticsearch.xpack.sql.querydsl.container.QueryContainer;
2325
import org.elasticsearch.xpack.sql.session.EmptyExecutable;
2426
import org.elasticsearch.xpack.sql.session.SingletonExecutable;
2527
import org.elasticsearch.xpack.sql.stats.Metrics;
2628
import org.elasticsearch.xpack.sql.types.SqlTypesTests;
2729
import org.junit.AfterClass;
2830
import org.junit.BeforeClass;
2931

30-
import java.util.Arrays;
32+
import java.util.List;
3133
import java.util.Map;
3234

35+
import static java.util.Arrays.asList;
36+
import static java.util.stream.Collectors.toList;
3337
import static org.hamcrest.Matchers.containsString;
3438
import static org.hamcrest.Matchers.endsWith;
3539
import static org.hamcrest.Matchers.startsWith;
@@ -449,15 +453,15 @@ public void testSelectLiteralWithGroupBy() {
449453
assertEquals(EsQueryExec.class, p.getClass());
450454
EsQueryExec ee = (EsQueryExec) p;
451455
assertEquals(2, ee.output().size());
452-
assertEquals(Arrays.asList("1", "MAX(int)"), Expressions.names(ee.output()));
456+
assertEquals(asList("1", "MAX(int)"), Expressions.names(ee.output()));
453457
assertThat(ee.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
454458
containsString("\"max\":{\"field\":\"int\""));
455459

456460
p = plan("SELECT 1, count(*) FROM test GROUP BY int");
457461
assertEquals(EsQueryExec.class, p.getClass());
458462
ee = (EsQueryExec) p;
459463
assertEquals(2, ee.output().size());
460-
assertEquals(Arrays.asList("1", "count(*)"), Expressions.names(ee.output()));
464+
assertEquals(asList("1", "count(*)"), Expressions.names(ee.output()));
461465
assertThat(ee.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
462466
containsString("\"terms\":{\"field\":\"int\""));
463467
}
@@ -495,14 +499,39 @@ public void testFoldingOfPivot() {
495499
assertEquals(EsQueryExec.class, p.getClass());
496500
EsQueryExec ee = (EsQueryExec) p;
497501
assertEquals(3, ee.output().size());
498-
assertEquals(Arrays.asList("bool", "'A'", "'B'"), Expressions.names(ee.output()));
502+
assertEquals(asList("bool", "'A'", "'B'"), Expressions.names(ee.output()));
499503
String q = ee.toString().replaceAll("\\s+", "");
500504
assertThat(q, containsString("\"query\":{\"terms\":{\"keyword\":[\"A\",\"B\"]"));
501505
String a = ee.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", "");
502506
assertThat(a, containsString("\"terms\":{\"field\":\"bool\""));
503507
assertThat(a, containsString("\"terms\":{\"field\":\"keyword\""));
504508
assertThat(a, containsString("{\"avg\":{\"field\":\"int\"}"));
505509
}
510+
511+
/**
 * Checks, for every aggregate function listed by {@code SqlFunctionRegistry}, that a PIVOT over
 * {@code keyword IN ('A', 'B')} is planned into the same query, the same aggregations and the same
 * plan string as the hand-written GROUP BY with a matching WHERE filter.
 */
public void testPivotHasSameQueryAsGroupBy() {
// Functions that cannot be invoked as NAME(int): the map supplies a complete call with the extra argument.
512+
final Map<String, String> aggFnsWithMultipleArguments = Map.of(
513+
"PERCENTILE", "PERCENTILE(int, 0)",
514+
"PERCENTILE_RANK", "PERCENTILE_RANK(int, 0)"
515+
);
// Build one SQL call string per registered function whose class is an AggregateFunction,
// defaulting to NAME(int) when no explicit call is listed above.
516+
List<String> aggregations = new SqlFunctionRegistry().listFunctions()
517+
.stream()
518+
.filter(def -> AggregateFunction.class.isAssignableFrom(def.clazz()))
519+
.map(def -> aggFnsWithMultipleArguments.getOrDefault(def.name(), def.name() + "(int)"))
520+
.collect(toList());
521+
for (String aggregationStr : aggregations) {
// The PIVOT form: the subquery's remaining columns (some.dotted.field, bool) act as the grouping columns.
522+
PhysicalPlan pivotPlan = plan("SELECT * FROM (SELECT some.dotted.field, bool, keyword, int FROM test) " +
523+
"PIVOT(" + aggregationStr + " FOR keyword IN ('A', 'B'))");
// The GROUP BY form that the PIVOT is expected to match: same filter on keyword, same grouping columns.
524+
PhysicalPlan groupByPlan = plan("SELECT some.dotted.field, bool, keyword, " + aggregationStr + " " +
525+
"FROM test WHERE keyword IN ('A', 'B') GROUP BY some.dotted.field, bool, keyword");
// Both statements must be planned as an EsQueryExec before their query containers can be compared.
526+
assertEquals(EsQueryExec.class, pivotPlan.getClass());
527+
assertEquals(EsQueryExec.class, groupByPlan.getClass());
528+
QueryContainer pivotQueryContainer = ((EsQueryExec) pivotPlan).queryContainer();
529+
QueryContainer groupByQueryContainer = ((EsQueryExec) groupByPlan).queryContainer();
// Compare the filter query, the aggregation tree and finally the full textual form of the plans.
530+
assertEquals(pivotQueryContainer.query(), groupByQueryContainer.query());
531+
assertEquals(pivotQueryContainer.aggs(), groupByQueryContainer.aggs());
532+
assertEquals(pivotPlan.toString(), groupByPlan.toString());
533+
}
534+
}
506535

507536
private static String randomOrderByAndLimit(int noOfSelectArgs) {
508537
return SqlTestUtils.randomOrderByAndLimit(noOfSelectArgs, random());

0 commit comments

Comments
 (0)