Skip to content

Commit 38f8123

Browse files
authored
SQL: implement SUM, MIN, MAX, AVG over literals (elastic#56786) (elastic#56851)
* Adds support for MIN, MAX, AVG, SUM aggregates acting on literals. SELECT SUM(1) FROM index and SELECT SUM(1), AVG(2) work both on indices and as local execution. (cherry picked from commit efb7290)
1 parent bfd29fb commit 38f8123

File tree

6 files changed

+217
-5
lines changed

6 files changed

+217
-5
lines changed

x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java

-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ protected ScriptTemplate scriptWithFoldable(Expression foldable) {
9595
return new ScriptTemplate(processScript("{sql}.stWktToSql({})"), paramsBuilder().variable(fold.toString()).build(), dataType());
9696
}
9797

98-
9998
return new ScriptTemplate(processScript("{}"),
10099
paramsBuilder().variable(fold).build(),
101100
dataType());

x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec

+85
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,91 @@ F |F |1666196|1666196 |1666196
175175
M |M |2671054|2671054 |2671054
176176
;
177177

178+
sumLiteralWithTrueConditionAndHavingWithCount
179+
SELECT SUM(1) AS c FROM test_emp WHERE 'a'='a' HAVING COUNT(1) > 0;
180+
181+
c:l
182+
---------------
183+
100
184+
;
185+
186+
sumLiteralWithTwoConditionsAndGroupByField
187+
SELECT SUM(10) AS s10, birth_date, SUM(1) AS c FROM test_emp WHERE (birth_date >= {ts '1959-01-01 00:00:00'}) AND (birth_date <= {ts '1959-12-31 23:59:59'}) GROUP BY 2;
188+
189+
s10:l | birth_date:ts | c:l
190+
---------------+------------------------+---------------
191+
10 |1959-01-27T00:00:00.000Z|1
192+
10 |1959-04-07T00:00:00.000Z|1
193+
20 |1959-07-23T00:00:00.000Z|2
194+
10 |1959-08-10T00:00:00.000Z|1
195+
10 |1959-08-19T00:00:00.000Z|1
196+
10 |1959-10-01T00:00:00.000Z|1
197+
10 |1959-12-03T00:00:00.000Z|1
198+
10 |1959-12-25T00:00:00.000Z|1
199+
;
200+
201+
sumLiteralWithGroupByAndTwoConditionsOnSum
202+
SELECT first_name, SUM(1) AS c FROM test_emp GROUP BY 1 HAVING ((SUM(1) >= 0) AND (SUM(1) <= 577)) LIMIT 10;
203+
204+
first_name:s | c:l
205+
---------------+---------------
206+
null |10
207+
Alejandro |1
208+
Amabile |1
209+
Anneke |1
210+
Anoosh |1
211+
Arumugam |1
212+
Basil |1
213+
Berhard |1
214+
Berni |1
215+
Bezalel |1
216+
;
217+
218+
sumFieldWithSumLiteralAsCondition
219+
SELECT first_name, last_name, SUM(salary) AS s, birth_date AS y, COUNT(1) FROM test_emp GROUP BY 1, 2, 4 HAVING ((SUM(1) >= 1) AND (SUM(1) <= 577)) AND ((SUM(salary) >= 35000) AND (SUM(salary) <= 45000));
220+
221+
first_name:s | last_name:s | s:i | y:ts | COUNT(1):l
222+
---------------+---------------+---------------+------------------------+---------------
223+
null |Brender |36051 |1959-10-01T00:00:00.000Z|1
224+
null |Joslin |37716 |1959-01-27T00:00:00.000Z|1
225+
null |Lortz |35222 |1960-07-20T00:00:00.000Z|1
226+
null |Makrucki |37691 |1963-07-22T00:00:00.000Z|1
227+
null |Swan |39878 |1962-12-29T00:00:00.000Z|1
228+
Alejandro |McAlpine |44307 |1953-09-19T00:00:00.000Z|1
229+
Amabile |Gomatam |38645 |1955-10-04T00:00:00.000Z|1
230+
Basil |Tramer |37853 |null |1
231+
Berhard |McFarlin |38376 |1954-10-01T00:00:00.000Z|1
232+
Berni |Genin |37137 |1956-02-12T00:00:00.000Z|1
233+
Chirstian |Koblick |36174 |1954-05-01T00:00:00.000Z|1
234+
Domenick |Tempesti |39356 |1963-11-26T00:00:00.000Z|1
235+
Hilari |Morton |37702 |1965-01-03T00:00:00.000Z|1
236+
Hisao |Lipner |40612 |1958-01-21T00:00:00.000Z|1
237+
Jayson |Mandell |43889 |1954-09-16T00:00:00.000Z|1
238+
Jungsoon |Syrzycki |39638 |1954-02-25T00:00:00.000Z|1
239+
Kendra |Hofting |44956 |1961-05-30T00:00:00.000Z|1
240+
Kenroku |Malabarba |35742 |1962-11-07T00:00:00.000Z|1
241+
Margareta |Bierman |41933 |1960-09-06T00:00:00.000Z|1
242+
Mayuko |Warwick |40031 |1952-12-24T00:00:00.000Z|1
243+
Mingsen |Casley |39728 |null |1
244+
Mokhtar |Bernatsky |38992 |1955-08-28T00:00:00.000Z|1
245+
Saniya |Kalloufi |43906 |1958-02-19T00:00:00.000Z|1
246+
Sreekrishna |Servieres |44817 |1961-09-23T00:00:00.000Z|1
247+
Sudharsan |Flasterstein |43602 |1963-03-21T00:00:00.000Z|1
248+
Vishv |Zockler |39110 |1959-07-23T00:00:00.000Z|1
249+
Weiyi |Meriste |37112 |null |1
250+
Yinghua |Dredge |43026 |1958-05-21T00:00:00.000Z|1
251+
Zvonko |Nyanchama |42716 |null |1
252+
;
253+
254+
mirrorIifForNumericAggregate
255+
SELECT IIF(COUNT(1)=0, NULL, 123)+5, AVG(123), MIN(123)+5, IIF(COUNT(1)=0, NULL, 30*COUNT(1)), SUM(30) FROM test_emp;
256+
257+
IIF(COUNT(1)=0, NULL, 123)+5:i| AVG(123):d | MIN(123)+5:i |IIF(COUNT(1)=0, NULL, 30*COUNT(1)):l| SUM(30):l
258+
------------------------------+-----------------+-----------------+------------------------------------+---------------
259+
128 |123 |128 |3000 |3000
260+
;
261+
262+
178263
aggByComplexCastedValue
179264
SELECT CONVERT(CONCAT(LTRIM(CONVERT("emp_no", SQL_VARCHAR)), LTRIM(CONVERT("languages", SQL_VARCHAR))), SQL_BIGINT) AS "TEMP"
180265
FROM "test_emp" GROUP BY "TEMP" ORDER BY "TEMP" LIMIT 20;

x-pack/plugin/sql/qa/src/main/resources/agg.sql-spec

+16
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,22 @@ SELECT gender g, languages l, COUNT(*) c FROM "test_emp" GROUP BY g, l ORDER BY
178178
aggCountDistinctWithAliasAndGroupBy
179179
SELECT COUNT(*) cnt, COUNT(DISTINCT first_name) as names, gender FROM test_emp GROUP BY gender ORDER BY gender;
180180

181+
localSum
182+
SELECT CAST(SUM(1) AS BIGINT);
183+
localSumWithAlias
184+
SELECT CAST(SUM(1) AS BIGINT) AS s, CAST(SUM(1) AS BIGINT);
185+
localMax
186+
SELECT MAX(1);
187+
localAggregates
188+
SELECT CAST(SUM(1) AS BIGINT), CAST(SUM(123) AS BIGINT), MAX(1), MAX(32), MIN(3), MIN(55+2) AS mn, CAST(AVG(33/3) AS INTEGER) AS av, CAST(AVG(1) AS INTEGER);
189+
aggregatesOfLiteralsFromIndex
190+
SELECT MAX(1), MIN(1), CAST(SUM(1) AS BIGINT), CAST(AVG(1) AS INTEGER), COUNT(1) FROM test_emp;
191+
aggregatesOfLiteralsFromIndex_WithNoMatchingFilter
192+
SELECT MAX(1), MIN(1), CAST(SUM(1) AS BIGINT), CAST(AVG(1) AS INTEGER), COUNT(1) FROM test_emp WHERE gender='123';
193+
sumOfLiteralInHavingOnly
194+
SELECT gender, COUNT(*) FROM test_emp GROUP BY gender HAVING SUM(10) > 200 ORDER BY gender;
195+
sumLiteralAndSumFieldWithComplexHaving
196+
SELECT gender, CAST(SUM("salary") AS BIGINT), CAST(SUM(1) AS BIGINT), CAST(SUM(10) AS BIGINT), COUNT(*) FROM test_emp GROUP BY gender HAVING ((SUM(1) >= 0) AND (SUM(1) <= 50) AND (SUM(salary) >= 250000) AND (SUM(salary) <= 5000000)) ORDER BY gender;
181197

182198

183199
// Conditional COUNT

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/NumericAggregate.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import static org.elasticsearch.xpack.ql.expression.TypeResolutions.isNumeric;
1818

19-
abstract class NumericAggregate extends AggregateFunction {
19+
public abstract class NumericAggregate extends AggregateFunction {
2020

2121
NumericAggregate(Source source, Expression field, List<Expression> parameters) {
2222
super(source, field, parameters);

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java

+57-3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.elasticsearch.xpack.ql.expression.UnresolvedAttribute;
2121
import org.elasticsearch.xpack.ql.expression.function.Function;
2222
import org.elasticsearch.xpack.ql.expression.function.aggregate.AggregateFunction;
23+
import org.elasticsearch.xpack.ql.expression.function.aggregate.Count;
2324
import org.elasticsearch.xpack.ql.expression.function.aggregate.InnerAggregate;
2425
import org.elasticsearch.xpack.ql.expression.predicate.logical.And;
2526
import org.elasticsearch.xpack.ql.expression.predicate.logical.Or;
@@ -58,6 +59,7 @@
5859
import org.elasticsearch.xpack.ql.util.Holder;
5960
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
6061
import org.elasticsearch.xpack.sql.analysis.analyzer.Analyzer.CleanAliases;
62+
import org.elasticsearch.xpack.sql.expression.function.aggregate.Avg;
6163
import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStats;
6264
import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStatsEnclosed;
6365
import org.elasticsearch.xpack.sql.expression.function.aggregate.First;
@@ -66,17 +68,21 @@
6668
import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStatsEnclosed;
6769
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
6870
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
71+
import org.elasticsearch.xpack.sql.expression.function.aggregate.NumericAggregate;
6972
import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentile;
7073
import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRank;
7174
import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRanks;
7275
import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentiles;
7376
import org.elasticsearch.xpack.sql.expression.function.aggregate.Stats;
77+
import org.elasticsearch.xpack.sql.expression.function.aggregate.Sum;
7478
import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits;
7579
import org.elasticsearch.xpack.sql.expression.function.scalar.Cast;
7680
import org.elasticsearch.xpack.sql.expression.predicate.conditional.ArbitraryConditionalFunction;
7781
import org.elasticsearch.xpack.sql.expression.predicate.conditional.Case;
7882
import org.elasticsearch.xpack.sql.expression.predicate.conditional.Coalesce;
7983
import org.elasticsearch.xpack.sql.expression.predicate.conditional.IfConditional;
84+
import org.elasticsearch.xpack.sql.expression.predicate.conditional.Iif;
85+
import org.elasticsearch.xpack.sql.expression.predicate.operator.arithmetic.Mul;
8086
import org.elasticsearch.xpack.sql.expression.predicate.operator.comparison.In;
8187
import org.elasticsearch.xpack.sql.plan.logical.LocalRelation;
8288
import org.elasticsearch.xpack.sql.plan.logical.Pivot;
@@ -119,7 +125,10 @@ protected Iterable<RuleExecutor<LogicalPlan>.Batch> batches() {
119125
new RewritePivot());
120126

121127
Batch refs = new Batch("Replace References", Limiter.ONCE,
122-
new ReplaceReferenceAttributeWithSource());
128+
new ReplaceReferenceAttributeWithSource(),
129+
new ReplaceAggregatesWithLiterals(),
130+
new ReplaceCountInLocalRelation()
131+
);
123132

124133
Batch operators = new Batch("Operator Optimization",
125134
// combining
@@ -776,6 +785,52 @@ private Expression simplify(BinaryComparison bc) {
776785
}
777786
}
778787

788+
/**
789+
* Any numeric aggregates (avg, min, max, sum) acting on literals are converted to an iif(count(1)=0, null, literal*count(1)) for sum,
790+
* and to iif(count(1)=0,null,literal) for the other three.
791+
*/
792+
private static class ReplaceAggregatesWithLiterals extends OptimizerRule<LogicalPlan> {
793+
794+
@Override
795+
protected LogicalPlan rule(LogicalPlan p) {
796+
return p.transformExpressionsDown(e -> {
797+
if (e instanceof Min || e instanceof Max || e instanceof Avg || e instanceof Sum) {
798+
NumericAggregate a = (NumericAggregate) e;
799+
800+
if (a.field().foldable()) {
801+
Expression countOne = new Count(a.source(), new Literal(Source.EMPTY, 1, a.dataType()), false);
802+
Equals countEqZero = new Equals(a.source(), countOne, new Literal(Source.EMPTY, 0, a.dataType()));
803+
Expression argument = a.field();
804+
Literal foldedArgument = new Literal(argument.source(), argument.fold(), a.dataType());
805+
806+
Expression iifElseResult = foldedArgument;
807+
if (e instanceof Sum) {
808+
iifElseResult = new Mul(a.source(), countOne, foldedArgument);
809+
}
810+
811+
return new Iif(a.source(), countEqZero, Literal.NULL, iifElseResult);
812+
}
813+
}
814+
return e;
815+
});
816+
}
817+
}
818+
819+
/**
820+
* A COUNT in a local relation will always be 1.
821+
*/
822+
private static class ReplaceCountInLocalRelation extends OptimizerRule<Aggregate> {
823+
824+
@Override
825+
protected LogicalPlan rule(Aggregate a) {
826+
boolean hasLocalRelation = a.anyMatch(LocalRelation.class::isInstance);
827+
828+
return hasLocalRelation ? a.transformExpressionsDown(c -> {
829+
return c instanceof Count ? new Literal(c.source(), 1, c.dataType()) : c;
830+
}) : a;
831+
}
832+
}
833+
779834
static class ReplaceAggsWithMatrixStats extends OptimizerBasicRule {
780835

781836
@Override
@@ -1157,8 +1212,7 @@ private List<Object> extractConstants(List<? extends NamedExpression> named) {
11571212
}
11581213
} else if (n.foldable()) {
11591214
values.add(n.fold());
1160-
}
1161-
else {
1215+
} else {
11621216
// not everything is foldable, bail-out early
11631217
return values;
11641218
}

x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryFolderTests.java

+58
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,64 @@ public void testLocalExecWithoutFromClauseWithPrunedFilter() {
175175
assertThat(ee.output().get(0).toString(), startsWith("E(){r}#"));
176176
}
177177

178+
public void testLocalExecWithAggs() {
179+
PhysicalPlan p = plan("SELECT MIN(10), MAX(123), SUM(1), AVG(0)");
180+
assertEquals(LocalExec.class, p.getClass());
181+
LocalExec le = (LocalExec) p;
182+
assertEquals(SingletonExecutable.class, le.executable().getClass());
183+
SingletonExecutable ee = (SingletonExecutable) le.executable();
184+
assertEquals(4, ee.output().size());
185+
assertThat(ee.output().get(0).toString(), startsWith("MIN(10){r}#"));
186+
assertThat(ee.output().get(1).toString(), startsWith("MAX(123){r}#"));
187+
assertThat(ee.output().get(2).toString(), startsWith("SUM(1){r}#"));
188+
assertThat(ee.output().get(3).toString(), startsWith("AVG(0){r}#"));
189+
}
190+
191+
public void testLocalExecWithAggsAndWhereFalseFilter() {
192+
PhysicalPlan p = plan("SELECT SUM(10) WHERE 2 > 3");
193+
assertEquals(LocalExec.class, p.getClass());
194+
LocalExec le = (LocalExec) p;
195+
assertEquals(EmptyExecutable.class, le.executable().getClass());
196+
EmptyExecutable ee = (EmptyExecutable) le.executable();
197+
assertEquals(1, ee.output().size());
198+
assertThat(ee.output().get(0).toString(), startsWith("SUM(10){r}#"));
199+
}
200+
201+
public void testLocalExecWithAggsAndWhereTrueFilter() {
202+
PhysicalPlan p = plan("SELECT MIN(10), MAX(123), SUM(1), AVG(0) WHERE 1 = 1");
203+
assertEquals(LocalExec.class, p.getClass());
204+
LocalExec le = (LocalExec) p;
205+
assertEquals(SingletonExecutable.class, le.executable().getClass());
206+
SingletonExecutable ee = (SingletonExecutable) le.executable();
207+
assertEquals(4, ee.output().size());
208+
assertThat(ee.output().get(0).toString(), startsWith("MIN(10){r}#"));
209+
assertThat(ee.output().get(1).toString(), startsWith("MAX(123){r}#"));
210+
assertThat(ee.output().get(2).toString(), startsWith("SUM(1){r}#"));
211+
assertThat(ee.output().get(3).toString(), startsWith("AVG(0){r}#"));
212+
}
213+
214+
public void testLocalExecWithAggsAndWhereTrueFilterAndOrderBy() {
215+
PhysicalPlan p = plan("SELECT MAX(23), SUM(1) WHERE 1 = 1 ORDER BY 1, 2 DESC");
216+
assertEquals(LocalExec.class, p.getClass());
217+
LocalExec le = (LocalExec) p;
218+
assertEquals(SingletonExecutable.class, le.executable().getClass());
219+
SingletonExecutable ee = (SingletonExecutable) le.executable();
220+
assertEquals(2, ee.output().size());
221+
assertThat(ee.output().get(0).toString(), startsWith("MAX(23){r}#"));
222+
assertThat(ee.output().get(1).toString(), startsWith("SUM(1){r}#"));
223+
}
224+
225+
public void testLocalExecWithAggsAndWhereTrueFilterAndOrderByAndLimit() {
226+
PhysicalPlan p = plan("SELECT AVG(10), SUM(2) WHERE 1 = 1 ORDER BY 1, 2 DESC LIMIT 5");
227+
assertEquals(LocalExec.class, p.getClass());
228+
LocalExec le = (LocalExec) p;
229+
assertEquals(SingletonExecutable.class, le.executable().getClass());
230+
SingletonExecutable ee = (SingletonExecutable) le.executable();
231+
assertEquals(2, ee.output().size());
232+
assertThat(ee.output().get(0).toString(), startsWith("AVG(10){r}#"));
233+
assertThat(ee.output().get(1).toString(), startsWith("SUM(2){r}#"));
234+
}
235+
178236
public void testFoldingOfIsNull() {
179237
PhysicalPlan p = plan("SELECT keyword FROM test WHERE (keyword IS NOT NULL) IS NULL");
180238
assertEquals(LocalExec.class, p.getClass());

0 commit comments

Comments
 (0)