Skip to content

Commit f41efd6

Browse files
authored
SQL: Fix ORDER BY YEAR() function (#51562)
Previously, if YEAR() was used as and ORDER BY argument without being wrapped with another scalar (e.g. YEAR(birth_date) + 10), no script ordering was used but instead the underlying field (e.g. birth_date) was used instead as a performance optimisation. This works correctly if YEAR() is the only ORDER BY arg but if further args are used as tie breakers for the ordering wrong results are produced. This is because 2 rows with the different birth_date but on the same year are not tied as the underlying ordering is on birth_date and not on the YEAR(birth_date), and the following ORDER BY args are ignored. Remove this optimisation for YEAR() to avoid incorrect results in such cases. As a consequence another bug is revealed: scalar functions on top of nested fields produce scripted sorting/filtering which is not yet supported. In such cases no error was thrown but instead all values for such nested fields were null and were passed to the script implementing the sorting/filtering, producing incorrect results. Detect such cases and throw a validation exception. Fixes: #51224
1 parent ccc323f commit f41efd6

File tree

10 files changed

+137
-60
lines changed

10 files changed

+137
-60
lines changed

docs/reference/sql/limitations.asciidoc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,34 @@ For example:
3131
SELECT dep.dep_name.keyword FROM test_emp GROUP BY languages;
3232
--------------------------------------------------
3333

34+
[float]
35+
=== Scalar functions on nested fields are not allowed in `WHERE` and `ORDER BY` clauses
36+
{es-sql} doesn't support the usage of scalar functions on top of nested fields in `WHERE`
37+
and `ORDER BY` clauses with the exception of comparison and logical operators.
38+
39+
For example:
40+
41+
[source, sql]
42+
--------------------------------------------------
43+
SELECT * FROM test_emp WHERE LENGTH(dep.dep_name.keyword) > 5;
44+
--------------------------------------------------
45+
46+
and
47+
48+
[source, sql]
49+
--------------------------------------------------
50+
SELECT * FROM test_emp ORDER BY YEAR(dep.start_date);
51+
--------------------------------------------------
52+
53+
are not supported but:
54+
55+
[source, sql]
56+
--------------------------------------------------
57+
SELECT * FROM test_emp WHERE dep.start_date >= CAST('2020-01-01' AS DATE) OR dep.dep_end_date IS NULL;
58+
--------------------------------------------------
59+
60+
is supported.
61+
3462
[float]
3563
=== Multi-nested fields
3664

x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/function/scalar/ScalarFunction.java

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,9 @@ protected ScalarFunction(Source source, List<Expression> fields) {
3939
super(source, fields);
4040
}
4141

42-
// used if the function is monotonic and thus does not have to be computed for ordering purposes
43-
// null means the script needs to be used; expression means the field/expression to be used instead
44-
public Expression orderBy() {
45-
return null;
46-
}
47-
48-
4942
//
5043
// Script generation
5144
//
52-
5345
public ScriptTemplate asScript(Expression exp) {
5446
if (exp.foldable()) {
5547
return scriptWithFoldable(exp);
@@ -73,7 +65,6 @@ public ScriptTemplate asScript(Expression exp) {
7365
throw new QlIllegalArgumentException("Cannot evaluate script for expression {}", exp);
7466
}
7567

76-
7768
protected ScriptTemplate scriptWithFoldable(Expression foldable) {
7869
Object fold = foldable.fold();
7970

@@ -144,4 +135,4 @@ protected String processScript(String script) {
144135
protected String formatTemplate(String template) {
145136
return Scripts.formatTemplate(template);
146137
}
147-
}
138+
}

x-pack/plugin/sql/qa/src/main/resources/datetime.sql-spec

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,12 @@ SELECT first_name FROM test_emp ORDER BY NOW(), first_name NULLS LAST LIMIT 5;
124124
groupByCurrentTimestamp
125125
SELECT MAX(salary) AS max FROM test_emp GROUP BY NOW();
126126

127+
//
128+
// ORDER BY
129+
//
130+
orderByYear
131+
SELECT YEAR(birth_date) as year, emp_no FROM test_emp ORDER BY year NULLS FIRST, emp_no ASC;
132+
127133
// ES will consider a TIME in UTC, if no other indication is given and H2 doesn't consider the timezone for times, so no TZSync'ing needed.
128134
hourFromStringTime
129135
SELECT HOUR(CAST('18:09:03' AS TIME)) AS result;

x-pack/plugin/sql/qa/src/main/resources/docs/docs.csv-spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -939,7 +939,7 @@ SELECT -2 * INTERVAL '3' YEARS AS result;
939939

940940
///////////////////////////////
941941
//
942-
// Order by
942+
// Order By
943943
//
944944
///////////////////////////////
945945

x-pack/plugin/sql/qa/src/main/resources/nested.csv-spec

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -102,27 +102,15 @@ Mayuko | 12
102102
;
103103

104104
selectWithScalarOnNested
105-
SELECT first_name f, last_name l, YEAR(dep.from_date) start FROM test_emp WHERE dep.dep_name = 'Production' AND languages > 1 ORDER BY start LIMIT 5;
105+
SELECT first_name f, last_name l, YEAR(dep.from_date) start FROM test_emp WHERE dep.dep_name = 'Production' AND languages > 1 ORDER BY dep.from_date LIMIT 5;
106106

107107
f:s | l:s | start:i
108108

109-
Sreekrishna |Servieres |1985
110-
Zhongwei |Rosen |1986
111-
Chirstian |Koblick |1986
112-
null |Chappelet |1988
113-
Zvonko |Nyanchama |1989
114-
;
115-
116-
selectWithScalarOnNestedWithoutProjection
117-
SELECT first_name f, last_name l FROM test_emp WHERE dep.dep_name = 'Production' AND languages > 1 ORDER BY YEAR(dep.from_date) LIMIT 5;
118-
119-
f:s | l:s
120-
121-
Sreekrishna |Servieres
122-
Zhongwei |Rosen
123-
Chirstian |Koblick
124-
null |Chappelet
125-
Zvonko |Nyanchama
109+
Sreekrishna |Servieres |1985
110+
Zhongwei |Rosen |1986
111+
Chirstian |Koblick |1986
112+
null |Chappelet |1988
113+
Zvonko |Nyanchama |1989
126114
;
127115

128116
//

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
import org.elasticsearch.xpack.ql.expression.function.grouping.GroupingFunction;
2424
import org.elasticsearch.xpack.ql.expression.function.scalar.ScalarFunction;
2525
import org.elasticsearch.xpack.ql.expression.predicate.fulltext.FullTextPredicate;
26+
import org.elasticsearch.xpack.ql.expression.predicate.logical.BinaryLogic;
27+
import org.elasticsearch.xpack.ql.expression.predicate.logical.Not;
28+
import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison;
2629
import org.elasticsearch.xpack.ql.plan.logical.Aggregate;
2730
import org.elasticsearch.xpack.ql.plan.logical.Filter;
2831
import org.elasticsearch.xpack.ql.plan.logical.Limit;
@@ -40,6 +43,8 @@
4043
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
4144
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
4245
import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits;
46+
import org.elasticsearch.xpack.sql.expression.predicate.nulls.IsNotNull;
47+
import org.elasticsearch.xpack.sql.expression.predicate.nulls.IsNull;
4348
import org.elasticsearch.xpack.sql.plan.logical.Distinct;
4449
import org.elasticsearch.xpack.sql.plan.logical.LocalRelation;
4550
import org.elasticsearch.xpack.sql.plan.logical.Pivot;
@@ -255,7 +260,7 @@ Collection<Failure> verify(LogicalPlan plan) {
255260
}
256261

257262
checkForScoreInsideFunctions(p, localFailures);
258-
checkNestedUsedInGroupByOrHaving(p, localFailures);
263+
checkNestedUsedInGroupByOrHavingOrWhereOrOrderBy(p, localFailures, attributeRefs);
259264
checkForGeoFunctionsOnDocValues(p, localFailures);
260265
checkPivot(p, localFailures, attributeRefs);
261266

@@ -735,33 +740,61 @@ private static void checkForScoreInsideFunctions(LogicalPlan p, Set<Failure> loc
735740
Function.class));
736741
}
737742

738-
private static void checkNestedUsedInGroupByOrHaving(LogicalPlan p, Set<Failure> localFailures) {
743+
private static void checkNestedUsedInGroupByOrHavingOrWhereOrOrderBy(LogicalPlan p, Set<Failure> localFailures,
744+
AttributeMap<Expression> attributeRefs) {
739745
List<FieldAttribute> nested = new ArrayList<>();
740-
Consumer<FieldAttribute> match = fa -> {
746+
Consumer<FieldAttribute> matchNested = fa -> {
741747
if (fa.isNested()) {
742748
nested.add(fa);
743749
}
744750
};
751+
Consumer<Expression> checkForNested = e ->
752+
attributeRefs.getOrDefault(e, e).forEachUp(matchNested, FieldAttribute.class);
753+
Consumer<ScalarFunction> checkForNestedInFunction = f -> f.arguments().forEach(
754+
arg -> arg.forEachUp(matchNested, FieldAttribute.class));
745755

746756
// nested fields shouldn't be used in aggregates or having (yet)
747-
p.forEachDown(a -> a.groupings().forEach(agg -> agg.forEachUp(match, FieldAttribute.class)), Aggregate.class);
748-
757+
p.forEachDown(a -> a.groupings().forEach(agg -> agg.forEachUp(checkForNested)), Aggregate.class);
749758
if (!nested.isEmpty()) {
750759
localFailures.add(
751760
fail(nested.get(0), "Grouping isn't (yet) compatible with nested fields " + new AttributeSet(nested).names()));
752761
nested.clear();
753762
}
754763

755764
// check in having
756-
p.forEachDown(f -> {
757-
if (f.child() instanceof Aggregate) {
758-
f.condition().forEachUp(match, FieldAttribute.class);
759-
}
760-
}, Filter.class);
761-
765+
p.forEachDown(f -> f.forEachDown(a -> f.condition().forEachUp(checkForNested), Aggregate.class), Filter.class);
762766
if (!nested.isEmpty()) {
763767
localFailures.add(
764768
fail(nested.get(0), "HAVING isn't (yet) compatible with nested fields " + new AttributeSet(nested).names()));
769+
nested.clear();
770+
}
771+
772+
// check in where (scalars not allowed)
773+
p.forEachDown(f -> f.condition().forEachUp(e ->
774+
attributeRefs.getOrDefault(e, e).forEachUp(sf -> {
775+
if (sf instanceof BinaryComparison == false &&
776+
sf instanceof IsNull == false &&
777+
sf instanceof IsNotNull == false &&
778+
sf instanceof Not == false &&
779+
sf instanceof BinaryLogic== false) {
780+
checkForNestedInFunction.accept(sf);
781+
}}, ScalarFunction.class)
782+
), Filter.class);
783+
if (!nested.isEmpty()) {
784+
localFailures.add(
785+
fail(nested.get(0), "WHERE isn't (yet) compatible with scalar functions on nested fields " +
786+
new AttributeSet(nested).names()));
787+
nested.clear();
788+
}
789+
790+
// check in order by (scalars not allowed)
791+
p.forEachDown(ob -> ob.order().forEach(o -> o.forEachUp(e ->
792+
attributeRefs.getOrDefault(e, e).forEachUp(checkForNestedInFunction, ScalarFunction.class)
793+
)), OrderBy.class);
794+
if (!nested.isEmpty()) {
795+
localFailures.add(
796+
fail(nested.get(0), "ORDER BY isn't (yet) compatible with scalar functions on nested fields " +
797+
new AttributeSet(nested).names()));
765798
}
766799
}
767800

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/datetime/Year.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,6 @@ public String dateTimeFormat() {
3939
return "year";
4040
}
4141

42-
@Override
43-
public Expression orderBy() {
44-
return field();
45-
}
46-
4742
@Override
4843
public String calendarInterval() {
4944
return YEAR_INTERVAL;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -695,21 +695,8 @@ protected PhysicalPlan rule(OrderExec plan) {
695695
// scalar functions typically require script ordering
696696
if (orderExpression instanceof ScalarFunction) {
697697
ScalarFunction sf = (ScalarFunction) orderExpression;
698-
// is there an expression to order by?
699-
if (sf.orderBy() != null) {
700-
Expression orderBy = sf.orderBy();
701-
if (orderBy instanceof NamedExpression) {
702-
orderBy = qContainer.aliases().getOrDefault(orderBy, orderBy);
703-
qContainer = qContainer
704-
.addSort(new AttributeSort(((NamedExpression) orderBy).toAttribute(), direction, missing));
705-
} else if (orderBy.foldable() == false) {
706-
// ignore constant
707-
throw new PlanningException("does not know how to order by expression {}", orderBy);
708-
}
709-
} else {
710-
// nope, use scripted sorting
711-
qContainer = qContainer.addSort(new ScriptSort(sf.asScript(), direction, missing));
712-
}
698+
// nope, use scripted sorting
699+
qContainer = qContainer.addSort(new ScriptSort(sf.asScript(), direction, missing));
713700
}
714701
// score
715702
else if (orderExpression instanceof Score) {

x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,11 +470,46 @@ public void testGroupByOnInexact() {
470470
public void testGroupByOnNested() {
471471
assertEquals("1:38: Grouping isn't (yet) compatible with nested fields [dep.dep_id]",
472472
error("SELECT dep.dep_id FROM test GROUP BY dep.dep_id"));
473+
assertEquals("1:8: Grouping isn't (yet) compatible with nested fields [dep.dep_id]",
474+
error("SELECT dep.dep_id AS a FROM test GROUP BY a"));
475+
assertEquals("1:8: Grouping isn't (yet) compatible with nested fields [dep.dep_id]",
476+
error("SELECT dep.dep_id AS a FROM test GROUP BY 1"));
477+
assertEquals("1:8: Grouping isn't (yet) compatible with nested fields [dep.dep_id, dep.start_date]",
478+
error("SELECT dep.dep_id AS a, dep.start_date AS b FROM test GROUP BY 1, 2"));
479+
assertEquals("1:8: Grouping isn't (yet) compatible with nested fields [dep.dep_id, dep.start_date]",
480+
error("SELECT dep.dep_id AS a, dep.start_date AS b FROM test GROUP BY a, b"));
473481
}
474482

475483
public void testHavingOnNested() {
476484
assertEquals("1:51: HAVING isn't (yet) compatible with nested fields [dep.start_date]",
477485
error("SELECT int FROM test GROUP BY int HAVING AVG(YEAR(dep.start_date)) > 1980"));
486+
assertEquals("1:22: HAVING isn't (yet) compatible with nested fields [dep.start_date]",
487+
error("SELECT int, AVG(YEAR(dep.start_date)) AS average FROM test GROUP BY int HAVING average > 1980"));
488+
assertEquals("1:22: HAVING isn't (yet) compatible with nested fields [dep.start_date, dep.end_date]",
489+
error("SELECT int, AVG(YEAR(dep.start_date)) AS a, MAX(MONTH(dep.end_date)) AS b " +
490+
"FROM test GROUP BY int " +
491+
"HAVING a > 1980 AND b < 10"));
492+
}
493+
494+
public void testWhereOnNested() {
495+
assertEquals("1:33: WHERE isn't (yet) compatible with scalar functions on nested fields [dep.start_date]",
496+
error("SELECT int FROM test WHERE YEAR(dep.start_date) + 10 > 0"));
497+
assertEquals("1:13: WHERE isn't (yet) compatible with scalar functions on nested fields [dep.start_date]",
498+
error("SELECT YEAR(dep.start_date) + 10 AS a FROM test WHERE int > 10 AND (int < 3 OR NOT (a > 5))"));
499+
accept("SELECT int FROM test WHERE dep.start_date > '2020-01-30'::date AND (int > 10 OR dep.end_date IS NULL)");
500+
accept("SELECT int FROM test WHERE dep.start_date > '2020-01-30'::date AND (int > 10 OR dep.end_date IS NULL) " +
501+
"OR NOT(dep.start_date >= '2020-01-01')");
502+
}
503+
504+
public void testOrderByOnNested() {
505+
assertEquals("1:36: ORDER BY isn't (yet) compatible with scalar functions on nested fields [dep.start_date]",
506+
error("SELECT int FROM test ORDER BY YEAR(dep.start_date) + 10"));
507+
assertEquals("1:13: ORDER BY isn't (yet) compatible with scalar functions on nested fields [dep.start_date]",
508+
error("SELECT YEAR(dep.start_date) + 10 FROM test ORDER BY 1"));
509+
assertEquals("1:13: ORDER BY isn't (yet) compatible with scalar functions on nested fields " +
510+
"[dep.start_date, dep.end_date]",
511+
error("SELECT YEAR(dep.start_date) + 10 AS a, MONTH(dep.end_date) - 10 as b FROM test ORDER BY 1, 2"));
512+
accept("SELECT int FROM test ORDER BY dep.start_date, dep.end_date");
478513
}
479514

480515
public void testGroupByScalarFunctionWithAggOnTarget() {

x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,20 @@ public void testGroupByYearAndScalarsQueryTranslator() {
10251025
"\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}"));
10261026
}
10271027

1028+
public void testOrderByYear() {
1029+
PhysicalPlan p = optimizeAndPlan("SELECT YEAR(date) FROM test ORDER BY 1");
1030+
assertEquals(EsQueryExec.class, p.getClass());
1031+
EsQueryExec eqe = (EsQueryExec) p;
1032+
assertEquals(1, eqe.output().size());
1033+
assertEquals("YEAR(date)", eqe.output().get(0).qualifiedName());
1034+
assertEquals(INTEGER, eqe.output().get(0).dataType());
1035+
assertThat(eqe.queryContainer().toString().replaceAll("\\s+", ""),
1036+
endsWith("\"sort\":[{\"_script\":{\"script\":{\"source\":\"InternalSqlScriptUtils.nullSafeSortNumeric(" +
1037+
"InternalSqlScriptUtils.dateTimeChrono(InternalSqlScriptUtils.docValue(doc,params.v0)," +
1038+
"params.v1,params.v2))\",\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"Z\"," +
1039+
"\"v2\":\"YEAR\"}},\"type\":\"number\",\"order\":\"asc\"}}]}"));
1040+
}
1041+
10281042
public void testGroupByHistogramWithDate() {
10291043
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(CAST(date AS DATE), INTERVAL 2 MONTHS)");
10301044
assertTrue(p instanceof Aggregate);

0 commit comments

Comments
 (0)