Skip to content

Commit 10ab391

Browse files
authored
SQL: Enhance checks for inexact fields (#39427)
For functions: move checks for `text` fields without underlying `keyword` fields or with many of them (ambiguity) to the type resolution stage. For Order By/Group By: move checks to the `Verifier` to catch early before `QueryTranslator` or execution. Closes: #38501 Fixes: #35203
1 parent 29938b1 commit 10ab391

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+509
-282
lines changed

x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2348,25 +2348,25 @@ Arumugam
23482348
////////////
23492349
limitationSubSelect
23502350
// tag::limitationSubSelect
2351-
SELECT * FROM (SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%') WHERE first_name LIKE 'A%';
2351+
SELECT * FROM (SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%') WHERE first_name LIKE 'A%' ORDER BY 1;
23522352

23532353
first_name | last_name
23542354
---------------+---------------
2355-
Anneke |Preusig
2356-
Alejandro |McAlpine
2357-
Anoosh |Peyn
2358-
Arumugam |Ossenbruggen
2355+
Alejandro |McAlpine
2356+
Anneke |Preusig
2357+
Anoosh |Peyn
2358+
Arumugam |Ossenbruggen
23592359
// end::limitationSubSelect
23602360
;
23612361

2362-
limitationSubSelect
2362+
limitationSubSelectRewritten
23632363
// tag::limitationSubSelectRewritten
2364-
SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%' AND first_name LIKE 'A%';
2364+
SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%' AND first_name LIKE 'A%' ORDER BY 1;
23652365
// end::limitationSubSelectRewritten
23662366
first_name | last_name
23672367
---------------+---------------
2368-
Anneke |Preusig
2369-
Alejandro |McAlpine
2370-
Anoosh |Peyn
2371-
Arumugam |Ossenbruggen
2368+
Alejandro |McAlpine
2369+
Anneke |Preusig
2370+
Anoosh |Peyn
2371+
Arumugam |Ossenbruggen
23722372
;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.elasticsearch.xpack.sql.stats.Metrics;
4242
import org.elasticsearch.xpack.sql.tree.Node;
4343
import org.elasticsearch.xpack.sql.type.DataType;
44+
import org.elasticsearch.xpack.sql.type.EsField;
4445
import org.elasticsearch.xpack.sql.util.StringUtils;
4546

4647
import java.util.ArrayList;
@@ -294,7 +295,8 @@ Collection<Failure> verify(LogicalPlan plan) {
294295
*/
295296
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
296297
Map<String, Function> resolvedFunctions, Set<LogicalPlan> groupingFailures) {
297-
return checkGroupByAgg(p, localFailures, resolvedFunctions)
298+
return checkGroupByInexactField(p, localFailures)
299+
&& checkGroupByAgg(p, localFailures, resolvedFunctions)
298300
&& checkGroupByOrder(p, localFailures, groupingFailures)
299301
&& checkGroupByHaving(p, localFailures, groupingFailures, resolvedFunctions);
300302
}
@@ -463,6 +465,21 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Set<Expressio
463465
return false;
464466
}
465467

468+
private static boolean checkGroupByInexactField(LogicalPlan p, Set<Failure> localFailures) {
469+
if (p instanceof Aggregate) {
470+
Aggregate a = (Aggregate) p;
471+
472+
// The grouping can not be an aggregate function or an inexact field (e.g. text without a keyword)
473+
a.groupings().forEach(e -> e.forEachUp(c -> {
474+
EsField.Exact exact = c.getExactInfo();
475+
if (exact.hasExact() == false) {
476+
localFailures.add(fail(c, "Field of data type [" + c.dataType().typeName + "] cannot be used for grouping; " +
477+
exact.errorMsg()));
478+
}
479+
}, FieldAttribute.class));
480+
}
481+
return true;
482+
}
466483

467484
// check whether plain columns specified in an agg are mentioned in the group-by
468485
private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures, Map<String, Function> functions) {

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SourceGenerator.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333

3434
public abstract class SourceGenerator {
3535

36+
private SourceGenerator() {}
37+
3638
private static final List<String> NO_STORED_FIELD = singletonList(StoredFieldsContext._NONE_);
3739

3840
public static SearchSourceBuilder sourceBuilder(QueryContainer container, QueryBuilder filter, Integer size) {
@@ -107,8 +109,7 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
107109

108110
// sorting only works on not-analyzed fields - look for a multi-field replacement
109111
if (attr instanceof FieldAttribute) {
110-
FieldAttribute fa = (FieldAttribute) attr;
111-
fa = fa.isInexact() ? fa.exactAttribute() : fa;
112+
FieldAttribute fa = ((FieldAttribute) attr).exactAttribute();
112113

113114
sortBuilder = fieldSort(fa.name())
114115
.missing(as.missing().position())
@@ -125,7 +126,8 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
125126
if (nestedSort == null) {
126127
fieldSort.setNestedSort(newSort);
127128
} else {
128-
for (; nestedSort.getNestedSort() != null; nestedSort = nestedSort.getNestedSort()) {
129+
while (nestedSort.getNestedSort() != null) {
130+
nestedSort = nestedSort.getNestedSort();
129131
}
130132
nestedSort.setNestedSort(newSort);
131133
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Expressions.java

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,16 @@
66
package org.elasticsearch.xpack.sql.expression;
77

88
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
9-
import org.elasticsearch.xpack.sql.expression.Expression.TypeResolution;
109
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
1110
import org.elasticsearch.xpack.sql.type.DataType;
12-
import org.elasticsearch.xpack.sql.type.DataTypes;
1311

1412
import java.util.ArrayList;
1513
import java.util.Collection;
1614
import java.util.List;
17-
import java.util.Locale;
18-
import java.util.StringJoiner;
1915
import java.util.function.Predicate;
2016

2117
import static java.util.Collections.emptyList;
2218
import static java.util.Collections.emptyMap;
23-
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
24-
import static org.elasticsearch.xpack.sql.type.DataType.BOOLEAN;
2519

2620
public final class Expressions {
2721

@@ -154,55 +148,4 @@ public static List<Pipe> pipe(List<Expression> expressions) {
154148
}
155149
return pipes;
156150
}
157-
158-
public static TypeResolution typeMustBeBoolean(Expression e, String operationName, ParamOrdinal paramOrd) {
159-
return typeMustBe(e, dt -> dt == BOOLEAN, operationName, paramOrd, "boolean");
160-
}
161-
162-
public static TypeResolution typeMustBeInteger(Expression e, String operationName, ParamOrdinal paramOrd) {
163-
return typeMustBe(e, DataType::isInteger, operationName, paramOrd, "integer");
164-
}
165-
166-
public static TypeResolution typeMustBeNumeric(Expression e, String operationName, ParamOrdinal paramOrd) {
167-
return typeMustBe(e, DataType::isNumeric, operationName, paramOrd, "numeric");
168-
}
169-
170-
public static TypeResolution typeMustBeString(Expression e, String operationName, ParamOrdinal paramOrd) {
171-
return typeMustBe(e, DataType::isString, operationName, paramOrd, "string");
172-
}
173-
174-
public static TypeResolution typeMustBeDate(Expression e, String operationName, ParamOrdinal paramOrd) {
175-
return typeMustBe(e, DataType::isDateBased, operationName, paramOrd, "date", "datetime");
176-
}
177-
178-
public static TypeResolution typeMustBeNumericOrDate(Expression e, String operationName, ParamOrdinal paramOrd) {
179-
return typeMustBe(e, dt -> dt.isNumeric() || dt.isDateBased(), operationName, paramOrd, "date", "datetime", "numeric");
180-
}
181-
182-
public static TypeResolution typeMustBe(Expression e,
183-
Predicate<DataType> predicate,
184-
String operationName,
185-
ParamOrdinal paramOrd,
186-
String... acceptedTypes) {
187-
return predicate.test(e.dataType()) || DataTypes.isNull(e.dataType())?
188-
TypeResolution.TYPE_RESOLVED :
189-
new TypeResolution(format(null, "[{}]{} argument must be [{}], found value [{}] type [{}]",
190-
operationName,
191-
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : " " + paramOrd.name().toLowerCase(Locale.ROOT),
192-
acceptedTypesForErrorMsg(acceptedTypes),
193-
Expressions.name(e),
194-
e.dataType().typeName));
195-
}
196-
197-
private static String acceptedTypesForErrorMsg(String... acceptedTypes) {
198-
StringJoiner sj = new StringJoiner(", ");
199-
for (int i = 0; i < acceptedTypes.length - 1; i++) {
200-
sj.add(acceptedTypes[i]);
201-
}
202-
if (acceptedTypes.length > 1) {
203-
return sj.toString() + " or " + acceptedTypes[acceptedTypes.length - 1];
204-
} else {
205-
return acceptedTypes[0];
206-
}
207-
}
208151
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/FieldAttribute.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,13 @@ public FieldAttribute nestedParent() {
8181
return nestedParent;
8282
}
8383

84-
public boolean isInexact() {
85-
return field.isExact() == false;
84+
public EsField.Exact getExactInfo() {
85+
return field.getExactInfo();
8686
}
8787

8888
public FieldAttribute exactAttribute() {
89-
if (field.isExact() == false) {
89+
EsField exactField = field.getExactField();
90+
if (exactField.equals(field) == false) {
9091
return innerField(field.getExactField());
9192
}
9293
return this;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Order.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
*/
66
package org.elasticsearch.xpack.sql.expression;
77

8-
import org.elasticsearch.xpack.sql.tree.Source;
98
import org.elasticsearch.xpack.sql.tree.NodeInfo;
9+
import org.elasticsearch.xpack.sql.tree.Source;
1010
import org.elasticsearch.xpack.sql.type.DataType;
1111

1212
import java.util.List;
1313
import java.util.Objects;
1414

1515
import static java.util.Collections.singletonList;
16+
import static org.elasticsearch.xpack.sql.expression.TypeResolutions.isExact;
1617

1718
public class Order extends Expression {
1819

@@ -45,6 +46,11 @@ public Nullability nullable() {
4546
return Nullability.FALSE;
4647
}
4748

49+
@Override
50+
protected TypeResolution resolveType() {
51+
return isExact(child, "ORDER BY cannot be applied to field of data type [{}]: {}");
52+
}
53+
4854
@Override
4955
public DataType dataType() {
5056
return child.dataType();
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.sql.expression;
7+
8+
import org.elasticsearch.xpack.sql.type.DataType;
9+
import org.elasticsearch.xpack.sql.type.DataTypes;
10+
import org.elasticsearch.xpack.sql.type.EsField;
11+
12+
import java.util.Locale;
13+
import java.util.StringJoiner;
14+
import java.util.function.Predicate;
15+
16+
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
17+
import static org.elasticsearch.xpack.sql.expression.Expression.TypeResolution;
18+
import static org.elasticsearch.xpack.sql.expression.Expressions.ParamOrdinal;
19+
import static org.elasticsearch.xpack.sql.expression.Expressions.name;
20+
import static org.elasticsearch.xpack.sql.type.DataType.BOOLEAN;
21+
22+
public final class TypeResolutions {
23+
24+
private TypeResolutions() {}
25+
26+
public static TypeResolution isBoolean(Expression e, String operationName, ParamOrdinal paramOrd) {
27+
return isType(e, dt -> dt == BOOLEAN, operationName, paramOrd, "boolean");
28+
}
29+
30+
public static TypeResolution isInteger(Expression e, String operationName, ParamOrdinal paramOrd) {
31+
return isType(e, DataType::isInteger, operationName, paramOrd, "integer");
32+
}
33+
34+
public static TypeResolution isNumeric(Expression e, String operationName, ParamOrdinal paramOrd) {
35+
return isType(e, DataType::isNumeric, operationName, paramOrd, "numeric");
36+
}
37+
38+
public static TypeResolution isString(Expression e, String operationName, ParamOrdinal paramOrd) {
39+
return isType(e, DataType::isString, operationName, paramOrd, "string");
40+
}
41+
42+
public static TypeResolution isDate(Expression e, String operationName, ParamOrdinal paramOrd) {
43+
return isType(e, DataType::isDateBased, operationName, paramOrd, "date", "datetime");
44+
}
45+
46+
public static TypeResolution isNumericOrDate(Expression e, String operationName, ParamOrdinal paramOrd) {
47+
return isType(e, dt -> dt.isNumeric() || dt.isDateBased(), operationName, paramOrd, "date", "datetime", "numeric");
48+
}
49+
50+
public static TypeResolution isExact(Expression e, String message) {
51+
if (e instanceof FieldAttribute) {
52+
EsField.Exact exact = ((FieldAttribute) e).getExactInfo();
53+
if (exact.hasExact() == false) {
54+
return new TypeResolution(format(null, message, e.dataType().typeName, exact.errorMsg()));
55+
}
56+
}
57+
return TypeResolution.TYPE_RESOLVED;
58+
}
59+
60+
public static TypeResolution isExact(Expression e, String operationName, ParamOrdinal paramOrd) {
61+
if (e instanceof FieldAttribute) {
62+
EsField.Exact exact = ((FieldAttribute) e).getExactInfo();
63+
if (exact.hasExact() == false) {
64+
return new TypeResolution(format(null, "[{}] cannot operate on {}field of data type [{}]: {}",
65+
operationName,
66+
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ?
67+
"" : paramOrd.name().toLowerCase(Locale.ROOT) + " argument ",
68+
e.dataType().typeName, exact.errorMsg()));
69+
}
70+
}
71+
return TypeResolution.TYPE_RESOLVED;
72+
}
73+
74+
public static TypeResolution isStringAndExact(Expression e, String operationName, ParamOrdinal paramOrd) {
75+
TypeResolution resolution = isString(e, operationName, paramOrd);
76+
if (resolution.unresolved()) {
77+
return resolution;
78+
}
79+
80+
return isExact(e, operationName, paramOrd);
81+
}
82+
83+
public static TypeResolution isFoldable(Expression e, String operationName, ParamOrdinal paramOrd) {
84+
if (!e.foldable()) {
85+
return new TypeResolution(format(null, "{}argument of [{}] must be a constant, received [{}]",
86+
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
87+
operationName,
88+
Expressions.name(e)));
89+
}
90+
return TypeResolution.TYPE_RESOLVED;
91+
}
92+
93+
public static TypeResolution isNotFoldable(Expression e, String operationName, ParamOrdinal paramOrd) {
94+
if (e.foldable()) {
95+
return new TypeResolution(format(null, "{}argument of [{}] must be a table column, found constant [{}]",
96+
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
97+
operationName,
98+
Expressions.name(e)));
99+
}
100+
return TypeResolution.TYPE_RESOLVED;
101+
}
102+
103+
public static TypeResolution isType(Expression e,
104+
Predicate<DataType> predicate,
105+
String operationName,
106+
ParamOrdinal paramOrd,
107+
String... acceptedTypes) {
108+
return predicate.test(e.dataType()) || DataTypes.isNull(e.dataType())?
109+
TypeResolution.TYPE_RESOLVED :
110+
new TypeResolution(format(null, "{}argument of [{}] must be [{}], found value [{}] type [{}]",
111+
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
112+
operationName,
113+
acceptedTypesForErrorMsg(acceptedTypes),
114+
name(e),
115+
e.dataType().typeName));
116+
}
117+
118+
private static String acceptedTypesForErrorMsg(String... acceptedTypes) {
119+
StringJoiner sj = new StringJoiner(", ");
120+
for (int i = 0; i < acceptedTypes.length - 1; i++) {
121+
sj.add(acceptedTypes[i]);
122+
}
123+
if (acceptedTypes.length > 1) {
124+
return sj.toString() + " or " + acceptedTypes[acceptedTypes.length - 1];
125+
} else {
126+
return acceptedTypes[0];
127+
}
128+
}
129+
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/AggregateFunction.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
99
import org.elasticsearch.xpack.sql.expression.Expression;
10+
import org.elasticsearch.xpack.sql.expression.Expressions;
11+
import org.elasticsearch.xpack.sql.expression.TypeResolutions;
1012
import org.elasticsearch.xpack.sql.expression.function.Function;
1113
import org.elasticsearch.xpack.sql.expression.gen.pipeline.AggNameInput;
1214
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
@@ -78,8 +80,13 @@ public boolean equals(Object obj) {
7880
&& Objects.equals(other.parameters(), parameters());
7981
}
8082

83+
@Override
84+
protected TypeResolution resolveType() {
85+
return TypeResolutions.isExact(field, sourceText(), Expressions.ParamOrdinal.DEFAULT);
86+
}
87+
8188
@Override
8289
public int hashCode() {
8390
return Objects.hash(field(), parameters());
8491
}
85-
}
92+
}

0 commit comments

Comments
 (0)