Skip to content

SQL: Enhance checks for inexact fields #39427

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 28, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -2348,23 +2348,25 @@ Arumugam
////////////
limitationSubSelect
// tag::limitationSubSelect
SELECT * FROM (SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%') WHERE first_name LIKE 'A%';
SELECT * FROM (SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%') WHERE first_name LIKE 'A%' ORDER BY 1;

first_name | last_name
---------------+---------------
Anneke |Preusig
Anoosh |Peyn
Arumugam |Ossenbruggen
Alejandro |McAlpine
Anneke |Preusig
Anoosh |Peyn
Arumugam |Ossenbruggen
// end::limitationSubSelect
;

limitationSubSelect
limitationSubSelectRewritten
// tag::limitationSubSelectRewritten
SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%' AND first_name LIKE 'A%';
SELECT first_name, last_name FROM emp WHERE last_name NOT LIKE '%a%' AND first_name LIKE 'A%' ORDER BY 1;
// end::limitationSubSelectRewritten
first_name | last_name
---------------+---------------
Anneke |Preusig
Anoosh |Peyn
Arumugam |Ossenbruggen
Alejandro |McAlpine
Anneke |Preusig
Anoosh |Peyn
Arumugam |Ossenbruggen
;
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/
package org.elasticsearch.xpack.sql.analysis.analyzer;

import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.xpack.sql.capabilities.Unresolvable;
import org.elasticsearch.xpack.sql.expression.Alias;
import org.elasticsearch.xpack.sql.expression.Attribute;
Expand Down Expand Up @@ -294,7 +295,8 @@ Collection<Failure> verify(LogicalPlan plan) {
*/
private static boolean checkGroupBy(LogicalPlan p, Set<Failure> localFailures,
Map<String, Function> resolvedFunctions, Set<LogicalPlan> groupingFailures) {
return checkGroupByAgg(p, localFailures, resolvedFunctions)
return checkGroupByInexactField(p, localFailures)
&& checkGroupByAgg(p, localFailures, resolvedFunctions)
&& checkGroupByOrder(p, localFailures, groupingFailures)
&& checkGroupByHaving(p, localFailures, groupingFailures, resolvedFunctions);
}
Expand Down Expand Up @@ -463,6 +465,21 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Set<Expressio
return false;
}

private static boolean checkGroupByInexactField(LogicalPlan p, Set<Failure> localFailures) {
if (p instanceof Aggregate) {
Aggregate a = (Aggregate) p;

// The grouping can not be an aggregate function or an inexact field (e.g. text without a keyword)
a.groupings().forEach(e -> e.forEachUp(c -> {
Tuple<Boolean, String> hasExact = c.hasExact();
if (hasExact.v1() == Boolean.FALSE) {
localFailures.add(fail(c, "Grouping field of data type [" + c.dataType().typeName + "] for grouping; " +
hasExact.v2()));
}
}, FieldAttribute.class));
}
return true;
}

// check whether plain columns specified in an agg are mentioned in the group-by
private static boolean checkGroupByAgg(LogicalPlan p, Set<Failure> localFailures, Map<String, Function> functions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

public abstract class SourceGenerator {

private SourceGenerator() {}

private static final List<String> NO_STORED_FIELD = singletonList(StoredFieldsContext._NONE_);

public static SearchSourceBuilder sourceBuilder(QueryContainer container, QueryBuilder filter, Integer size) {
Expand Down Expand Up @@ -107,8 +109,7 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source

// sorting only works on not-analyzed fields - look for a multi-field replacement
if (attr instanceof FieldAttribute) {
FieldAttribute fa = (FieldAttribute) attr;
fa = fa.isInexact() ? fa.exactAttribute() : fa;
FieldAttribute fa = ((FieldAttribute) attr).exactAttribute();

sortBuilder = fieldSort(fa.name())
.missing(as.missing().position())
Expand All @@ -125,7 +126,8 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
if (nestedSort == null) {
fieldSort.setNestedSort(newSort);
} else {
for (; nestedSort.getNestedSort() != null; nestedSort = nestedSort.getNestedSort()) {
while (nestedSort.getNestedSort() != null) {
nestedSort = nestedSort.getNestedSort();
}
nestedSort.setNestedSort(newSort);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,16 @@
package org.elasticsearch.xpack.sql.expression;

import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.expression.Expression.TypeResolution;
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.StringJoiner;
import java.util.function.Predicate;

import static java.util.Collections.emptyList;
import static java.util.Collections.emptyMap;
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
import static org.elasticsearch.xpack.sql.type.DataType.BOOLEAN;

public final class Expressions {

Expand Down Expand Up @@ -154,55 +148,4 @@ public static List<Pipe> pipe(List<Expression> expressions) {
}
return pipes;
}

public static TypeResolution typeMustBeBoolean(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, dt -> dt == BOOLEAN, operationName, paramOrd, "boolean");
}

public static TypeResolution typeMustBeInteger(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isInteger, operationName, paramOrd, "integer");
}

public static TypeResolution typeMustBeNumeric(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isNumeric, operationName, paramOrd, "numeric");
}

public static TypeResolution typeMustBeString(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isString, operationName, paramOrd, "string");
}

public static TypeResolution typeMustBeDate(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isDateBased, operationName, paramOrd, "date", "datetime");
}

public static TypeResolution typeMustBeNumericOrDate(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, dt -> dt.isNumeric() || dt.isDateBased(), operationName, paramOrd, "date", "datetime", "numeric");
}

public static TypeResolution typeMustBe(Expression e,
Predicate<DataType> predicate,
String operationName,
ParamOrdinal paramOrd,
String... acceptedTypes) {
return predicate.test(e.dataType()) || DataTypes.isNull(e.dataType())?
TypeResolution.TYPE_RESOLVED :
new TypeResolution(format(null, "[{}]{} argument must be [{}], found value [{}] type [{}]",
operationName,
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : " " + paramOrd.name().toLowerCase(Locale.ROOT),
acceptedTypesForErrorMsg(acceptedTypes),
Expressions.name(e),
e.dataType().typeName));
}

private static String acceptedTypesForErrorMsg(String... acceptedTypes) {
StringJoiner sj = new StringJoiner(", ");
for (int i = 0; i < acceptedTypes.length - 1; i++) {
sj.add(acceptedTypes[i]);
}
if (acceptedTypes.length > 1) {
return sj.toString() + " or " + acceptedTypes[acceptedTypes.length - 1];
} else {
return acceptedTypes[0];
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.elasticsearch.xpack.sql.expression;

import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.type.DataType;
Expand Down Expand Up @@ -81,8 +82,8 @@ public FieldAttribute nestedParent() {
return nestedParent;
}

public boolean isInexact() {
return field.isExact() == false;
public Tuple<Boolean, String> hasExact() {
return field.hasExact();
}

public FieldAttribute exactAttribute() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
*/
package org.elasticsearch.xpack.sql.expression;

import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.type.DataType;

import java.util.List;
import java.util.Objects;

import static java.util.Collections.singletonList;
import static org.elasticsearch.xpack.sql.expression.TypeResolutionUtils.typeMustBeExact;

public class Order extends Expression {

Expand Down Expand Up @@ -45,6 +46,11 @@ public Nullability nullable() {
return Nullability.FALSE;
}

@Override
protected TypeResolution resolveType() {
return typeMustBeExact(child, "ORDER BY cannot be applied to field of data type[{}]: {}");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

White space between type and [{}]: ...to field of data type [{}]: {}.

}

@Override
public DataType dataType() {
return child.dataType();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.expression;

import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;

import java.util.Locale;
import java.util.StringJoiner;
import java.util.function.Predicate;

import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
import static org.elasticsearch.xpack.sql.expression.Expression.TypeResolution;
import static org.elasticsearch.xpack.sql.expression.Expressions.ParamOrdinal;
import static org.elasticsearch.xpack.sql.expression.Expressions.name;
import static org.elasticsearch.xpack.sql.type.DataType.BOOLEAN;

public final class TypeResolutionUtils {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer using the plural instead of Utils - TypeResolutions. Utils has been used where the plural already existed in the rest of the codebase to avoid name clashes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The plural just sounded to me a bit weird, but I'll change.


private TypeResolutionUtils() {}

public static TypeResolution typeMustBeBoolean(Expression e, String operationName, ParamOrdinal paramOrd) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, how about simplifying the method names from "typeMustBeNumeric" to "isNumeric" similar to the assertion in JUnit? It also add consistency ("expressionMustBe", "typeMustBe...").

return typeMustBe(e, dt -> dt == BOOLEAN, operationName, paramOrd, "boolean");
}

public static TypeResolution typeMustBeInteger(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isInteger, operationName, paramOrd, "integer");
}

public static TypeResolution typeMustBeNumeric(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isNumeric, operationName, paramOrd, "numeric");
}

public static TypeResolution typeMustBeString(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isString, operationName, paramOrd, "string");
}

public static TypeResolution typeMustBeDate(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, DataType::isDateBased, operationName, paramOrd, "date", "datetime");
}

public static TypeResolution typeMustBeNumericOrDate(Expression e, String operationName, ParamOrdinal paramOrd) {
return typeMustBe(e, dt -> dt.isNumeric() || dt.isDateBased(), operationName, paramOrd, "date", "datetime", "numeric");
}

public static TypeResolution typeMustBeExact(Expression e, String message) {
if (e instanceof FieldAttribute) {
Tuple<Boolean, String> hasExact = ((FieldAttribute) e).hasExact();
if (hasExact.v1() == Boolean.FALSE) {
return new TypeResolution(format(null, message, e.dataType().typeName, hasExact.v2()));
}
}
return TypeResolution.TYPE_RESOLVED;
}

public static TypeResolution typeMustBeExact(Expression e, String operationName, ParamOrdinal paramOrd) {
if (e instanceof FieldAttribute) {
Tuple<Boolean, String> hasExact = ((FieldAttribute) e).hasExact();
if (hasExact.v1() == Boolean.FALSE) {
return new TypeResolution(format(null, "[{}] cannot operate on {}field of data type [{}]: {}",
operationName,
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ?
"" : paramOrd.name().toLowerCase(Locale.ROOT) + " argument ",
e.dataType().typeName, hasExact.v2()));
}
}
return TypeResolution.TYPE_RESOLVED;
}

public static TypeResolution typeMustBeStringAndExact(Expression e, String operationName, ParamOrdinal paramOrd) {
TypeResolution resolution = typeMustBe(e, DataType::isString, operationName, paramOrd, "string");
if (resolution.unresolved()) {
return resolution;
}

return typeMustBeExact(e, operationName, paramOrd);
}

public static TypeResolution expressionMustBeConstant(Expression e, String operationName, ParamOrdinal paramOrd) {
if (!e.foldable()) {
return new TypeResolution(format(null, "{}argument of [{}] must be a constant, received [{}]",
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
operationName,
Expressions.name(e)));
}
return TypeResolution.TYPE_RESOLVED;
}

public static TypeResolution expressionMustBeTableColumn(Expression e, String operationName, ParamOrdinal paramOrd) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

constant/table column is a bit misleading - I would just use isFoldable/notFoldable since it's a terminology already used through-out the code.

if (e.foldable()) {
return new TypeResolution(format(null, "{}argument of [{}] must be a table column, found constant [{}]",
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
operationName,
Expressions.name(e)));
}
return TypeResolution.TYPE_RESOLVED;
}

public static TypeResolution typeMustBe(Expression e,
Predicate<DataType> predicate,
String operationName,
ParamOrdinal paramOrd,
String... acceptedTypes) {
return predicate.test(e.dataType()) || DataTypes.isNull(e.dataType())?
TypeResolution.TYPE_RESOLVED :
new TypeResolution(format(null, "{}argument of [{}] must be [{}], found value [{}] type [{}]",
paramOrd == null || paramOrd == ParamOrdinal.DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ",
operationName,
acceptedTypesForErrorMsg(acceptedTypes),
name(e),
e.dataType().typeName));
}

private static String acceptedTypesForErrorMsg(String... acceptedTypes) {
StringJoiner sj = new StringJoiner(", ");
for (int i = 0; i < acceptedTypes.length - 1; i++) {
sj.add(acceptedTypes[i]);
}
if (acceptedTypes.length > 1) {
return sj.toString() + " or " + acceptedTypes[acceptedTypes.length - 1];
} else {
return acceptedTypes[0];
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.expression.Expression;
import org.elasticsearch.xpack.sql.expression.Expressions;
import org.elasticsearch.xpack.sql.expression.function.Function;
import org.elasticsearch.xpack.sql.expression.gen.pipeline.AggNameInput;
import org.elasticsearch.xpack.sql.expression.gen.pipeline.Pipe;
Expand All @@ -19,6 +20,7 @@

import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static org.elasticsearch.xpack.sql.expression.TypeResolutionUtils.typeMustBeExact;

/**
* A type of {@code Function} that takes multiple values and extracts a single value out of them. For example, {@code AVG()}.
Expand Down Expand Up @@ -78,8 +80,13 @@ public boolean equals(Object obj) {
&& Objects.equals(other.parameters(), parameters());
}

@Override
protected TypeResolution resolveType() {
return typeMustBeExact(field, sourceText(), Expressions.ParamOrdinal.DEFAULT);
}

@Override
public int hashCode() {
return Objects.hash(field(), parameters());
}
}
}
Loading