From 285e1a72d09b8ac8f20aeed2d138a2e6473d520f Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 27 Mar 2025 12:15:00 +0100 Subject: [PATCH 01/10] Revert changes to Layout.java The change in 80125a4bac31f54cc86fdee26ef0ac5dfa61eee3 is a quick fix and allows breaking an invariant of Layout. Revert that. --- .../xpack/esql/planner/Layout.java | 23 ++++--------------- 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Layout.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Layout.java index a707bb1c6c000..b0e10ca7975ca 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Layout.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/Layout.java @@ -107,26 +107,13 @@ public Layout build() { Map layout = new HashMap<>(); int numberOfChannels = 0; for (ChannelSet set : channels) { - boolean createNewChannel = true; - int channel = 0; + int channel = numberOfChannels++; for (NameId id : set.nameIds) { - if (layout.containsKey(id)) { - // If a NameId already exists in the map, do not increase the numberOfChannels, it can cause inverse() to create - // a null in the list of channels, and NullPointerException when build() is called. - // TODO avoid adding duplicated attributes with the same id in the plan, ReplaceMissingFieldWithNull may add nulls - // with the same ids as the missing field ids. - continue; - } - if (createNewChannel) { - channel = numberOfChannels++; - createNewChannel = false; - } + // Duplicate name ids would mean that have 2 channels that are declared under the same id. That makes no sense - which + // channel should subsequent operators use, then, when they want to refer to this id? + assert (layout.containsKey(id) == false) : "Duplicate name ids are not allowed in layouts"; ChannelAndType next = new ChannelAndType(channel, set.type); - ChannelAndType prev = layout.put(id, next); - // Do allow multiple name to point to the same channel - see https://github.com/elastic/elasticsearch/pull/100238 - // if (prev != null) { - // throw new IllegalArgumentException("Name [" + id + "] is on two channels [" + prev + "] and [" + next + "]"); - // } + layout.put(id, next); } } return new DefaultLayout(Collections.unmodifiableMap(layout), numberOfChannels); From 809c66ad44605fe1b6a6cf6b688209751571ef06 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 27 Mar 2025 12:15:34 +0100 Subject: [PATCH 02/10] Simplify ReplaceMissingFieldWithNull When encountering projections, it tries to do the job of field extraction for missing fields by injecting an Eval that creates a literal null with the same name id as the field attribute for the missing field. This is wrong: 1. We only insert an Eval in case that a Project relies on the missing attribute. There could be other plan nodes that rely on the missing attribute. 2. Even for Projects, we only insert an Eval in case we squarely project for the field - in case of aliases (e.g. from RENAME), we do nothing. 3. In case of multiple Projects that use this attribute, we create multiple attributes with the original field attribute's id, causing a wrong Layout. This triggered https://github.com/elastic/elasticsearch/issues/121754. --- .../local/ReplaceMissingFieldWithNull.java | 72 +++---------------- 1 file changed, 11 insertions(+), 61 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index e41e500aad110..1281beebff7d2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -7,16 +7,11 @@ package org.elasticsearch.xpack.esql.optimizer.rules.logical.local; -import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.IndexMode; -import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.NamedExpression; -import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; -import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Filter; @@ -26,14 +21,9 @@ import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.TopN; import org.elasticsearch.xpack.esql.plan.logical.join.Join; -import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.stats.SearchStats; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - /** * Look for any fields used in the plan that are missing locally and replace them with null. * This should minimize the plan execution, in the best scenario skipping its execution all together. @@ -53,61 +43,21 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog } private LogicalPlan missingToNull(LogicalPlan plan, SearchStats stats, AttributeSet lookupFields) { - if (plan instanceof EsRelation || plan instanceof LocalRelation) { - return plan; - } - - if (plan instanceof Aggregate a) { - // don't do anything (for now) - return a; - } - // keep the aliased name - else if (plan instanceof Project project) { - var projections = project.projections(); - List newProjections = new ArrayList<>(projections.size()); - Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); - AttributeSet joinAttributes = joinAttributes(project); - - for (NamedExpression projection : projections) { - // Do not use the attribute name, this can deviate from the field name for union types. - if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false && joinAttributes.contains(f) == false) { - // TODO: Should do a searchStats lookup for join attributes instead of just ignoring them here - // See TransportSearchShardsAction - DataType dt = f.dataType(); - Alias nullAlias = nullLiteral.get(f.dataType()); - // save the first field as null (per datatype) - if (nullAlias == null) { - Alias alias = new Alias(f.source(), f.name(), Literal.of(f, null), f.id()); - nullLiteral.put(dt, alias); - projection = alias.toAttribute(); - } - // otherwise point to it - else { - // since avoids creating field copies - projection = new Alias(f.source(), f.name(), nullAlias.toAttribute(), f.id()); - } - } - - newProjections.add(projection); - } - // add the first found field as null - if (nullLiteral.size() > 0) { - plan = new Eval(project.source(), project.child(), new ArrayList<>(nullLiteral.values())); - plan = new Project(project.source(), plan, newProjections); - } - } else if (plan instanceof Eval + if (plan instanceof Eval || plan instanceof Filter || plan instanceof OrderBy || plan instanceof RegexExtract || plan instanceof TopN) { - plan = plan.transformExpressionsOnlyUp( - FieldAttribute.class, - // Do not use the attribute name, this can deviate from the field name for union types. - // Also skip fields from lookup indices because we do not have stats for these. - // TODO: We do have stats for lookup indices in case they are being used in the FROM clause; this can be refined. - f -> stats.exists(f.fieldName()) || lookupFields.contains(f) ? f : Literal.of(f, null) - ); - } + plan = plan.transformExpressionsOnlyUp( + FieldAttribute.class, + // Do not use the attribute name, this can deviate from the field name for union types. + // Also skip fields from lookup indices because we do not have stats for these. + // TODO: We do have stats for lookup indices in case they are being used in the FROM clause; this can be refined. + f -> (stats.exists(f.fieldName()) || lookupFields.contains(f)) + ? f + : Literal.of(f, null) + ); + } return plan; } From 4c29bfc7fcdc82dd70fae7005740ac52cfe6c3b0 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 27 Mar 2025 16:36:21 +0100 Subject: [PATCH 03/10] Revive logic for EsRelation instead of Project --- .../local/ReplaceMissingFieldWithNull.java | 85 ++++++++++++++----- .../xpack/esql/plan/logical/EsRelation.java | 4 + 2 files changed, 70 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 1281beebff7d2..c8b2ff5989172 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -7,10 +7,15 @@ package org.elasticsearch.xpack.esql.optimizer.rules.logical.local; +import org.elasticsearch.common.util.Maps; import org.elasticsearch.index.IndexMode; +import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.Eval; @@ -20,9 +25,12 @@ import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.TopN; -import org.elasticsearch.xpack.esql.plan.logical.join.Join; import org.elasticsearch.xpack.esql.rule.ParameterizedRule; -import org.elasticsearch.xpack.esql.stats.SearchStats; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.function.Predicate; /** * Look for any fields used in the plan that are missing locally and replace them with null. @@ -32,6 +40,9 @@ public class ReplaceMissingFieldWithNull extends ParameterizedRule { if (esRelation.indexMode() == IndexMode.LOOKUP) { @@ -39,32 +50,68 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog } }); - return plan.transformUp(p -> missingToNull(p, localLogicalOptimizerContext.searchStats(), lookupFields)); + // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead. + // Also retain fields from lookup indices because we do not have stats for these. + Predicate shouldBeRetained = f -> (localLogicalOptimizerContext.searchStats().exists(f.fieldName()) || lookupFields.contains(f)); + + return plan.transformUp(p -> missingToNull(p, shouldBeRetained)); } - private LogicalPlan missingToNull(LogicalPlan plan, SearchStats stats, AttributeSet lookupFields) { + private LogicalPlan missingToNull(LogicalPlan plan, Predicate shouldBeRetained) { + if (plan instanceof EsRelation relation) { + // Remove missing fields from the EsRelation because this is not where we will obtain them from; replace them by an Eval right + // after, instead. This allows us to safely re-use the attribute ids of the corresponding FieldAttributes. + // This means that an EsRelation[field1, field2, field3] where field1 and field 3 are missing will be replaced by + // Project[field1, field2, field3] <- keeps the ordering intact + // \_Eval[field1 = null, field3 = null] + // \_EsRelation[field2] + // TODO: Test when there are 0 fields remaining + List initialOutput = relation.output(); + List remainingFields = new ArrayList<>(initialOutput.size()); + Map nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); + List newProjections = new ArrayList<>(initialOutput.size()); + for (int i = 0, size = initialOutput.size(); i < size; i++) { + Attribute attr = initialOutput.get(i); + NamedExpression projection; + if (attr instanceof FieldAttribute f && (shouldBeRetained.test(f) == false)) { + DataType dt = f.dataType(); + Alias nullAlias = nullLiterals.get(dt); + // save the first field as null (per datatype) + if (nullAlias == null) { + // Keep the same id so downstream query plans don't need updating + Alias alias = new Alias(f.source(), f.name(), Literal.of(f, null), f.id()); + nullLiterals.put(dt, alias); + projection = alias.toAttribute(); + } + // otherwise point to it since this avoids creating field copies + else { + projection = new Alias(f.source(), f.name(), nullAlias.toAttribute(), f.id()); + } + } else { + remainingFields.add(attr); + projection = attr; + } + newProjections.add(projection); + } + + if (remainingFields.size() == initialOutput.size()) { + return plan; + } + + EsRelation newRelation = relation.withAttributes(remainingFields); + Eval eval = new Eval(plan.source(), newRelation, new ArrayList<>(nullLiterals.values())); + // This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it). + return new Project(plan.source(), eval, newProjections); + } + if (plan instanceof Eval || plan instanceof Filter || plan instanceof OrderBy || plan instanceof RegexExtract || plan instanceof TopN) { - plan = plan.transformExpressionsOnlyUp( - FieldAttribute.class, - // Do not use the attribute name, this can deviate from the field name for union types. - // Also skip fields from lookup indices because we do not have stats for these. - // TODO: We do have stats for lookup indices in case they are being used in the FROM clause; this can be refined. - f -> (stats.exists(f.fieldName()) || lookupFields.contains(f)) - ? f - : Literal.of(f, null) - ); + return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> shouldBeRetained.test(f) ? f : Literal.of(f, null)); } return plan; } - - private AttributeSet joinAttributes(Project project) { - var attributes = new AttributeSet(); - project.forEachDown(Join.class, j -> j.right().forEachDown(EsRelation.class, p -> attributes.addAll(p.output()))); - return attributes; - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java index 448085df1e831..e3c562d3d630e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java @@ -226,4 +226,8 @@ public static void writeIndexMode(StreamOutput out, IndexMode indexMode) throws throw new IllegalStateException("not ready to support index mode [" + indexMode + "]"); } } + + public EsRelation withAttributes(List newAttributes) { + return new EsRelation(source(), indexPattern, indexMode, indexNameWithModes, newAttributes); + } } From a2df3fad9aaf1eb4ec398c18c6a7010265e47504 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 27 Mar 2025 17:16:35 +0100 Subject: [PATCH 04/10] Update LocalLogicalPlanOptimizerTests --- .../LocalLogicalPlanOptimizerTests.java | 61 ++++++++++++++++--- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java index 98f3d1d2d8d8e..6903e5dfce35d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizerTests.java @@ -22,7 +22,6 @@ import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -78,6 +77,7 @@ import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; //@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug") @@ -141,7 +141,7 @@ public void testMissingFieldInFilterString() { /** * Expects - * Project[[last_name{r}#6]] + * Project[[last_name{f}#6]] * \_Eval[[null[KEYWORD] AS last_name]] * \_Limit[10000[INTEGER]] * \_EsRelation[test][_meta_field{f}#8, emp_no{f}#2, first_name{f}#3, gen..] @@ -158,7 +158,7 @@ public void testMissingFieldInProject() { var project = as(localPlan, Project.class); var projections = project.projections(); assertThat(Expressions.names(projections), contains("last_name")); - as(projections.get(0), ReferenceAttribute.class); + as(projections.get(0), FieldAttribute.class); var eval = as(project.child(), Eval.class); assertThat(Expressions.names(eval.fields()), contains("last_name")); var alias = as(eval.fields().get(0), Alias.class); @@ -168,6 +168,7 @@ public void testMissingFieldInProject() { var limit = as(eval.child(), Limit.class); var source = as(limit.child(), EsRelation.class); + assertThat(Expressions.names(source.output()), not(contains("last_name"))); } /** @@ -192,6 +193,7 @@ public void testMissingFieldInSort() { var limit = as(project.child(), Limit.class); var source = as(limit.child(), EsRelation.class); + assertThat(Expressions.names(source.output()), not(contains("last_name"))); } /** @@ -199,8 +201,11 @@ public void testMissingFieldInSort() { * EsqlProject[[first_name{f}#7, last_name{r}#17]] * \_Limit[1000[INTEGER],true] * \_MvExpand[last_name{f}#10,last_name{r}#17] - * \_Limit[1000[INTEGER],false] - * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] + * \_Project[[_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, gender{f}#8, hire_date{f}#13, job{f}#14, job.raw{f}#15, lang + * uages{f}#9, last_name{r}#10, long_noidx{f}#16, salary{f}#11]] + * \_Eval[[null[KEYWORD] AS last_name]] + * \_Limit[1000[INTEGER],false] + * \_EsRelation[test][_meta_field{f}#12, emp_no{f}#6, first_name{f}#7, ge..] */ public void testMissingFieldInMvExpand() { var plan = plan(""" @@ -212,14 +217,23 @@ public void testMissingFieldInMvExpand() { var testStats = statsForMissingField("last_name"); var localPlan = localPlan(plan, testStats); + // It'd be much better if this project was pushed down past the MvExpand, because MvExpand's cost scales with the number of + // involved attributes/columns. var project = as(localPlan, EsqlProject.class); var projections = project.projections(); assertThat(Expressions.names(projections), contains("first_name", "last_name")); var limit1 = asLimit(project.child(), 1000, true); var mvExpand = as(limit1.child(), MvExpand.class); - var limit2 = asLimit(mvExpand.child(), 1000, false); - as(limit2.child(), EsRelation.class); + var project2 = as(mvExpand.child(), Project.class); + var eval = as(project2.child(), Eval.class); + assertEquals(eval.fields().size(), 1); + var lastName = eval.fields().get(0); + assertEquals(lastName.name(), "last_name"); + assertEquals(lastName.child(), new Literal(EMPTY, null, DataType.KEYWORD)); + var limit2 = asLimit(eval.child(), 1000, false); + var relation = as(limit2.child(), EsRelation.class); + assertThat(Expressions.names(relation.output()), not(contains("last_name"))); } public static class MockFieldAttributeCommand extends UnaryPlan { @@ -275,6 +289,39 @@ public void testMissingFieldInNewCommand() { ), testStats ); + + var plan = plan(""" + from test + """); + var initialRelation = plan.collectLeaves().get(0); + FieldAttribute lastName = null; + for (Attribute attr : initialRelation.output()) { + if (attr.name().equals("last_name")) { + lastName = (FieldAttribute) attr; + } + } + + // Expects + // MockFieldAttributeCommand[last_name{f}#7] + // \_Project[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, hire_date{f}#10, job{f}#11, job.raw{f}#12, langu + // ages{f}#6, last_name{r}#7, long_noidx{f}#13, salary{f}#8]] + // \_Eval[[null[KEYWORD] AS last_name]] + // \_Limit[1000[INTEGER],false] + // \_EsRelation[test][_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..] + LogicalPlan localPlan = localPlan(new MockFieldAttributeCommand(EMPTY, plan, lastName), testStats); + + var mockCommand = as(localPlan, MockFieldAttributeCommand.class); + var project = as(mockCommand.child(), Project.class); + var eval = as(project.child(), Eval.class); + var limit = asLimit(eval.child(), 1000); + var relation = as(limit.child(), EsRelation.class); + + assertThat(Expressions.names(eval.fields()), contains("last_name")); + var literal = as(eval.fields().get(0), Alias.class); + assertEquals(literal.child(), new Literal(EMPTY, null, DataType.KEYWORD)); + assertThat(Expressions.names(relation.output()), not(contains("last_name"))); + + assertEquals(Expressions.names(initialRelation.output()), Expressions.names(project.output())); } /** From 1f89c134707bf36b830f15e1237665cf5e7aa478 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 27 Mar 2025 17:51:29 +0100 Subject: [PATCH 05/10] Update test expectations --- .../rules/logical/local/ReplaceMissingFieldWithNull.java | 2 +- .../esql/optimizer/LocalPhysicalPlanOptimizerTests.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index c8b2ff5989172..f49bbaae0148a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -65,7 +65,7 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate sh // Project[field1, field2, field3] <- keeps the ordering intact // \_Eval[field1 = null, field3 = null] // \_EsRelation[field2] - // TODO: Test when there are 0 fields remaining + // TODO: Double check that we test when there are 0 fields remaining List initialOutput = relation.output(); List remainingFields = new ArrayList<>(initialOutput.size()); Map nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 640f352bd110c..74ddcceb0505f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -37,8 +37,8 @@ import org.elasticsearch.xpack.esql.analysis.Verifier; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; @@ -1282,9 +1282,9 @@ public void testMissingFieldsDoNotGetExtracted() { ) ); // emp_no - assertThat(projections.get(1), instanceOf(ReferenceAttribute.class)); + assertThat(projections.get(1), instanceOf(FieldAttribute.class)); // first_name - assertThat(projections.get(2), instanceOf(ReferenceAttribute.class)); + assertThat(projections.get(2), instanceOf(FieldAttribute.class)); // last_name --> first_name var nullAlias = Alias.unwrap(projections.get(8)); From 869415be32ce784fc621c6838a5d96e972779aa3 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 28 Mar 2025 11:16:14 +0100 Subject: [PATCH 06/10] Do not prune attributes from EsRelation This can lead to empty output, which leads to the EsRelation being replaced by a LocalRelation with 0 rows. --- .../local/ReplaceMissingFieldWithNull.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index f49bbaae0148a..ac3931b7572b0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -65,13 +65,11 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate sh // Project[field1, field2, field3] <- keeps the ordering intact // \_Eval[field1 = null, field3 = null] // \_EsRelation[field2] - // TODO: Double check that we test when there are 0 fields remaining - List initialOutput = relation.output(); - List remainingFields = new ArrayList<>(initialOutput.size()); + List relationOutput = relation.output(); Map nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); - List newProjections = new ArrayList<>(initialOutput.size()); - for (int i = 0, size = initialOutput.size(); i < size; i++) { - Attribute attr = initialOutput.get(i); + List newProjections = new ArrayList<>(relationOutput.size()); + for (int i = 0, size = relationOutput.size(); i < size; i++) { + Attribute attr = relationOutput.get(i); NamedExpression projection; if (attr instanceof FieldAttribute f && (shouldBeRetained.test(f) == false)) { DataType dt = f.dataType(); @@ -88,18 +86,16 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate sh projection = new Alias(f.source(), f.name(), nullAlias.toAttribute(), f.id()); } } else { - remainingFields.add(attr); projection = attr; } newProjections.add(projection); } - if (remainingFields.size() == initialOutput.size()) { + if (nullLiterals.size() == 0) { return plan; } - EsRelation newRelation = relation.withAttributes(remainingFields); - Eval eval = new Eval(plan.source(), newRelation, new ArrayList<>(nullLiterals.values())); + Eval eval = new Eval(plan.source(), relation, new ArrayList<>(nullLiterals.values())); // This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it). return new Project(plan.source(), eval, newProjections); } From 23957bd37e9f4edccbdad1cd1ffa4927bb80ef70 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 2 Apr 2025 18:59:25 +0200 Subject: [PATCH 07/10] Add tests + capability --- .../src/main/resources/enrich.csv-spec | 18 +++++++++++++++ .../src/main/resources/lookup-join.csv-spec | 22 +++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 +++++- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index 3c38bd190b0b1..0a7cb5cbceaf8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -644,3 +644,21 @@ FROM airports abbrev:k | city_name:k | city_location:geo_point | country:k | location:geo_point | name:text | region:text | boundary_wkt_length:i IDR | Indore | POINT(75.8472 22.7167) | India | POINT(75.8092915005895 22.727749187571) | Devi Ahilyabai Holkar Int'l | Indore City | 231 ; + +// Regression test for https://github.com/elastic/elasticsearch/issues/126030 +// We had wrong layouts from ReplaceMissingFieldsWithNull in case of indices that had relevant fields for the query, +// but were **missing the field we enrich on**. +fieldsInOtherIndicesBug +required_capability: enrich_load +required_capability: fix_replace_missing_field_with_null_duplicate_name_id_in_layout + +from * +| keep author.keyword, book_no, scalerank, street, bytes_in, @timestamp, abbrev, city_location, distance, description, birth_date, language_code, intersects, client_ip, event_duration, version +| enrich languages_policy on author.keyword +| sort book_no +| limit 1 +; + +author.keyword:keyword|book_no:keyword|scalerank:integer|street:keyword|bytes_in:ul|@timestamp:unsupported|abbrev:keyword|city_location:geo_point|distance:double|description:unsupported|birth_date:date|language_code:integer|intersects:boolean|client_ip:unsupported|event_duration:long|version:version|language_name:keyword +Fyodor Dostoevsky |1211 |null |null |null |null |null |null |null |null |null |null |null |null |null |null |null +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index fc43c5fb5b278..ee9d25c7d4474 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -1394,6 +1394,10 @@ emp_no:integer | language_code:integer | language_name:keyword 10093 | 3 | Spanish ; +############################################### +# Bugfixes +############################################### + multipleBatchesWithSort required_capability: join_lookup_v12 required_capability: remove_redundant_sort @@ -1480,3 +1484,21 @@ from * m:integer |birth_date:datetime null |1952-02-27T00:00:00.000Z ; + +// Regression test for https://github.com/elastic/elasticsearch/issues/126030 +// We had wrong layouts from ReplaceMissingFieldsWithNull + +enrichLookupStatsBug +required_capability: join_lookup_v12 +required_capability: fix_replace_missing_field_with_null_duplicate_name_id_in_layout + +from * +| enrich languages_policy on cluster +| rename languages.byte as language_code +| lookup join languages_lookup on language_code +| stats salary_change.long = max(ratings), foo = max(num) +; + +salary_change.long:double|foo:long +5.0 |1698069301543123456 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index e55f07b715010..18485133a4726 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -699,7 +699,12 @@ public enum Cap { /** * Make numberOfChannels consistent with layout in DefaultLayout by removing duplicated ChannelSet. */ - MAKE_NUMBER_OF_CHANNELS_CONSISTENT_WITH_LAYOUT; + MAKE_NUMBER_OF_CHANNELS_CONSISTENT_WITH_LAYOUT, + + /** + * Supercedes {@link Cap#MAKE_NUMBER_OF_CHANNELS_CONSISTENT_WITH_LAYOUT}. + */ + FIX_REPLACE_MISSING_FIELD_WITH_NULL_DUPLICATE_NAME_ID_IN_LAYOUT; private final boolean enabled; From c17e466ff7d89ebfd296b6ec3d5bb01f5cdb096d Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 2 Apr 2025 19:13:38 +0200 Subject: [PATCH 08/10] Add comments --- .../logical/local/ReplaceMissingFieldWithNull.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index ac3931b7572b0..3e7d018839f57 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -41,10 +41,14 @@ public class ReplaceMissingFieldWithNull extends ParameterizedRule { + // Looking only for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index + // is used in the FROM command, it will not be marked with LOOKUP mode there - but STANDARD. + // It seems like we could instead just look for JOINs and walk down their right hand side to find lookup fields - but this does + // not work as this rule also gets called just on the right hand side of a JOIN, which means that we don't always know that + // we're inside the right (or left) branch of a JOIN node. (See PlannerUtils.localPlan - this looks for FragmentExecs and + // performs local logical optimization of the fragments; the right hand side of a LookupJoinExec can be a FragmentExec.) if (esRelation.indexMode() == IndexMode.LOOKUP) { lookupFields.addAll(esRelation.output()); } @@ -77,6 +81,12 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate sh // save the first field as null (per datatype) if (nullAlias == null) { // Keep the same id so downstream query plans don't need updating + // NOTE: THIS IS BRITTLE AND CAN LEAD TO BUGS. + // In case some optimizer rule or so inserts a plan node that requires the field BEFORE the Eval that we're adding + // on top of the EsRelation, this can trigger a field extraction in the physical optimizer phase, causing wrong + // layouts due to a duplicate name id. + // If someone reaches here AGAIN when debugging e.g. ClassCastExceptions NPEs from wrong layouts, we should probably + // give up on this approach and instead insert EvalExecs in InsertFieldExtraction. Alias alias = new Alias(f.source(), f.name(), Literal.of(f, null), f.id()); nullLiterals.put(dt, alias); projection = alias.toAttribute(); From 3138eaacd0cacb10b62d4a4b44d29a98683f8b17 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 2 Apr 2025 18:37:42 +0000 Subject: [PATCH 09/10] [CI] Auto commit changes from spotless --- .../rules/logical/local/ReplaceMissingFieldWithNull.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 3e7d018839f57..5efef5e4b7c9a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -56,7 +56,8 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog // Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead. // Also retain fields from lookup indices because we do not have stats for these. - Predicate shouldBeRetained = f -> (localLogicalOptimizerContext.searchStats().exists(f.fieldName()) || lookupFields.contains(f)); + Predicate shouldBeRetained = f -> (localLogicalOptimizerContext.searchStats().exists(f.fieldName()) + || lookupFields.contains(f)); return plan.transformUp(p -> missingToNull(p, shouldBeRetained)); } From 368f4d48c83b3c3f682689627795d3786dd64fbb Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Wed, 2 Apr 2025 19:07:20 +0200 Subject: [PATCH 10/10] Update docs/changelog/125764.yaml --- docs/changelog/125764.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 docs/changelog/125764.yaml diff --git a/docs/changelog/125764.yaml b/docs/changelog/125764.yaml new file mode 100644 index 0000000000000..8f85645c1425c --- /dev/null +++ b/docs/changelog/125764.yaml @@ -0,0 +1,8 @@ +pr: 125764 +summary: Fix `ReplaceMissingFieldsWithNull` +area: ES|QL +type: bug +issues: + - 126036 + - 121754 + - 126030