Skip to content

Commit 9676faf

Browse files
authored
ESQL: Prune lookup join cols (#118808)
Use the existing optimizer rule PruneColumns to remove redundant lookup fields from LOOKUP JOIN commands. Additionally, slightly simplify PruneColumns and the surrogate substitution for LOOKUP JOIN.
1 parent 6983f9a commit 9676faf

File tree

7 files changed

+422
-36
lines changed

7 files changed

+422
-36
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,51 @@ language_code:integer | language_name:keyword
198198
4 | German
199199
;
200200

201+
dropAllLookedUpFieldsOnTheDataNode-Ignore
202+
// Depends on
203+
// https://github.com/elastic/elasticsearch/issues/118778
204+
// https://github.com/elastic/elasticsearch/issues/118781
205+
required_capability: join_lookup_v8
206+
207+
FROM employees
208+
| EVAL language_code = emp_no % 10
209+
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
210+
| WHERE emp_no == 10001
211+
| SORT emp_no
212+
| DROP language*
213+
;
214+
215+
emp_no:integer
216+
10001
217+
10001
218+
10001
219+
10001
220+
;
221+
222+
dropAllLookedUpFieldsOnTheCoordinator-Ignore
223+
// Depends on
224+
// https://github.com/elastic/elasticsearch/issues/118778
225+
// https://github.com/elastic/elasticsearch/issues/118781
226+
required_capability: join_lookup_v8
227+
228+
FROM employees
229+
| SORT emp_no
230+
| LIMIT 2
231+
| EVAL language_code = emp_no % 10
232+
| LOOKUP JOIN languages_lookup_non_unique_key ON language_code
233+
| DROP language*
234+
;
235+
236+
emp_no:integer
237+
10001
238+
10001
239+
10001
240+
10001
241+
10002
242+
10002
243+
10002
244+
;
245+
201246
###############################################
202247
# Filtering tests with languages_lookup index
203248
###############################################
@@ -860,6 +905,26 @@ ignoreOrder:true
860905
2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success | 172.21.2.162 | null
861906
;
862907

908+
lookupMessageFromIndexTwiceFullyShadowing
909+
required_capability: join_lookup_v8
910+
911+
FROM sample_data
912+
| LOOKUP JOIN message_types_lookup ON message
913+
| LOOKUP JOIN message_types_lookup ON message
914+
| KEEP @timestamp, client_ip, event_duration, message, type
915+
;
916+
ignoreOrder:true
917+
918+
@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword
919+
2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success
920+
2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error
921+
2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error
922+
2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error
923+
2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected
924+
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success
925+
2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success
926+
;
927+
863928
###############################################
864929
# Tests with clientips_lookup and message_types_lookup indexes
865930
###############################################

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PruneColumns.java

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@
99

1010
import org.elasticsearch.compute.data.Block;
1111
import org.elasticsearch.compute.data.BlockUtils;
12+
import org.elasticsearch.index.IndexMode;
1213
import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
1314
import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute;
1415
import org.elasticsearch.xpack.esql.core.expression.Expressions;
1516
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
16-
import org.elasticsearch.xpack.esql.core.util.Holder;
1717
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
18+
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
1819
import org.elasticsearch.xpack.esql.plan.logical.Eval;
1920
import org.elasticsearch.xpack.esql.plan.logical.Limit;
2021
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
21-
import org.elasticsearch.xpack.esql.plan.logical.Project;
2222
import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation;
2323
import org.elasticsearch.xpack.esql.plan.logical.local.LocalSupplier;
2424
import org.elasticsearch.xpack.esql.planner.PlannerUtils;
@@ -34,13 +34,17 @@ public final class PruneColumns extends Rule<LogicalPlan, LogicalPlan> {
3434

3535
@Override
3636
public LogicalPlan apply(LogicalPlan plan) {
37-
var used = new AttributeSet();
38-
// don't remove Evals without any Project/Aggregate (which might not occur as the last node in the plan)
39-
var seenProjection = new Holder<>(Boolean.FALSE);
40-
41-
// start top-to-bottom
42-
// and track used references
37+
// track used references
38+
var used = plan.outputSet();
39+
// while going top-to-bottom (upstream)
4340
var pl = plan.transformDown(p -> {
41+
// Note: It is NOT required to do anything special for binary plans like JOINs. It is perfectly fine that transformDown descends
42+
// first into the left side, adding all kinds of attributes to the `used` set, and then descends into the right side - even
43+
// though the `used` set will contain stuff only used in the left hand side. That's because any attribute that is used in the
44+
// left hand side must have been created in the left side as well. Even field attributes belonging to the same index fields will
45+
// have different name ids in the left and right hand sides - as in the extreme example
46+
// `FROM lookup_idx | LOOKUP JOIN lookup_idx ON key_field`.
47+
4448
// skip nodes that simply pass the input through
4549
if (p instanceof Limit) {
4650
return p;
@@ -53,7 +57,7 @@ public LogicalPlan apply(LogicalPlan plan) {
5357
do {
5458
recheck = false;
5559
if (p instanceof Aggregate aggregate) {
56-
var remaining = seenProjection.get() ? removeUnused(aggregate.aggregates(), used) : null;
60+
var remaining = removeUnused(aggregate.aggregates(), used);
5761

5862
if (remaining != null) {
5963
if (remaining.isEmpty()) {
@@ -87,10 +91,8 @@ public LogicalPlan apply(LogicalPlan plan) {
8791
);
8892
}
8993
}
90-
91-
seenProjection.set(Boolean.TRUE);
9294
} else if (p instanceof Eval eval) {
93-
var remaining = seenProjection.get() ? removeUnused(eval.fields(), used) : null;
95+
var remaining = removeUnused(eval.fields(), used);
9496
// no fields, no eval
9597
if (remaining != null) {
9698
if (remaining.isEmpty()) {
@@ -100,8 +102,16 @@ public LogicalPlan apply(LogicalPlan plan) {
100102
p = new Eval(eval.source(), eval.child(), remaining);
101103
}
102104
}
103-
} else if (p instanceof Project) {
104-
seenProjection.set(Boolean.TRUE);
105+
} else if (p instanceof EsRelation esRelation && esRelation.indexMode() == IndexMode.LOOKUP) {
106+
// Normally, pruning EsRelation has no effect because InsertFieldExtraction only extracts the required fields, anyway.
107+
// The field extraction for LOOKUP JOIN works differently, however - we extract all fields (other than the join key)
108+
// that the EsRelation has.
109+
var remaining = removeUnused(esRelation.output(), used);
110+
// TODO: LookupFromIndexOperator cannot handle 0 lookup fields, yet. That means 1 field in total (key field + lookup).
111+
// https://github.com/elastic/elasticsearch/issues/118778
112+
if (remaining != null && remaining.size() > 1) {
113+
p = new EsRelation(esRelation.source(), esRelation.index(), remaining, esRelation.indexMode(), esRelation.frozen());
114+
}
105115
}
106116
} while (recheck);
107117

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ else if (plan instanceof Project project) {
104104
FieldAttribute.class,
105105
// Do not use the attribute name, this can deviate from the field name for union types.
106106
// Also skip fields from lookup indices because we do not have stats for these.
107+
// TODO: We do have stats for lookup indices in case they are being used in the FROM clause; this can be refined.
107108
f -> stats.exists(f.fieldName()) || lookupFields.contains(f) ? f : Literal.of(f, null)
108109
);
109110
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1212
import org.elasticsearch.xpack.esql.core.tree.Source;
1313
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
14-
import org.elasticsearch.xpack.esql.plan.logical.Project;
1514
import org.elasticsearch.xpack.esql.plan.logical.SurrogateLogicalPlan;
1615
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.UsingJoinType;
1716

@@ -50,9 +49,8 @@ public LookupJoin(Source source, LogicalPlan left, LogicalPlan right, JoinConfig
5049
*/
5150
@Override
5251
public LogicalPlan surrogate() {
53-
Join normalized = new Join(source(), left(), right(), config());
5452
// TODO: decide whether to introduce USING or just basic ON semantics - keep the ordering out for now
55-
return new Project(source(), normalized, output());
53+
return new Join(source(), left(), right(), config());
5654
}
5755

5856
@Override

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,7 @@ public String describe() {
749749
return Stream.concat(
750750
Stream.concat(Stream.of(sourceOperatorFactory), intermediateOperatorFactories.stream()),
751751
Stream.of(sinkOperatorFactory)
752-
).map(Describable::describe).collect(joining("\n\\_", "\\_", ""));
752+
).map(describable -> describable == null ? "null" : describable.describe()).collect(joining("\n\\_", "\\_", ""));
753753
}
754754

755755
@Override

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3011,6 +3011,24 @@ public void testPruneChainedEval() {
30113011
var source = as(limit.child(), EsRelation.class);
30123012
}
30133013

3014+
public void testPruneChainedEvalNoProjection() {
3015+
var plan = plan("""
3016+
from test
3017+
| eval garbage = salary + 3
3018+
| eval garbage = emp_no / garbage, garbage = garbage
3019+
| eval garbage = 1
3020+
""");
3021+
var eval = as(plan, Eval.class);
3022+
var limit = as(eval.child(), Limit.class);
3023+
var source = as(limit.child(), EsRelation.class);
3024+
3025+
assertEquals(1, eval.fields().size());
3026+
var alias = as(eval.fields().getFirst(), Alias.class);
3027+
assertEquals(alias.name(), "garbage");
3028+
var literal = as(alias.child(), Literal.class);
3029+
assertEquals(1, literal.value());
3030+
}
3031+
30143032
/**
30153033
* Expects
30163034
* Limit[1000[INTEGER]]
@@ -4914,8 +4932,7 @@ public void testPlanSanityCheckWithBinaryPlans() throws Exception {
49144932
| LOOKUP JOIN languages_lookup ON language_code
49154933
""");
49164934

4917-
var project = as(plan, Project.class);
4918-
var join = as(project.child(), Join.class);
4935+
var join = as(plan, Join.class);
49194936

49204937
var joinWithInvalidLeftPlan = join.replaceChildren(join.right(), join.right());
49214938
IllegalStateException e = expectThrows(IllegalStateException.class, () -> logicalOptimizer.optimize(joinWithInvalidLeftPlan));
@@ -5921,10 +5938,9 @@ public void testLookupJoinPushDownFilterOnJoinKeyWithRename() {
59215938
""";
59225939
var plan = optimizedPlan(query);
59235940

5924-
var project = as(plan, Project.class);
5925-
var join = as(project.child(), Join.class);
5941+
var join = as(plan, Join.class);
59265942
assertThat(join.config().type(), equalTo(JoinTypes.LEFT));
5927-
project = as(join.left(), Project.class);
5943+
var project = as(join.left(), Project.class);
59285944
var limit = as(project.child(), Limit.class);
59295945
assertThat(limit.limit().fold(), equalTo(1000));
59305946
var filter = as(limit.child(), Filter.class);
@@ -5965,10 +5981,9 @@ public void testLookupJoinPushDownFilterOnLeftSideField() {
59655981

59665982
var plan = optimizedPlan(query);
59675983

5968-
var project = as(plan, Project.class);
5969-
var join = as(project.child(), Join.class);
5984+
var join = as(plan, Join.class);
59705985
assertThat(join.config().type(), equalTo(JoinTypes.LEFT));
5971-
project = as(join.left(), Project.class);
5986+
var project = as(join.left(), Project.class);
59725987

59735988
var limit = as(project.child(), Limit.class);
59745989
assertThat(limit.limit().fold(), equalTo(1000));
@@ -6009,8 +6024,7 @@ public void testLookupJoinPushDownDisabledForLookupField() {
60096024

60106025
var plan = optimizedPlan(query);
60116026

6012-
var project = as(plan, Project.class);
6013-
var limit = as(project.child(), Limit.class);
6027+
var limit = as(plan, Limit.class);
60146028
assertThat(limit.limit().fold(), equalTo(1000));
60156029

60166030
var filter = as(limit.child(), Filter.class);
@@ -6022,7 +6036,7 @@ public void testLookupJoinPushDownDisabledForLookupField() {
60226036

60236037
var join = as(filter.child(), Join.class);
60246038
assertThat(join.config().type(), equalTo(JoinTypes.LEFT));
6025-
project = as(join.left(), Project.class);
6039+
var project = as(join.left(), Project.class);
60266040

60276041
var leftRel = as(project.child(), EsRelation.class);
60286042
var rightRel = as(join.right(), EsRelation.class);
@@ -6054,8 +6068,7 @@ public void testLookupJoinPushDownSeparatedForConjunctionBetweenLeftAndRightFiel
60546068

60556069
var plan = optimizedPlan(query);
60566070

6057-
var project = as(plan, Project.class);
6058-
var limit = as(project.child(), Limit.class);
6071+
var limit = as(plan, Limit.class);
60596072
assertThat(limit.limit().fold(), equalTo(1000));
60606073
// filter kept in place, working on the right side
60616074
var filter = as(limit.child(), Filter.class);
@@ -6067,7 +6080,7 @@ public void testLookupJoinPushDownSeparatedForConjunctionBetweenLeftAndRightFiel
60676080

60686081
var join = as(filter.child(), Join.class);
60696082
assertThat(join.config().type(), equalTo(JoinTypes.LEFT));
6070-
project = as(join.left(), Project.class);
6083+
var project = as(join.left(), Project.class);
60716084
// filter pushed down
60726085
filter = as(project.child(), Filter.class);
60736086
op = as(filter.condition(), GreaterThan.class);
@@ -6079,7 +6092,6 @@ public void testLookupJoinPushDownSeparatedForConjunctionBetweenLeftAndRightFiel
60796092

60806093
var leftRel = as(filter.child(), EsRelation.class);
60816094
var rightRel = as(join.right(), EsRelation.class);
6082-
60836095
}
60846096

60856097
/**
@@ -6107,8 +6119,7 @@ public void testLookupJoinPushDownDisabledForDisjunctionBetweenLeftAndRightField
61076119

61086120
var plan = optimizedPlan(query);
61096121

6110-
var project = as(plan, Project.class);
6111-
var limit = as(project.child(), Limit.class);
6122+
var limit = as(plan, Limit.class);
61126123
assertThat(limit.limit().fold(), equalTo(1000));
61136124

61146125
var filter = as(limit.child(), Filter.class);
@@ -6128,7 +6139,7 @@ public void testLookupJoinPushDownDisabledForDisjunctionBetweenLeftAndRightField
61286139

61296140
var join = as(filter.child(), Join.class);
61306141
assertThat(join.config().type(), equalTo(JoinTypes.LEFT));
6131-
project = as(join.left(), Project.class);
6142+
var project = as(join.left(), Project.class);
61326143

61336144
var leftRel = as(project.child(), EsRelation.class);
61346145
var rightRel = as(join.right(), EsRelation.class);

0 commit comments

Comments
 (0)