Skip to content

Commit dfc1159

Browse files
author
Lukas Wegmann
authored
Rename fields with project (#421)
Fixes #418. Renames in `project` require a small conceptual change in the local execution planning. So far, the layout always contained one entry per block, and there was a 1:1 mapping from attributes to blocks/channels. This assumption no longer holds, as the same block might be mapped to multiple attribute ids. For example, the query `from test | project x = count, y = count` creates a project operator that drops all blocks except `count` and is associated with the mapping `Id(x) -> 0, Id(y) -> 0`.
1 parent ef5f0de commit dfc1159

File tree

3 files changed

+135
-18
lines changed

3 files changed

+135
-18
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
projectRename
2+
row a = 1, b = 2 | project c = a;
3+
4+
c:integer
5+
1
6+
;
7+
8+
projectRenameDuplicate
9+
row a = 1, b = 2 | project c = a, d = a;
10+
11+
c:integer | d:integer
12+
1 | 1
13+
;
14+
15+
projectRenameEval
16+
row a = 1, b = 2 | project c = a, d = a | eval e = c + d;
17+
18+
c:integer | d:integer | e:integer
19+
1 | 1 | 2
20+
;
21+
22+
projectRenameEvalProject
23+
row a = 1, b = 2 | project c = a, d = a | eval e = c + d | project e, c, d;
24+
25+
e:integer | c:integer | d:integer
26+
2 | 1 | 1
27+
;

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
5050
import static org.hamcrest.Matchers.hasItem;
5151
import static org.hamcrest.Matchers.hasSize;
52+
import static org.hamcrest.Matchers.is;
5253

5354
@Experimental
5455
@ESIntegTestCase.ClusterScope(scope = SUITE, numDataNodes = 1, numClientNodes = 0, supportsDedicatedMasters = false)
@@ -392,6 +393,55 @@ public void testEvalOverride() {
392393
}
393394
}
394395

396+
public void testProjectRename() {
397+
EsqlQueryResponse results = run("from test | project x = count, y = count");
398+
logger.info(results);
399+
Assert.assertEquals(40, results.values().size());
400+
assertThat(results.columns(), contains(new ColumnInfo("x", "long"), new ColumnInfo("y", "long")));
401+
for (List<Object> values : results.values()) {
402+
assertThat((Long) values.get(0), greaterThanOrEqualTo(40L));
403+
assertThat(values.get(1), is(values.get(0)));
404+
}
405+
}
406+
407+
public void testProjectRenameEval() {
408+
EsqlQueryResponse results = run("from test | project x = count, y = count | eval x2 = x + 1 | eval y2 = y + 2");
409+
logger.info(results);
410+
Assert.assertEquals(40, results.values().size());
411+
assertThat(
412+
results.columns(),
413+
contains(new ColumnInfo("x", "long"), new ColumnInfo("y", "long"), new ColumnInfo("x2", "long"), new ColumnInfo("y2", "long"))
414+
);
415+
for (List<Object> values : results.values()) {
416+
assertThat((Long) values.get(0), greaterThanOrEqualTo(40L));
417+
assertThat(values.get(1), is(values.get(0)));
418+
assertThat(values.get(2), is(((Long) values.get(0)) + 1));
419+
assertThat(values.get(3), is(((Long) values.get(0)) + 2));
420+
}
421+
}
422+
423+
public void testProjectRenameEvalProject() {
424+
EsqlQueryResponse results = run("from test | project x = count, y = count | eval z = x + y | project x, y, z");
425+
logger.info(results);
426+
Assert.assertEquals(40, results.values().size());
427+
assertThat(results.columns(), contains(new ColumnInfo("x", "long"), new ColumnInfo("y", "long"), new ColumnInfo("z", "long")));
428+
for (List<Object> values : results.values()) {
429+
assertThat((Long) values.get(0), greaterThanOrEqualTo(40L));
430+
assertThat(values.get(1), is(values.get(0)));
431+
assertThat(values.get(2), is((Long) values.get(0) * 2));
432+
}
433+
}
434+
435+
public void testProjectOverride() {
436+
EsqlQueryResponse results = run("from test | project count, data = count");
437+
logger.info(results);
438+
Assert.assertEquals(40, results.values().size());
439+
assertThat(results.columns(), contains(new ColumnInfo("count", "long"), new ColumnInfo("data", "long")));
440+
for (List<Object> values : results.values()) {
441+
assertThat(values.get(1), is(values.get(0)));
442+
}
443+
}
444+
395445
public void testRefreshSearchIdleShards() throws Exception {
396446
String indexName = "test_refresh";
397447
ElasticsearchAssertions.assertAcked(

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java

Lines changed: 58 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
import org.elasticsearch.xpack.ql.expression.Expression;
7575
import org.elasticsearch.xpack.ql.expression.Expressions;
7676
import org.elasticsearch.xpack.ql.expression.Literal;
77+
import org.elasticsearch.xpack.ql.expression.NameId;
7778
import org.elasticsearch.xpack.ql.expression.NamedExpression;
7879
import org.elasticsearch.xpack.ql.expression.Order;
7980
import org.elasticsearch.xpack.ql.expression.function.aggregate.AggregateFunction;
@@ -84,6 +85,7 @@
8485
import java.util.ArrayList;
8586
import java.util.BitSet;
8687
import java.util.HashMap;
88+
import java.util.HashSet;
8789
import java.util.List;
8890
import java.util.Map;
8991
import java.util.Set;
@@ -278,7 +280,7 @@ public PhysicalOperation plan(PhysicalPlan node, LocalExecutionPlanContext conte
278280
}
279281
}
280282
Function<Page, Page> mapper = transformRequired ? p -> {
281-
var blocks = new Block[p.getBlockCount()];
283+
var blocks = new Block[mappedPosition.length];
282284
for (int i = 0; i < blocks.length; i++) {
283285
blocks[i] = p.getBlock(mappedPosition[i]);
284286
}
@@ -344,9 +346,8 @@ public PhysicalOperation plan(PhysicalPlan node, LocalExecutionPlanContext conte
344346
} else {
345347
throw new UnsupportedOperationException();
346348
}
347-
Map<Object, Integer> layout = new HashMap<>();
348-
layout.putAll(source.layout);
349-
layout.put(namedExpression.toAttribute().id(), layout.size());
349+
Map<Object, Integer> layout = new HashMap<>(source.layout);
350+
layout.put(namedExpression.toAttribute().id(), nextFreeChannel(layout));
350351
return new PhysicalOperation(
351352
new EvalOperatorFactory(evaluator, namedExpression.dataType().isRational() ? Double.TYPE : Long.TYPE),
352353
layout,
@@ -368,22 +369,46 @@ public PhysicalOperation plan(PhysicalPlan node, LocalExecutionPlanContext conte
368369
return new PhysicalOperation(new RowOperatorFactory(obj), layout);
369370
} else if (node instanceof ProjectExec project) {
370371
var source = plan(project.child(), context);
371-
Map<Object, Integer> layout = new HashMap<>();
372372

373-
var outputSet = project.outputSet();
374-
var input = project.child().output();
375-
var mask = new BitSet(input.size());
376-
int layoutPos = 0;
377-
for (Attribute element : input) {
378-
var id = element.id();
379-
var maskPosition = source.layout.get(id);
380-
var keepColumn = outputSet.contains(element);
381-
mask.set(maskPosition, keepColumn);
382-
if (keepColumn) {
383-
layout.put(id, layoutPos++);
373+
Map<Integer, Set<NameId>> inputChannelToInputIds = new HashMap<>(source.layout.size());
374+
for (Map.Entry<Object, Integer> entry : source.layout.entrySet()) {
375+
inputChannelToInputIds.computeIfAbsent(entry.getValue(), ignore -> new HashSet<>()).add((NameId) entry.getKey());
376+
}
377+
378+
Map<Integer, Set<NameId>> inputChannelToOutputIds = new HashMap<>(inputChannelToInputIds.size());
379+
for (NamedExpression ne : project.projections()) {
380+
NameId inputId;
381+
if (ne instanceof Alias a) {
382+
inputId = ((NamedExpression) a.child()).id();
383+
} else {
384+
inputId = ne.id();
385+
}
386+
int inputChannel = source.layout.get(inputId);
387+
inputChannelToOutputIds.computeIfAbsent(inputChannel, ignore -> new HashSet<>()).add(ne.id());
388+
}
389+
390+
BitSet mask = new BitSet(inputChannelToInputIds.size());
391+
Map<Object, Integer> layout = new HashMap<>(project.projections().size());
392+
int outChannel = 0;
393+
394+
for (int inChannel = 0; inChannel < inputChannelToInputIds.size(); inChannel++) {
395+
Set<NameId> outputIds = inputChannelToOutputIds.get(inChannel);
396+
397+
if (outputIds != null) {
398+
mask.set(inChannel);
399+
for (NameId outId : outputIds) {
400+
layout.put(outId, outChannel);
401+
}
402+
outChannel++;
384403
}
385404
}
386-
return new PhysicalOperation(new ProjectOperatorFactory(mask), layout, source);
405+
406+
if (mask.cardinality() == inputChannelToInputIds.size()) {
407+
// all columns are retained, project operator is not needed but the layout needs to be updated
408+
return new PhysicalOperation(source.operatorFactories, layout);
409+
} else {
410+
return new PhysicalOperation(new ProjectOperatorFactory(mask), layout, source);
411+
}
387412
} else if (node instanceof FilterExec filter) {
388413
PhysicalOperation source = plan(filter.child(), context);
389414
return new PhysicalOperation(new FilterOperatorFactory(toEvaluator(filter.condition(), source.layout)), source.layout, source);
@@ -421,7 +446,7 @@ private PhysicalOperation planFieldExtractNode(LocalExecutionPlanContext context
421446
PhysicalOperation op = source;
422447
for (Attribute attr : fieldExtractExec.attributesToExtract()) {
423448
layout = new HashMap<>(layout);
424-
layout.put(attr.id(), layout.size());
449+
layout.put(attr.id(), nextFreeChannel(layout));
425450
Map<Object, Integer> previousLayout = op.layout;
426451

427452
// Create ValuesSource object for the field to extract its values
@@ -519,6 +544,11 @@ public static class PhysicalOperation implements Describable {
519544
this.layout = layout;
520545
}
521546

547+
/**
 * Creates an operation from an existing list of operator factories and a layout,
 * without appending any new operator.
 *
 * @param operatorFactories factories copied into this operation's own factory list
 * @param layout            mapping from attribute id to channel, stored as given
 */
PhysicalOperation(List<OperatorFactory> operatorFactories, Map<Object, Integer> layout) {
    this.layout = layout;
    this.operatorFactories.addAll(operatorFactories);
}
551+
522552
PhysicalOperation(OperatorFactory operatorFactory, Map<Object, Integer> layout, PhysicalOperation source) {
523553
this.operatorFactories.addAll(source.operatorFactories);
524554
this.operatorFactories.add(operatorFactory);
@@ -535,6 +565,16 @@ public String describe() {
535565
}
536566
}
537567

568+
private static int nextFreeChannel(Map<Object, Integer> layout) {
569+
int nextChannel = 0;
570+
for (int channel : layout.values()) {
571+
if (channel >= nextChannel) {
572+
nextChannel = channel + 1;
573+
}
574+
}
575+
return nextChannel;
576+
}
577+
538578
/**
539579
* The count and type of driver parallelism.
540580
*/

0 commit comments

Comments
 (0)