Skip to content

Commit bcf2c11

Browse files
authored
EQL: Introduce sequence internal paging (#58859)
Refactor sequence matching classes in order to decouple querying from results consumption (and matching). Rename some classes to better convey their intent. Introduce internal pagination of the sequence algorithm, that is, getting the data in slices and, if needed, moving forward in order to find more matches until either the dataset is consumed or the desired number of results is found.
1 parent f42d55d commit bcf2c11

File tree

17 files changed

+543
-345
lines changed

17 files changed

+543
-345
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.eql.execution.assembler;
8+
9+
import org.elasticsearch.index.query.BoolQueryBuilder;
10+
import org.elasticsearch.index.query.RangeQueryBuilder;
11+
import org.elasticsearch.search.builder.SearchSourceBuilder;
12+
import org.elasticsearch.xpack.eql.execution.search.Ordinal;
13+
import org.elasticsearch.xpack.eql.execution.search.QueryRequest;
14+
15+
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
16+
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
17+
18+
public class BoxedQueryRequest implements QueryRequest {
19+
20+
private final RangeQueryBuilder timestampRange;
21+
private final RangeQueryBuilder tiebreakerRange;
22+
23+
private final SearchSourceBuilder searchSource;
24+
25+
public BoxedQueryRequest(QueryRequest original, String timestamp, String tiebreaker) {
26+
searchSource = original.searchSource();
27+
28+
// setup range queries and preserve their reference to simplify the update
29+
timestampRange = rangeQuery(timestamp).timeZone("UTC").format("epoch_millis");
30+
BoolQueryBuilder filter = boolQuery().filter(timestampRange);
31+
if (tiebreaker != null) {
32+
tiebreakerRange = rangeQuery(tiebreaker);
33+
filter.filter(tiebreakerRange);
34+
} else {
35+
tiebreakerRange = null;
36+
}
37+
// add ranges to existing query
38+
searchSource.query(filter.must(searchSource.query()));
39+
}
40+
41+
@Override
42+
public SearchSourceBuilder searchSource() {
43+
return searchSource;
44+
}
45+
46+
@Override
47+
public void next(Ordinal ordinal) {
48+
// reset existing constraints
49+
timestampRange.gte(null).lte(null);
50+
if (tiebreakerRange != null) {
51+
tiebreakerRange.gte(null).lte(null);
52+
}
53+
// and leave only search_after
54+
searchSource.searchAfter(ordinal.toArray());
55+
}
56+
57+
public BoxedQueryRequest between(Ordinal begin, Ordinal end) {
58+
timestampRange.gte(begin.timestamp()).lte(end.timestamp());
59+
60+
if (tiebreakerRange != null) {
61+
tiebreakerRange.gte(begin.tiebreaker()).lte(end.tiebreaker());
62+
}
63+
64+
return this;
65+
}
66+
67+
@Override
68+
public String toString() {
69+
return searchSource.toString();
70+
}
71+
}

x-pack/plugin/eql/src/main/java/org/elasticsearch/xpack/eql/execution/assembler/Criterion.java

Lines changed: 48 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -7,100 +7,83 @@
77
package org.elasticsearch.xpack.eql.execution.assembler;
88

99
import org.elasticsearch.search.SearchHit;
10-
import org.elasticsearch.search.builder.SearchSourceBuilder;
1110
import org.elasticsearch.xpack.eql.EqlIllegalArgumentException;
11+
import org.elasticsearch.xpack.eql.execution.search.Ordinal;
1212
import org.elasticsearch.xpack.eql.execution.search.QueryRequest;
13-
import org.elasticsearch.xpack.eql.execution.sequence.Ordinal;
14-
import org.elasticsearch.xpack.eql.util.ReversedIterator;
13+
import org.elasticsearch.xpack.eql.execution.sequence.SequenceKey;
1514
import org.elasticsearch.xpack.ql.execution.search.extractor.HitExtractor;
1615

1716
import java.util.List;
1817

19-
public class Criterion implements QueryRequest {
20-
21-
private final SearchSourceBuilder searchSource;
22-
private final List<HitExtractor> keyExtractors;
23-
private final HitExtractor timestampExtractor;
24-
private final HitExtractor tiebreakerExtractor;
25-
26-
// search after markers
27-
private Ordinal startMarker;
28-
private Ordinal stopMarker;
29-
30-
private boolean reverse;
31-
32-
//TODO: should accept QueryRequest instead of another SearchSourceBuilder
33-
public Criterion(SearchSourceBuilder searchSource,
34-
List<HitExtractor> searchAfterExractors,
35-
HitExtractor timestampExtractor,
36-
HitExtractor tiebreakerExtractor,
37-
boolean reverse) {
38-
this.searchSource = searchSource;
39-
this.keyExtractors = searchAfterExractors;
40-
this.timestampExtractor = timestampExtractor;
41-
this.tiebreakerExtractor = tiebreakerExtractor;
42-
43-
this.startMarker = null;
44-
this.stopMarker = null;
18+
public class Criterion<Q extends QueryRequest> {
19+
20+
private final int stage;
21+
private final Q queryRequest;
22+
private final List<HitExtractor> keys;
23+
private final HitExtractor timestamp;
24+
private final HitExtractor tiebreaker;
25+
26+
private final boolean reverse;
27+
28+
Criterion(int stage,
29+
Q queryRequest,
30+
List<HitExtractor> keys,
31+
HitExtractor timestamp,
32+
HitExtractor tiebreaker,
33+
boolean reverse) {
34+
this.stage = stage;
35+
this.queryRequest = queryRequest;
36+
this.keys = keys;
37+
this.timestamp = timestamp;
38+
this.tiebreaker = tiebreaker;
39+
4540
this.reverse = reverse;
4641
}
4742

48-
@Override
49-
public SearchSourceBuilder searchSource() {
50-
return searchSource;
43+
public int stage() {
44+
return stage;
5145
}
5246

53-
public List<HitExtractor> keyExtractors() {
54-
return keyExtractors;
47+
boolean reverse() {
48+
return reverse;
5549
}
5650

57-
public HitExtractor timestampExtractor() {
58-
return timestampExtractor;
51+
public Q queryRequest() {
52+
return queryRequest;
5953
}
6054

61-
public HitExtractor tiebreakerExtractor() {
62-
return tiebreakerExtractor;
55+
public SequenceKey key(SearchHit hit) {
56+
SequenceKey key;
57+
if (keys.isEmpty()) {
58+
key = SequenceKey.NONE;
59+
} else {
60+
Object[] docKeys = new Object[keys.size()];
61+
for (int i = 0; i < docKeys.length; i++) {
62+
docKeys[i] = keys.get(i).extract(hit);
63+
}
64+
key = new SequenceKey(docKeys);
65+
}
66+
return key;
6367
}
6468

6569
@SuppressWarnings({ "unchecked" })
6670
public Ordinal ordinal(SearchHit hit) {
6771

68-
Object ts = timestampExtractor.extract(hit);
72+
Object ts = timestamp.extract(hit);
6973
if (ts instanceof Number == false) {
7074
throw new EqlIllegalArgumentException("Expected timestamp as long but got {}", ts);
7175
}
7276

7377
long timestamp = ((Number) ts).longValue();
74-
Comparable<Object> tiebreaker = null;
78+
Comparable<Object> tbreaker = null;
7579

76-
if (tiebreakerExtractor != null) {
77-
Object tb = tiebreakerExtractor.extract(hit);
80+
if (tiebreaker != null) {
81+
Object tb = tiebreaker.extract(hit);
7882
if (tb instanceof Comparable == false) {
7983
throw new EqlIllegalArgumentException("Expected tiebreaker to be Comparable but got {}", tb);
8084
}
81-
tiebreaker = (Comparable<Object>) tb;
85+
tbreaker = (Comparable<Object>) tb;
8286
}
83-
return new Ordinal(timestamp, tiebreaker);
84-
}
85-
86-
public void startMarker(Ordinal ordinal) {
87-
startMarker = ordinal;
88-
}
89-
90-
public void stopMarker(Ordinal ordinal) {
91-
stopMarker = ordinal;
92-
}
93-
94-
public Ordinal nextMarker() {
95-
return startMarker.compareTo(stopMarker) < 1 ? startMarker : stopMarker;
96-
}
97-
98-
public Criterion useMarker(Ordinal marker) {
99-
searchSource.searchAfter(marker.toArray());
100-
return this;
101-
}
102-
103-
public Iterable<SearchHit> iterable(List<SearchHit> hits) {
104-
return () -> reverse ? new ReversedIterator<>(hits) : hits.iterator();
87+
return new Ordinal(timestamp, tbreaker);
10588
}
10689
}

x-pack/plugin/eql/src/main/java/org/elasticsearch/xpack/eql/execution/assembler/ExecutionManager.java

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.elasticsearch.xpack.ql.expression.Expression;
2525
import org.elasticsearch.xpack.ql.expression.Expressions;
2626
import org.elasticsearch.xpack.ql.expression.Order.OrderDirection;
27-
import org.elasticsearch.xpack.ql.util.Check;
2827

2928
import java.util.ArrayList;
3029
import java.util.List;
@@ -48,27 +47,51 @@ public Executable assemble(List<List<Attribute>> listOfKeys,
4847
Limit limit) {
4948
FieldExtractorRegistry extractorRegistry = new FieldExtractorRegistry();
5049

51-
List<Criterion> criteria = new ArrayList<>(plans.size() - 1);
52-
5350
boolean descending = direction == OrderDirection.DESC;
51+
52+
// fields
53+
HitExtractor tsExtractor = timestampExtractor(hitExtractor(timestamp, extractorRegistry));
54+
HitExtractor tbExtractor = Expressions.isPresent(tiebreaker) ? hitExtractor(tiebreaker, extractorRegistry) : null;
55+
// NB: since there's no aliasing inside EQL, the attribute name is the same as the underlying field name
56+
String timestampName = Expressions.name(timestamp);
57+
String tiebreakerName = Expressions.isPresent(tiebreaker) ? Expressions.name(tiebreaker) : null;
58+
59+
// secondary criteria
60+
List<Criterion<BoxedQueryRequest>> criteria = new ArrayList<>(plans.size() - 1);
5461

5562
// build a criterion for each query
56-
for (int i = 0; i < plans.size() - 1; i++) {
63+
for (int i = 0; i < plans.size(); i++) {
5764
List<Attribute> keys = listOfKeys.get(i);
58-
// fields
59-
HitExtractor tsExtractor = timestampExtractor(hitExtractor(timestamp, extractorRegistry));
60-
HitExtractor tbExtractor = Expressions.isPresent(tiebreaker) ? hitExtractor(tiebreaker, extractorRegistry) : null;
6165
List<HitExtractor> keyExtractors = hitExtractors(keys, extractorRegistry);
6266

6367
PhysicalPlan query = plans.get(i);
6468
// search query
65-
// TODO: this could be generalized into an exec only query
66-
Check.isTrue(query instanceof EsQueryExec, "Expected a query but got [{}]", query.getClass());
67-
QueryRequest request = ((EsQueryExec) query).queryRequest(session);
68-
// base query remains descending, the rest need to flip
69-
criteria.add(new Criterion(request.searchSource(), keyExtractors, tsExtractor, tbExtractor, i > 0 && descending));
69+
if (query instanceof EsQueryExec) {
70+
QueryRequest original = ((EsQueryExec) query).queryRequest(session);
71+
72+
BoxedQueryRequest boxedRequest = new BoxedQueryRequest(original, timestampName, tiebreakerName);
73+
Criterion<BoxedQueryRequest> criterion =
74+
new Criterion<>(i, boxedRequest, keyExtractors, tsExtractor, tbExtractor, i> 0 && descending);
75+
criteria.add(criterion);
76+
} else {
77+
// until
78+
if (i != plans.size() - 1) {
79+
throw new EqlIllegalArgumentException("Expected a query but got [{}]", query.getClass());
80+
} else {
81+
criteria.add(null);
82+
}
83+
}
7084
}
71-
return new SequenceRuntime(criteria, new BasicQueryClient(session), maxSpan, limit);
85+
86+
int completionStage = criteria.size() - 1;
87+
Matcher matcher = new Matcher(completionStage, maxSpan, limit);
88+
89+
TumblingWindow w = new TumblingWindow(new BasicQueryClient(session),
90+
criteria.subList(0, completionStage),
91+
criteria.get(completionStage),
92+
matcher);
93+
94+
return w;
7295
}
7396

7497
private HitExtractor timestampExtractor(HitExtractor hitExtractor) {

x-pack/plugin/eql/src/main/java/org/elasticsearch/xpack/eql/execution/assembler/KeyAndOrdinal.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66

77
package org.elasticsearch.xpack.eql.execution.assembler;
88

9-
import org.elasticsearch.xpack.eql.execution.sequence.Ordinal;
9+
import org.elasticsearch.xpack.eql.execution.search.Ordinal;
1010
import org.elasticsearch.xpack.eql.execution.sequence.SequenceKey;
1111

1212
import java.util.Objects;
1313

14-
class KeyAndOrdinal {
14+
public class KeyAndOrdinal {
1515
final SequenceKey key;
1616
final Ordinal ordinal;
1717

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.eql.execution.assembler;
7+
8+
import org.elasticsearch.common.collect.Tuple;
9+
import org.elasticsearch.common.unit.TimeValue;
10+
import org.elasticsearch.search.SearchHit;
11+
import org.elasticsearch.xpack.eql.execution.search.Limit;
12+
import org.elasticsearch.xpack.eql.execution.search.Ordinal;
13+
import org.elasticsearch.xpack.eql.execution.sequence.Sequence;
14+
import org.elasticsearch.xpack.eql.execution.sequence.SequenceStateMachine;
15+
import org.elasticsearch.xpack.eql.session.Payload;
16+
17+
import java.util.List;
18+
19+
/**
20+
* Executable tracking sequences at runtime.
21+
*/
22+
class Matcher {
23+
24+
// NB: just like in a list, this represents the total number of stages yet counting starts at 0
25+
private final SequenceStateMachine stateMachine;
26+
private final int numberOfStages;
27+
28+
Matcher(int numberOfStages, TimeValue maxSpan, Limit limit) {
29+
this.numberOfStages = numberOfStages;
30+
this.stateMachine = new SequenceStateMachine(numberOfStages, maxSpan, limit);
31+
}
32+
33+
/**
34+
* Match hits for the given stage.
35+
* Returns false if the process needs to be stopped.
36+
*/
37+
boolean match(int stage, Iterable<Tuple<KeyAndOrdinal, SearchHit>> hits) {
38+
for (Tuple<KeyAndOrdinal, SearchHit> tuple : hits) {
39+
KeyAndOrdinal ko = tuple.v1();
40+
SearchHit hit = tuple.v2();
41+
42+
if (stage == 0) {
43+
Sequence seq = new Sequence(ko.key, numberOfStages, ko.ordinal, hit);
44+
stateMachine.trackSequence(seq);
45+
} else {
46+
stateMachine.match(stage, ko.key, ko.ordinal, hit);
47+
48+
// early skip in case of reaching the limit
49+
// check the last stage to avoid calling the state machine in other stages
50+
if (stateMachine.reachedLimit()) {
51+
return false;
52+
}
53+
}
54+
}
55+
return true;
56+
}
57+
58+
boolean until(Iterable<Ordinal> markers) {
59+
// no-op so far
60+
61+
return false;
62+
}
63+
64+
65+
public boolean hasCandidates(int stage) {
66+
return stateMachine.hasCandidates(stage);
67+
}
68+
69+
Payload payload(long startTime) {
70+
List<Sequence> completed = stateMachine.completeSequences();
71+
TimeValue tookTime = new TimeValue(System.currentTimeMillis() - startTime);
72+
return new SequencePayload(completed, false, tookTime);
73+
}
74+
}

0 commit comments

Comments
 (0)