Commit f8dfc46
Change internal representation of bucket key of time_series agg
Currently, the key is a map, which can make reducing large responses more memory intensive than it should be. Additionally, the data structures used during reduce are not backed by big arrays and so are not accounted for. This commit changes how the key is represented internally, by using BytesRef instead of Map. It doesn't change how the key is represented in the response. It also changes the reduce method to take advantage of the fact that bucket keys are now bytes refs. Relates to elastic#74660
1 parent a8a684e commit f8dfc46
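
For readers unfamiliar with the hashing utility this commit switches to: BytesRefHash assigns each distinct key a non-negative ord on first insertion and returns -1 - ord when the key is already present, which is the contract the key < 0 branch in the new reduce method decodes. A minimal sketch of that contract (standalone, so it assumes BigArrays.NON_RECYCLING_INSTANCE, which skips circuit-breaker accounting; the reduce code below uses reduceContext.bigArrays() instead):

    import org.apache.lucene.util.BytesRef;
    import org.elasticsearch.common.util.BigArrays;
    import org.elasticsearch.common.util.BytesRefHash;

    class BytesRefHashContract {
        public static void main(String[] args) {
            // BytesRefHash is Releasable, hence the try-with-resources.
            try (BytesRefHash tsids = new BytesRefHash(16, BigArrays.NON_RECYCLING_INSTANCE)) {
                long first = tsids.add(new BytesRef("tsid-a")); // new key: returns its ord (>= 0)
                long dup = tsids.add(new BytesRef("tsid-a"));   // duplicate: returns -1 - ord (< 0)
                long ord = -1 - dup;                            // recover the existing key's ord
                assert first >= 0 && ord == first;
                assert tsids.size() == 1;                       // one distinct tsid inserted
            }
        }
    }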

File tree

3 files changed: +65 -38 lines changed

server/src/main/java/org/elasticsearch/search/aggregations/timeseries/InternalTimeSeries.java

Lines changed: 50 additions & 34 deletions
@@ -8,8 +8,13 @@
 
 package org.elasticsearch.search.aggregations.timeseries;
 
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.BytesRefHash;
+import org.elasticsearch.common.util.LongObjectPagedHashMap;
+import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.elasticsearch.search.aggregations.AggregationReduceContext;
 import org.elasticsearch.search.aggregations.InternalAggregation;
 import org.elasticsearch.search.aggregations.InternalAggregations;
@@ -46,11 +51,12 @@ public class InternalTimeSeries extends InternalMultiBucketAggregation<InternalT
     public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucket implements TimeSeries.Bucket {
         protected long bucketOrd;
         protected final boolean keyed;
-        protected final Map<String, Object> key;
+        protected final BytesRef key;
+        // TODO: make computing docCount optional
         protected long docCount;
         protected InternalAggregations aggregations;
 
-        public InternalBucket(Map<String, Object> key, long docCount, InternalAggregations aggregations, boolean keyed) {
+        public InternalBucket(BytesRef key, long docCount, InternalAggregations aggregations, boolean keyed) {
             this.key = key;
             this.docCount = docCount;
             this.aggregations = aggregations;
@@ -62,26 +68,26 @@ public InternalBucket(Map<String, Object> key, long docCount, InternalAggregatio
          */
         public InternalBucket(StreamInput in, boolean keyed) throws IOException {
             this.keyed = keyed;
-            key = in.readOrderedMap(StreamInput::readString, StreamInput::readGenericValue);
+            key = in.readBytesRef();
            docCount = in.readVLong();
            aggregations = InternalAggregations.readFrom(in);
        }
 
        @Override
        public void writeTo(StreamOutput out) throws IOException {
-            out.writeMap(key, StreamOutput::writeString, StreamOutput::writeGenericValue);
+            out.writeBytesRef(key);
            out.writeVLong(docCount);
            aggregations.writeTo(out);
        }
 
        @Override
        public Map<String, Object> getKey() {
-            return key;
+            return TimeSeriesIdFieldMapper.decodeTsid(key);
        }
 
        @Override
        public String getKeyAsString() {
-            return key.toString();
+            return getKey().toString();
        }
 
        @Override
@@ -96,8 +102,10 @@ public InternalAggregations getAggregations() {
 
        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            // Use map key in the xcontent response:
+            var key = getKey();
            if (keyed) {
-                builder.startObject(getKeyAsString());
+                builder.startObject(key.toString());
            } else {
                builder.startObject();
            }
@@ -186,38 +194,46 @@ protected void doWriteTo(StreamOutput out) throws IOException {
 
    @Override
    public InternalAggregation reduce(List<InternalAggregation> aggregations, AggregationReduceContext reduceContext) {
-        // We still need to reduce in case we got the same time series in 2 different indices, but we should be able to optimize
-        // that in the future
-        Map<Map<String, Object>, List<InternalBucket>> bucketsList = null;
-        for (InternalAggregation aggregation : aggregations) {
-            InternalTimeSeries timeSeries = (InternalTimeSeries) aggregation;
-            if (bucketsList != null) {
-                for (InternalBucket bucket : timeSeries.buckets) {
-                    bucketsList.compute(bucket.key, (map, list) -> {
-                        if (list == null) {
-                            list = new ArrayList<>();
+        // TODO: optimize single result case either by having a if check here and return aggregations.get(0) or
+        // by overwriting the mustReduceOnSingleInternalAgg() method
+
+        final BigArrays bigArrays = reduceContext.bigArrays();
+        final int initialCapacity = aggregations.stream()
+            .map(value -> (InternalTimeSeries) value)
+            .mapToInt(value -> value.getBuckets().size())
+            .max()
+            .getAsInt();
+        try (LongObjectPagedHashMap<List<InternalBucket>> tsKeyToBuckets = new LongObjectPagedHashMap<>(initialCapacity, bigArrays)) {
+            final int numTsids;
+            // We still need to reduce in case we got the same time series in 2 different indices, but we should be able to optimize
+            // that in the future
+            try (BytesRefHash tsids = new BytesRefHash(initialCapacity, bigArrays)) {
+                for (int i = 0; i < aggregations.size(); i++) {
+                    InternalTimeSeries timeSeries = (InternalTimeSeries) aggregations.get(i);
+                    for (int j = 0; j < timeSeries.getBuckets().size(); j++) {
+                        InternalBucket bucket = timeSeries.getBuckets().get(j);
+                        long key = tsids.add(bucket.key);
+                        List<InternalBucket> buckets;
+                        if (key < 0) {
+                            key = -1 - key;
+                            buckets = tsKeyToBuckets.get(key);
+                        } else {
+                            buckets = new ArrayList<>();
                        }
-                        list.add(bucket);
-                        return list;
-                    });
-                }
-            } else {
-                bucketsList = new HashMap<>(timeSeries.buckets.size());
-                for (InternalTimeSeries.InternalBucket bucket : timeSeries.buckets) {
-                    List<InternalBucket> bucketList = new ArrayList<>();
-                    bucketList.add(bucket);
-                    bucketsList.put(bucket.key, bucketList);
+                        buckets.add(bucket);
+                        tsKeyToBuckets.put(key, buckets);
+                    }
                }
+                numTsids = (int) tsids.size();
            }
-        }
 
-        reduceContext.consumeBucketsAndMaybeBreak(bucketsList.size());
-        InternalTimeSeries reduced = new InternalTimeSeries(name, new ArrayList<>(bucketsList.size()), keyed, getMetadata());
-        for (Map.Entry<Map<String, Object>, List<InternalBucket>> bucketEntry : bucketsList.entrySet()) {
-            reduced.buckets.add(reduceBucket(bucketEntry.getValue(), reduceContext));
+            reduceContext.consumeBucketsAndMaybeBreak(numTsids);
+            InternalTimeSeries reduced = new InternalTimeSeries(name, new ArrayList<>(numTsids), keyed, getMetadata());
+            for (LongObjectPagedHashMap.Cursor<List<InternalBucket>> tsKeyToBucket : tsKeyToBuckets) {
+                reduced.buckets.add(reduceBucket(tsKeyToBucket.value, reduceContext));
+            }
+            return reduced;
        }
-        return reduced;
-
    }
 
    @Override
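
Both BytesRefHash and LongObjectPagedHashMap are Releasable and allocate their backing pages from BigArrays, which is why the new reduce wraps them in try-with-resources and why their memory now shows up in the big-arrays accounting the commit message mentions. A small sketch of the cursor-based iteration used in the final pass above (again assuming BigArrays.NON_RECYCLING_INSTANCE so the snippet runs standalone):

    import org.elasticsearch.common.util.BigArrays;
    import org.elasticsearch.common.util.LongObjectPagedHashMap;

    class CursorIteration {
        public static void main(String[] args) {
            try (LongObjectPagedHashMap<String> map = new LongObjectPagedHashMap<>(16, BigArrays.NON_RECYCLING_INSTANCE)) {
                map.put(0, "first");
                map.put(1, "second");
                // Iteration yields Cursor objects exposing key and value fields,
                // mirroring the tsKeyToBuckets loop in the reduce method above.
                for (LongObjectPagedHashMap.Cursor<String> cursor : map) {
                    System.out.println(cursor.key + " -> " + cursor.value);
                }
            }
        }
    }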

server/src/main/java/org/elasticsearch/search/aggregations/timeseries/TimeSeriesAggregator.java

Lines changed: 2 additions & 3 deletions
@@ -10,7 +10,6 @@
 
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.core.Releasables;
-import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.elasticsearch.search.aggregations.AggregationExecutionContext;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
@@ -51,14 +50,14 @@ public TimeSeriesAggregator(
    public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
        InternalTimeSeries.InternalBucket[][] allBucketsPerOrd = new InternalTimeSeries.InternalBucket[owningBucketOrds.length][];
        for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) {
-            BytesRef spareKey = new BytesRef();
            BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]);
            List<InternalTimeSeries.InternalBucket> buckets = new ArrayList<>();
            while (ordsEnum.next()) {
                long docCount = bucketDocCount(ordsEnum.ord());
+                BytesRef spareKey = new BytesRef();
                ordsEnum.readValue(spareKey);
                InternalTimeSeries.InternalBucket bucket = new InternalTimeSeries.InternalBucket(
-                    TimeSeriesIdFieldMapper.decodeTsid(spareKey),
+                    spareKey,
                    docCount,
                    null,
                    keyed
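
One subtlety in this file: spareKey moves from outside the loop to inside it. Previously decodeTsid copied the key data into a fresh map, so a single spare BytesRef could be reused across iterations; now the BytesRef itself is stored in each bucket, so sharing one instance would leave every bucket pointing at the same key. A hypothetical illustration of the aliasing hazard (plain Java, not code from this commit):

    import org.apache.lucene.util.BytesRef;
    import java.util.ArrayList;
    import java.util.List;

    class SharedBytesRefHazard {
        public static void main(String[] args) {
            BytesRef shared = new BytesRef();
            List<BytesRef> keys = new ArrayList<>();
            for (byte b = 1; b <= 3; b++) {
                // readValue(...) repoints bytes/offset/length rather than copying, much like this:
                shared.bytes = new byte[] { b };
                shared.offset = 0;
                shared.length = 1;
                keys.add(shared); // every element is the same object
            }
            // All three "keys" now report the last value written:
            for (BytesRef key : keys) {
                System.out.println(key.bytes[key.offset]); // prints 3 three times
            }
        }
    }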

server/src/test/java/org/elasticsearch/search/aggregations/timeseries/InternalTimeSeriesTests.java

Lines changed: 13 additions & 1 deletion
@@ -8,9 +8,12 @@
 
 package org.elasticsearch.search.aggregations.timeseries;
 
+import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.test.InternalMultiBucketAggregationTestCase;
 
+import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -28,7 +31,16 @@ private List<InternalTimeSeries.InternalBucket> randomBuckets(boolean keyed, Int
        List<Map<String, Object>> keys = randomKeys(bucketKeys(randomIntBetween(1, 4)), numberOfBuckets);
        for (int j = 0; j < numberOfBuckets; j++) {
            long docCount = randomLongBetween(0, Long.MAX_VALUE / (20L * numberOfBuckets));
-            bucketList.add(new InternalTimeSeries.InternalBucket(keys.get(j), docCount, aggregations, keyed));
+            var builder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null);
+            for (var entry : keys.get(j).entrySet()) {
+                builder.addString(entry.getKey(), (String) entry.getValue());
+            }
+            try {
+                var key = builder.build().toBytesRef();
+                bucketList.add(new InternalTimeSeries.InternalBucket(key, docCount, aggregations, keyed));
+            } catch (IOException e) {
+                throw new UncheckedIOException(e);
+            }
        }
        return bucketList;
    }
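
The builder/decoder pair used in this test and in getKey() round-trips between the two key representations. A sketch of that round trip (hypothetical dimension names and values; assumes string dimensions, as the test does, and that build() can throw IOException, which the test wraps in UncheckedIOException):

    import org.apache.lucene.util.BytesRef;
    import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;

    import java.io.IOException;
    import java.util.Map;

    class TsidRoundTrip {
        public static void main(String[] args) throws IOException {
            var builder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null);
            builder.addString("host", "a");  // hypothetical dimensions
            builder.addString("pod", "p1");
            BytesRef encoded = builder.build().toBytesRef();  // the new internal bucket key
            Map<String, Object> decoded = TimeSeriesIdFieldMapper.decodeTsid(encoded);
            System.out.println(decoded);  // what InternalBucket.getKey() now returns, e.g. {host=a, pod=p1}
        }
    }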
