Skip to content

Commit a3a98c7

Browse files
authored
Cache completion stats between refreshes (#51991)
Computing the stats for completion fields may involve a significant amount of work since it walks every field of every segment looking for completion fields. Innocuous-looking APIs like `GET _stats` or `GET _cluster/stats` do this for every shard in the cluster. This repeated work is unnecessary since these stats do not change between refreshes; in many indices they remain constant for a long time. This commit introduces a cache for these stats which is invalidated on a refresh, allowing most stats calls to bypass the work needed to compute them on most shards. Closes #51915
1 parent 416dc46 commit a3a98c7

File tree

7 files changed

+458
-37
lines changed

7 files changed

+458
-37
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
---
2+
setup:
3+
4+
- do:
5+
indices.create:
6+
index: test1
7+
wait_for_active_shards: all
8+
body:
9+
settings:
10+
# Limit the number of shards so that shards are unlikely
11+
# to be relocated or initialized between the test
12+
# set up and the test execution
13+
index.number_of_shards: 3
14+
index.number_of_replicas: 0
15+
mappings:
16+
properties:
17+
bar:
18+
type: text
19+
fielddata: true
20+
fields:
21+
completion:
22+
type: completion
23+
24+
- do:
25+
cluster.health:
26+
wait_for_no_relocating_shards: true
27+
wait_for_events: languid
28+
29+
- do:
30+
index:
31+
index: test1
32+
id: 1
33+
body: { "bar": "bar" }
34+
35+
- do:
36+
index:
37+
index: test1
38+
id: 2
39+
body: { "bar": "foo" }
40+
41+
- do:
42+
indices.refresh: {}
43+
44+
---
45+
"Completion stats":
46+
- do:
47+
indices.stats: { completion_fields: "*" }
48+
49+
- match: { _shards.failed: 0}
50+
- gt: { _all.total.completion.fields.bar\.completion.size_in_bytes: 0 }
51+
- gt: { _all.total.completion.size_in_bytes: 0 }
52+
- set: { _all.total.completion.size_in_bytes: original_size }
53+
54+
- do:
55+
index:
56+
index: test1
57+
id: 3
58+
body: { "bar": "foo", "baz": "foo" }
59+
60+
- do:
61+
indices.refresh: {}
62+
63+
- do:
64+
indices.stats: { completion_fields: "*" }
65+
66+
- match: { _shards.failed: 0}
67+
- gt: { _all.total.completion.size_in_bytes: $original_size }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.index.engine;
20+
21+
import com.carrotsearch.hppc.ObjectLongHashMap;
22+
import com.carrotsearch.hppc.cursors.ObjectLongCursor;
23+
import org.apache.lucene.index.FieldInfo;
24+
import org.apache.lucene.index.LeafReader;
25+
import org.apache.lucene.index.LeafReaderContext;
26+
import org.apache.lucene.index.Terms;
27+
import org.apache.lucene.search.ReferenceManager;
28+
import org.apache.lucene.search.suggest.document.CompletionTerms;
29+
import org.elasticsearch.action.ActionListener;
30+
import org.elasticsearch.action.support.PlainActionFuture;
31+
import org.elasticsearch.common.FieldMemoryStats;
32+
import org.elasticsearch.common.regex.Regex;
33+
import org.elasticsearch.search.suggest.completion.CompletionStats;
34+
35+
import java.util.concurrent.atomic.AtomicReference;
36+
import java.util.function.Supplier;
37+
38+
class CompletionStatsCache implements ReferenceManager.RefreshListener {
39+
40+
private final Supplier<Engine.Searcher> searcherSupplier;
41+
42+
/**
43+
* Contains a future (i.e. non-null) if another thread is already computing stats, in which case wait for this computation to
44+
* complete. Contains null otherwise, in which case compute the stats ourselves and save them here for other threads to use.
45+
* Futures are eventually completed with stats that include all fields, requiring further filtering (see
46+
* {@link CompletionStatsCache#filterCompletionStatsByFieldName}).
47+
*/
48+
private final AtomicReference<PlainActionFuture<CompletionStats>> completionStatsFutureRef = new AtomicReference<>();
49+
50+
CompletionStatsCache(Supplier<Engine.Searcher> searcherSupplier) {
51+
this.searcherSupplier = searcherSupplier;
52+
}
53+
54+
CompletionStats get(String... fieldNamePatterns) {
55+
final PlainActionFuture<CompletionStats> newFuture = new PlainActionFuture<>();
56+
final PlainActionFuture<CompletionStats> oldFuture = completionStatsFutureRef.compareAndExchange(null, newFuture);
57+
58+
if (oldFuture != null) {
59+
// we lost the race, someone else is already computing stats, so we wait for that to finish
60+
return filterCompletionStatsByFieldName(fieldNamePatterns, oldFuture.actionGet());
61+
}
62+
63+
// we won the race, nobody else is already computing stats, so it's up to us
64+
ActionListener.completeWith(newFuture, () -> {
65+
long sizeInBytes = 0;
66+
final ObjectLongHashMap<String> completionFields = new ObjectLongHashMap<>();
67+
68+
try (Engine.Searcher currentSearcher = searcherSupplier.get()) {
69+
for (LeafReaderContext atomicReaderContext : currentSearcher.getIndexReader().leaves()) {
70+
LeafReader atomicReader = atomicReaderContext.reader();
71+
for (FieldInfo info : atomicReader.getFieldInfos()) {
72+
Terms terms = atomicReader.terms(info.name);
73+
if (terms instanceof CompletionTerms) {
74+
// TODO: currently we load up the suggester for reporting its size
75+
final long fstSize = ((CompletionTerms) terms).suggester().ramBytesUsed();
76+
completionFields.addTo(info.name, fstSize);
77+
sizeInBytes += fstSize;
78+
}
79+
}
80+
}
81+
}
82+
83+
return new CompletionStats(sizeInBytes, new FieldMemoryStats(completionFields));
84+
});
85+
86+
boolean success = false;
87+
final CompletionStats completionStats;
88+
try {
89+
completionStats = newFuture.actionGet();
90+
success = true;
91+
} finally {
92+
if (success == false) {
93+
// invalidate the cache (if not already invalidated) so that future calls will retry
94+
completionStatsFutureRef.compareAndSet(newFuture, null);
95+
}
96+
}
97+
98+
return filterCompletionStatsByFieldName(fieldNamePatterns, completionStats);
99+
}
100+
101+
private static CompletionStats filterCompletionStatsByFieldName(String[] fieldNamePatterns, CompletionStats fullCompletionStats) {
102+
final FieldMemoryStats fieldMemoryStats;
103+
if (fieldNamePatterns != null && fieldNamePatterns.length > 0) {
104+
final ObjectLongHashMap<String> completionFields = new ObjectLongHashMap<>(fieldNamePatterns.length);
105+
for (ObjectLongCursor<String> fieldCursor : fullCompletionStats.getFields()) {
106+
if (Regex.simpleMatch(fieldNamePatterns, fieldCursor.key)) {
107+
completionFields.addTo(fieldCursor.key, fieldCursor.value);
108+
}
109+
}
110+
fieldMemoryStats = new FieldMemoryStats(completionFields);
111+
} else {
112+
fieldMemoryStats = null;
113+
}
114+
return new CompletionStats(fullCompletionStats.getSizeInBytes(), fieldMemoryStats);
115+
}
116+
117+
@Override
118+
public void beforeRefresh() {
119+
}
120+
121+
@Override
122+
public void afterRefresh(boolean didRefresh) {
123+
if (didRefresh) {
124+
completionStatsFutureRef.set(null);
125+
}
126+
}
127+
}

server/src/main/java/org/elasticsearch/index/engine/Engine.java

+1-31
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,22 @@
1919

2020
package org.elasticsearch.index.engine;
2121

22-
import com.carrotsearch.hppc.ObjectLongHashMap;
2322
import org.apache.logging.log4j.Logger;
2423
import org.apache.logging.log4j.message.ParameterizedMessage;
2524
import org.apache.lucene.index.DirectoryReader;
26-
import org.apache.lucene.index.FieldInfo;
2725
import org.apache.lucene.index.IndexCommit;
2826
import org.apache.lucene.index.IndexFileNames;
2927
import org.apache.lucene.index.IndexReader;
30-
import org.apache.lucene.index.LeafReader;
3128
import org.apache.lucene.index.LeafReaderContext;
3229
import org.apache.lucene.index.SegmentCommitInfo;
3330
import org.apache.lucene.index.SegmentInfos;
3431
import org.apache.lucene.index.SegmentReader;
3532
import org.apache.lucene.index.Term;
36-
import org.apache.lucene.index.Terms;
3733
import org.apache.lucene.search.IndexSearcher;
3834
import org.apache.lucene.search.QueryCache;
3935
import org.apache.lucene.search.QueryCachingPolicy;
4036
import org.apache.lucene.search.ReferenceManager;
4137
import org.apache.lucene.search.similarities.Similarity;
42-
import org.apache.lucene.search.suggest.document.CompletionTerms;
4338
import org.apache.lucene.store.AlreadyClosedException;
4439
import org.apache.lucene.store.Directory;
4540
import org.apache.lucene.store.IOContext;
@@ -49,7 +44,6 @@
4944
import org.elasticsearch.ExceptionsHelper;
5045
import org.elasticsearch.action.index.IndexRequest;
5146
import org.elasticsearch.common.CheckedRunnable;
52-
import org.elasticsearch.common.FieldMemoryStats;
5347
import org.elasticsearch.common.Nullable;
5448
import org.elasticsearch.common.bytes.BytesReference;
5549
import org.elasticsearch.common.collect.ImmutableOpenMap;
@@ -62,7 +56,6 @@
6256
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver;
6357
import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
6458
import org.elasticsearch.common.metrics.CounterMetric;
65-
import org.elasticsearch.common.regex.Regex;
6659
import org.elasticsearch.common.unit.TimeValue;
6760
import org.elasticsearch.common.util.concurrent.ReleasableLock;
6861
import org.elasticsearch.index.VersionType;
@@ -180,30 +173,7 @@ public MergeStats getMergeStats() {
180173
/**
181174
* Returns the {@link CompletionStats} for this engine
182175
*/
183-
public CompletionStats completionStats(String... fieldNamePatterns) throws IOException {
184-
try (Searcher currentSearcher = acquireSearcher("completion_stats", SearcherScope.INTERNAL)) {
185-
long sizeInBytes = 0;
186-
ObjectLongHashMap<String> completionFields = null;
187-
if (fieldNamePatterns != null && fieldNamePatterns.length > 0) {
188-
completionFields = new ObjectLongHashMap<>(fieldNamePatterns.length);
189-
}
190-
for (LeafReaderContext atomicReaderContext : currentSearcher.getIndexReader().leaves()) {
191-
LeafReader atomicReader = atomicReaderContext.reader();
192-
for (FieldInfo info : atomicReader.getFieldInfos()) {
193-
Terms terms = atomicReader.terms(info.name);
194-
if (terms instanceof CompletionTerms) {
195-
// TODO: currently we load up the suggester for reporting its size
196-
long fstSize = ((CompletionTerms) terms).suggester().ramBytesUsed();
197-
if (Regex.simpleMatch(fieldNamePatterns, info.name)) {
198-
completionFields.addTo(info.name, fstSize);
199-
}
200-
sizeInBytes += fstSize;
201-
}
202-
}
203-
}
204-
return new CompletionStats(sizeInBytes, completionFields == null ? null : new FieldMemoryStats(completionFields));
205-
}
206-
}
176+
public abstract CompletionStats completionStats(String... fieldNamePatterns);
207177

208178
/**
209179
* Returns the {@link DocsStats} for this engine

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

+10
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
import org.elasticsearch.index.translog.TranslogCorruptedException;
9898
import org.elasticsearch.index.translog.TranslogDeletionPolicy;
9999
import org.elasticsearch.index.translog.TranslogStats;
100+
import org.elasticsearch.search.suggest.completion.CompletionStats;
100101
import org.elasticsearch.threadpool.ThreadPool;
101102

102103
import java.io.Closeable;
@@ -174,6 +175,8 @@ public class InternalEngine extends Engine {
174175
private final SoftDeletesPolicy softDeletesPolicy;
175176
private final LastRefreshedCheckpointListener lastRefreshedCheckpointListener;
176177

178+
private final CompletionStatsCache completionStatsCache;
179+
177180
private final AtomicBoolean trackTranslogLocation = new AtomicBoolean(false);
178181
private final KeyedLock<Long> noOpKeyedLock = new KeyedLock<>();
179182
private final AtomicBoolean shouldPeriodicallyFlushAfterBigMerge = new AtomicBoolean(false);
@@ -258,6 +261,8 @@ public InternalEngine(EngineConfig engineConfig) {
258261
"failed to restore version map and local checkpoint tracker", e);
259262
}
260263
}
264+
completionStatsCache = new CompletionStatsCache(() -> acquireSearcher("completion_stats"));
265+
this.externalReaderManager.addListener(completionStatsCache);
261266
success = true;
262267
} finally {
263268
if (success == false) {
@@ -298,6 +303,11 @@ private SoftDeletesPolicy newSoftDeletesPolicy() throws IOException {
298303
engineConfig.retentionLeasesSupplier());
299304
}
300305

306+
@Override
307+
public CompletionStats completionStats(String... fieldNamePatterns) {
308+
return completionStatsCache.get(fieldNamePatterns);
309+
}
310+
301311
/**
302312
* This reference manager delegates all its refresh calls to another (internal) ReaderManager
303313
* The main purpose for this is that if we have external refreshes happening we don't issue extra

server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java

+11
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.elasticsearch.index.translog.TranslogConfig;
4343
import org.elasticsearch.index.translog.TranslogDeletionPolicy;
4444
import org.elasticsearch.index.translog.TranslogStats;
45+
import org.elasticsearch.search.suggest.completion.CompletionStats;
4546

4647
import java.io.Closeable;
4748
import java.io.IOException;
@@ -78,6 +79,7 @@ public class ReadOnlyEngine extends Engine {
7879
private final DocsStats docsStats;
7980
private final RamAccountingRefreshListener refreshListener;
8081
private final SafeCommitInfo safeCommitInfo;
82+
private final CompletionStatsCache completionStatsCache;
8183

8284
protected volatile TranslogStats translogStats;
8385

@@ -122,6 +124,10 @@ public ReadOnlyEngine(EngineConfig config, SeqNoStats seqNoStats, TranslogStats
122124
this.translogStats = translogStats != null ? translogStats : translogStats(config, lastCommittedSegmentInfos);
123125
this.indexWriterLock = indexWriterLock;
124126
this.safeCommitInfo = new SafeCommitInfo(seqNoStats.getLocalCheckpoint(), lastCommittedSegmentInfos.totalMaxDoc());
127+
128+
completionStatsCache = new CompletionStatsCache(() -> acquireSearcher("completion_stats"));
129+
// no need to register a refresh listener to invalidate completionStatsCache since this engine is readonly
130+
125131
success = true;
126132
} finally {
127133
if (success == false) {
@@ -513,4 +519,9 @@ protected static DirectoryReader openDirectory(Directory directory, boolean wrap
513519
return reader;
514520
}
515521
}
522+
523+
@Override
524+
public CompletionStats completionStats(String... fieldNamePatterns) {
525+
return completionStatsCache.get(fieldNamePatterns);
526+
}
516527
}

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

+1-6
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@
151151
import java.io.Closeable;
152152
import java.io.IOException;
153153
import java.io.PrintStream;
154-
import java.io.UncheckedIOException;
155154
import java.nio.channels.ClosedByInterruptException;
156155
import java.nio.charset.StandardCharsets;
157156
import java.util.ArrayList;
@@ -1026,11 +1025,7 @@ public TranslogStats translogStats() {
10261025

10271026
public CompletionStats completionStats(String... fields) {
10281027
readAllowed();
1029-
try {
1030-
return getEngine().completionStats(fields);
1031-
} catch (IOException e) {
1032-
throw new UncheckedIOException(e);
1033-
}
1028+
return getEngine().completionStats(fields);
10341029
}
10351030

10361031
/**

0 commit comments

Comments
 (0)