Skip to content

Commit 0c77f45

Browse files
authored
Move DocsStats into Engine (#33835)
By moving DocStats into the engine we can easily cache the stats for read-only engines if necessary. It also moves the responsibility out of IndexShard which has quiet some complexity already.
1 parent 6f3b333 commit 0c77f45

File tree

4 files changed

+60
-31
lines changed

4 files changed

+60
-31
lines changed

server/src/main/java/org/elasticsearch/index/engine/Engine.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
import org.elasticsearch.index.merge.MergeStats;
6767
import org.elasticsearch.index.seqno.SeqNoStats;
6868
import org.elasticsearch.index.seqno.SequenceNumbers;
69+
import org.elasticsearch.index.shard.DocsStats;
6970
import org.elasticsearch.index.shard.ShardId;
7071
import org.elasticsearch.index.store.Store;
7172
import org.elasticsearch.index.translog.Translog;
@@ -175,6 +176,41 @@ public MergeStats getMergeStats() {
175176
/** Returns how many bytes we are currently moving from heap to disk */
176177
public abstract long getWritingBytes();
177178

179+
/**
180+
* Returns the {@link DocsStats} for this engine
181+
*/
182+
public DocsStats docStats() {
183+
// we calculate the doc stats based on the internal reader that is more up-to-date and not subject
184+
// to external refreshes. For instance we don't refresh an external reader if we flush and indices with
185+
// index.refresh_interval=-1 won't see any doc stats updates at all. This change will give more accurate statistics
186+
// when indexing but not refreshing in general. Yet, if a refresh happens the internal reader is refresh as well so we are
187+
// safe here.
188+
try (Engine.Searcher searcher = acquireSearcher("docStats", Engine.SearcherScope.INTERNAL)) {
189+
return docsStats(searcher.reader());
190+
}
191+
}
192+
193+
protected final DocsStats docsStats(IndexReader indexReader) {
194+
long numDocs = 0;
195+
long numDeletedDocs = 0;
196+
long sizeInBytes = 0;
197+
// we don't wait for a pending refreshes here since it's a stats call instead we mark it as accessed only which will cause
198+
// the next scheduled refresh to go through and refresh the stats as well
199+
for (LeafReaderContext readerContext : indexReader.leaves()) {
200+
// we go on the segment level here to get accurate numbers
201+
final SegmentReader segmentReader = Lucene.segmentReader(readerContext.reader());
202+
SegmentCommitInfo info = segmentReader.getSegmentInfo();
203+
numDocs += readerContext.reader().numDocs();
204+
numDeletedDocs += readerContext.reader().numDeletedDocs();
205+
try {
206+
sizeInBytes += info.sizeInBytes();
207+
} catch (IOException e) {
208+
logger.trace(() -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
209+
}
210+
}
211+
return new DocsStats(numDocs, numDeletedDocs, sizeInBytes);
212+
}
213+
178214
/**
179215
* A throttling class that can be activated, causing the
180216
* {@code acquireThrottle} method to block on a lock when throttling

server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.elasticsearch.index.mapper.MapperService;
3535
import org.elasticsearch.index.seqno.SeqNoStats;
3636
import org.elasticsearch.index.seqno.SequenceNumbers;
37+
import org.elasticsearch.index.shard.DocsStats;
3738
import org.elasticsearch.index.store.Store;
3839
import org.elasticsearch.index.translog.Translog;
3940
import org.elasticsearch.index.translog.TranslogStats;
@@ -63,6 +64,7 @@ public final class ReadOnlyEngine extends Engine {
6364
private final SearcherManager searcherManager;
6465
private final IndexCommit indexCommit;
6566
private final Lock indexWriterLock;
67+
private final DocsStats docsStats;
6668

6769
/**
6870
* Creates a new ReadOnlyEngine. This ctor can also be used to open a read-only engine on top of an already opened
@@ -101,6 +103,7 @@ public ReadOnlyEngine(EngineConfig config, SeqNoStats seqNoStats, TranslogStats
101103
this.indexCommit = reader.getIndexCommit();
102104
this.searcherManager = new SearcherManager(reader,
103105
new RamAccountingSearcherFactory(engineConfig.getCircuitBreakerService()));
106+
this.docsStats = docsStats(reader);
104107
this.indexWriterLock = indexWriterLock;
105108
success = true;
106109
} finally {
@@ -365,4 +368,9 @@ public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) {
365368
@Override
366369
public void maybePruneDeletes() {
367370
}
371+
372+
@Override
373+
public DocsStats docStats() {
374+
return docsStats;
375+
}
368376
}

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,9 @@
2121

2222
import com.carrotsearch.hppc.ObjectLongMap;
2323
import org.apache.logging.log4j.Logger;
24-
import org.apache.logging.log4j.message.ParameterizedMessage;
2524
import org.apache.lucene.index.CheckIndex;
2625
import org.apache.lucene.index.IndexCommit;
27-
import org.apache.lucene.index.LeafReaderContext;
28-
import org.apache.lucene.index.SegmentCommitInfo;
2926
import org.apache.lucene.index.SegmentInfos;
30-
import org.apache.lucene.index.SegmentReader;
3127
import org.apache.lucene.index.Term;
3228
import org.apache.lucene.search.Query;
3329
import org.apache.lucene.search.QueryCachingPolicy;
@@ -879,32 +875,9 @@ public FlushStats flushStats() {
879875
}
880876

881877
public DocsStats docStats() {
882-
// we calculate the doc stats based on the internal reader that is more up-to-date and not subject
883-
// to external refreshes. For instance we don't refresh an external reader if we flush and indices with
884-
// index.refresh_interval=-1 won't see any doc stats updates at all. This change will give more accurate statistics
885-
// when indexing but not refreshing in general. Yet, if a refresh happens the internal reader is refresh as well so we are
886-
// safe here.
887-
long numDocs = 0;
888-
long numDeletedDocs = 0;
889-
long sizeInBytes = 0;
890-
try (Engine.Searcher searcher = acquireSearcher("docStats", Engine.SearcherScope.INTERNAL)) {
891-
// we don't wait for a pending refreshes here since it's a stats call instead we mark it as accessed only which will cause
892-
// the next scheduled refresh to go through and refresh the stats as well
893-
markSearcherAccessed();
894-
for (LeafReaderContext reader : searcher.reader().leaves()) {
895-
// we go on the segment level here to get accurate numbers
896-
final SegmentReader segmentReader = Lucene.segmentReader(reader.reader());
897-
SegmentCommitInfo info = segmentReader.getSegmentInfo();
898-
numDocs += reader.reader().numDocs();
899-
numDeletedDocs += reader.reader().numDeletedDocs();
900-
try {
901-
sizeInBytes += info.sizeInBytes();
902-
} catch (IOException e) {
903-
logger.trace(() -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
904-
}
905-
}
906-
}
907-
return new DocsStats(numDocs, numDeletedDocs, sizeInBytes);
878+
DocsStats docsStats = getEngine().docStats();
879+
markSearcherAccessed();
880+
return docsStats;
908881
}
909882

910883
/**

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2438,7 +2438,7 @@ public void testRecoverFromLocalShard() throws IOException {
24382438
closeShards(sourceShard, targetShard);
24392439
}
24402440

2441-
public void testDocStats() throws IOException, InterruptedException {
2441+
public void testDocStats() throws Exception {
24422442
IndexShard indexShard = null;
24432443
try {
24442444
indexShard = newStartedShard(
@@ -2455,7 +2455,14 @@ public void testDocStats() throws IOException, InterruptedException {
24552455
indexShard.flush(new FlushRequest());
24562456
}
24572457
{
2458+
IndexShard shard = indexShard;
2459+
assertBusy(() -> {
2460+
ThreadPool threadPool = shard.getThreadPool();
2461+
assertThat(threadPool.relativeTimeInMillis(), greaterThan(shard.getLastSearcherAccess()));
2462+
});
2463+
long prevAccessTime = shard.getLastSearcherAccess();
24582464
final DocsStats docsStats = indexShard.docStats();
2465+
assertThat("searcher was not marked as accessed", shard.getLastSearcherAccess(), greaterThan(prevAccessTime));
24592466
assertThat(docsStats.getCount(), equalTo(numDocs));
24602467
try (Engine.Searcher searcher = indexShard.acquireSearcher("test")) {
24612468
assertTrue(searcher.reader().numDocs() <= docsStats.getCount());
@@ -3412,4 +3419,9 @@ public void testResetEngine() throws Exception {
34123419
assertThat(shard.translogStats().estimatedNumberOfOperations(), equalTo(translogStats.estimatedNumberOfOperations()));
34133420
closeShard(shard, false);
34143421
}
3422+
3423+
@Override
3424+
public Settings threadPoolSettings() {
3425+
return Settings.builder().put(super.threadPoolSettings()).put("thread_pool.estimated_time_interval", "5ms").build();
3426+
}
34153427
}

0 commit comments

Comments
 (0)