
Commit 039d667

Ensure doc_stats are changing even if refresh is disabled (#27505)
Today, if refresh is disabled, the doc stats are never updated. In a bulk-indexing scenario this can be confusing: even though we refresh internal readers, the doc stats never advance. This change switches the doc stats over to the internal reader, which is refreshed independently of the external reader's refresh interval but is always at least as fresh, so the stats keep moving and cause less confusion.
1 parent 5e5e346 commit 039d667
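
In other words, a Lucene reader only reflects the segments that were visible when it was last (re)opened; stats computed from a reader that is never reopened simply stop moving. A minimal standalone sketch of that behaviour (illustration only, not part of this commit; the class name and the in-memory RAMDirectory are assumptions chosen for brevity):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    // Illustration only: a reader reflects the index as of the point it was (re)opened,
    // so counts derived from a reader that is never reopened stay stale.
    public class StaleReaderDemo {
        public static void main(String[] args) throws Exception {
            try (Directory dir = new RAMDirectory();
                 IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
                writer.addDocument(new Document());
                writer.commit();

                DirectoryReader reader = DirectoryReader.open(dir);
                System.out.println(reader.numDocs()); // 1

                writer.addDocument(new Document());
                writer.commit();
                System.out.println(reader.numDocs()); // still 1: the old reader does not see the new doc

                // openIfChanged returns a refreshed reader (or null if nothing changed)
                DirectoryReader refreshed = DirectoryReader.openIfChanged(reader);
                System.out.println(refreshed.numDocs()); // 2: a refreshed reader reports the new doc
                reader.close();
                refreshed.close();
            }
        }
    }

The patch applies this to IndexShard.docStats(): the stats are now computed from the internal reader, which is refreshed by flushes and other internal operations, so they keep advancing even with index.refresh_interval=-1.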

4 files changed: +72 additions, -36 deletions

core/src/main/java/org/elasticsearch/common/lucene/Lucene.java

Lines changed: 18 additions & 0 deletions
@@ -30,16 +30,19 @@
 import org.apache.lucene.document.LatLonDocValuesField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexFormatTooNewException;
 import org.apache.lucene.index.IndexFormatTooOldException;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.FieldDoc;
@@ -650,6 +653,21 @@ public static Version parseVersionLenient(String toParse, Version defaultValue)
         return LenientParser.parse(toParse, defaultValue);
     }
 
+    /**
+     * Tries to extract a segment reader from the given index reader.
+     * If no SegmentReader can be extracted an {@link IllegalStateException} is thrown.
+     */
+    public static SegmentReader segmentReader(LeafReader reader) {
+        if (reader instanceof SegmentReader) {
+            return (SegmentReader) reader;
+        } else if (reader instanceof FilterLeafReader) {
+            final FilterLeafReader fReader = (FilterLeafReader) reader;
+            return segmentReader(FilterLeafReader.unwrap(fReader));
+        }
+        // hard fail - we can't get a SegmentReader
+        throw new IllegalStateException("Can not extract segment reader from given index reader [" + reader + "]");
+    }
+
     @SuppressForbidden(reason = "Version#parseLeniently() used in a central place")
     private static final class LenientParser {
         public static Version parse(String toParse, Version defaultValue) {

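For context, the relocated helper is meant to be applied per leaf of an acquired reader, exactly as the Engine call sites below do. A minimal sketch of that calling pattern (illustrative only; the wrapping class and method are assumptions, not part of the commit):

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.SegmentReader;
    import org.elasticsearch.common.lucene.Lucene;

    // Sketch only: sum per-segment RAM usage across all leaves of a reader using the
    // relocated Lucene.segmentReader(...) helper, mirroring Engine.segmentsStats() below.
    final class SegmentRamUsage {
        static long ramBytesUsed(IndexReader reader) {
            long total = 0;
            for (LeafReaderContext leaf : reader.leaves()) {
                // throws IllegalStateException if a leaf cannot be unwrapped to a SegmentReader
                SegmentReader segmentReader = Lucene.segmentReader(leaf.reader());
                total += segmentReader.ramBytesUsed();
            }
            return total;
        }
    }
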
core/src/main/java/org/elasticsearch/index/engine/Engine.java

Lines changed: 3 additions & 19 deletions
@@ -23,7 +23,6 @@
 import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.apache.logging.log4j.util.Supplier;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexReader;
@@ -143,27 +142,12 @@ protected static long guardedRamBytesUsed(Accountable a) {
         return a.ramBytesUsed();
     }
 
-    /**
-     * Tries to extract a segment reader from the given index reader.
-     * If no SegmentReader can be extracted an {@link IllegalStateException} is thrown.
-     */
-    protected static SegmentReader segmentReader(LeafReader reader) {
-        if (reader instanceof SegmentReader) {
-            return (SegmentReader) reader;
-        } else if (reader instanceof FilterLeafReader) {
-            final FilterLeafReader fReader = (FilterLeafReader) reader;
-            return segmentReader(FilterLeafReader.unwrap(fReader));
-        }
-        // hard fail - we can't get a SegmentReader
-        throw new IllegalStateException("Can not extract segment reader from given index reader [" + reader + "]");
-    }
-
     /**
      * Returns whether a leaf reader comes from a merge (versus flush or addIndexes).
      */
     protected static boolean isMergedSegment(LeafReader reader) {
         // We expect leaves to be segment readers
-        final Map<String, String> diagnostics = segmentReader(reader).getSegmentInfo().info.getDiagnostics();
+        final Map<String, String> diagnostics = Lucene.segmentReader(reader).getSegmentInfo().info.getDiagnostics();
         final String source = diagnostics.get(IndexWriter.SOURCE);
         assert Arrays.asList(IndexWriter.SOURCE_ADDINDEXES_READERS, IndexWriter.SOURCE_FLUSH,
             IndexWriter.SOURCE_MERGE).contains(source) : "Unknown source " + source;
@@ -611,7 +595,7 @@ public final SegmentsStats segmentsStats(boolean includeSegmentFileSizes) {
         try (Searcher searcher = acquireSearcher("segments_stats")) {
             SegmentsStats stats = new SegmentsStats();
             for (LeafReaderContext reader : searcher.reader().leaves()) {
-                final SegmentReader segmentReader = segmentReader(reader.reader());
+                final SegmentReader segmentReader = Lucene.segmentReader(reader.reader());
                 stats.add(1, segmentReader.ramBytesUsed());
                 stats.addTermsMemoryInBytes(guardedRamBytesUsed(segmentReader.getPostingsReader()));
                 stats.addStoredFieldsMemoryInBytes(guardedRamBytesUsed(segmentReader.getFieldsReader()));
@@ -718,7 +702,7 @@ protected Segment[] getSegmentInfo(SegmentInfos lastCommittedSegmentInfos, boole
         // first, go over and compute the search ones...
         try (Searcher searcher = acquireSearcher("segments")){
             for (LeafReaderContext reader : searcher.reader().leaves()) {
-                final SegmentReader segmentReader = segmentReader(reader.reader());
+                final SegmentReader segmentReader = Lucene.segmentReader(reader.reader());
                 SegmentCommitInfo info = segmentReader.getSegmentInfo();
                 assert !segments.containsKey(info.info.name);
                 Segment segment = new Segment(info.info.name);

core/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 22 additions & 8 deletions
@@ -21,10 +21,14 @@
 
 import com.carrotsearch.hppc.ObjectLongMap;
 import org.apache.logging.log4j.Logger;
+import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.QueryCachingPolicy;
 import org.apache.lucene.search.ReferenceManager;
@@ -58,7 +62,6 @@
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.metrics.MeanMetric;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.concurrent.AbstractRunnable;
@@ -151,7 +154,6 @@
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.BiConsumer;
 import java.util.function.Consumer;
@@ -856,15 +858,27 @@ public FlushStats flushStats() {
     }
 
     public DocsStats docStats() {
+        // We calculate the doc stats based on the internal reader, which is more up-to-date and not subject
+        // to external refreshes. For instance, we don't refresh an external reader on flush, and indices with
+        // index.refresh_interval=-1 would never see any doc stats updates at all. This gives more accurate
+        // statistics when indexing without refreshing. If a refresh happens the internal reader is refreshed
+        // as well, so we are safe here.
         long numDocs = 0;
         long numDeletedDocs = 0;
        long sizeInBytes = 0;
-        List<Segment> segments = segments(false);
-        for (Segment segment : segments) {
-            if (segment.search) {
-                numDocs += segment.getNumDocs();
-                numDeletedDocs += segment.getDeletedDocs();
-                sizeInBytes += segment.getSizeInBytes();
+        try (Engine.Searcher searcher = acquireSearcher("docStats", Engine.SearcherScope.INTERNAL)) {
+            for (LeafReaderContext reader : searcher.reader().leaves()) {
+                // we go down to the segment level here to get accurate numbers
+                final SegmentReader segmentReader = Lucene.segmentReader(reader.reader());
+                SegmentCommitInfo info = segmentReader.getSegmentInfo();
+                numDocs += reader.reader().numDocs();
+                numDeletedDocs += reader.reader().numDeletedDocs();
+                try {
+                    sizeInBytes += info.sizeInBytes();
+                } catch (IOException e) {
+                    logger.trace((org.apache.logging.log4j.util.Supplier<?>)
+                        () -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
+                }
             }
         }
         return new DocsStats(numDocs, numDeletedDocs, sizeInBytes);

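The accumulation above, restated against a plain Lucene DirectoryReader for readers who want to follow the logic outside the shard plumbing (sketch only; the class, the long[] return shape, and the swallowed exception are simplifications, the real code acquires the shard's internal searcher and logs the per-segment failure at trace level):

    import java.io.IOException;

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.SegmentCommitInfo;
    import org.apache.lucene.index.SegmentReader;
    import org.elasticsearch.common.lucene.Lucene;

    // Sketch only: the same per-leaf accumulation as the new docStats(), against an already-open reader.
    final class DocStatsSketch {
        static long[] docStats(DirectoryReader directoryReader) {
            long numDocs = 0;
            long numDeletedDocs = 0;
            long sizeInBytes = 0;
            for (LeafReaderContext leaf : directoryReader.leaves()) {
                SegmentReader segmentReader = Lucene.segmentReader(leaf.reader());
                SegmentCommitInfo info = segmentReader.getSegmentInfo();
                numDocs += leaf.reader().numDocs();
                numDeletedDocs += leaf.reader().numDeletedDocs();
                try {
                    sizeInBytes += info.sizeInBytes(); // on-disk size of the segment's files
                } catch (IOException e) {
                    // skipped in this sketch
                }
            }
            return new long[] { numDocs, numDeletedDocs, sizeInBytes };
        }
    }
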
core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 29 additions & 9 deletions
@@ -2269,11 +2269,17 @@ public void testDocStats() throws IOException {
             final String id = Integer.toString(i);
             indexDoc(indexShard, "test", id);
         }
-
-        indexShard.refresh("test");
+        if (randomBoolean()) {
+            indexShard.refresh("test");
+        } else {
+            indexShard.flush(new FlushRequest());
+        }
         {
             final DocsStats docsStats = indexShard.docStats();
             assertThat(docsStats.getCount(), equalTo(numDocs));
+            try (Engine.Searcher searcher = indexShard.acquireSearcher("test")) {
+                assertTrue(searcher.reader().numDocs() <= docsStats.getCount());
+            }
             assertThat(docsStats.getDeleted(), equalTo(0L));
             assertThat(docsStats.getAverageSizeInBytes(), greaterThan(0L));
         }
@@ -2293,9 +2299,14 @@ public void testDocStats() throws IOException {
         flushRequest.waitIfOngoing(false);
         indexShard.flush(flushRequest);
 
-        indexShard.refresh("test");
+        if (randomBoolean()) {
+            indexShard.refresh("test");
+        }
         {
             final DocsStats docStats = indexShard.docStats();
+            try (Engine.Searcher searcher = indexShard.acquireSearcher("test")) {
+                assertTrue(searcher.reader().numDocs() <= docStats.getCount());
+            }
             assertThat(docStats.getCount(), equalTo(numDocs));
             // Lucene will delete a segment if all docs are deleted from it; this means that we lose the deletes when deleting all docs
             assertThat(docStats.getDeleted(), equalTo(numDocsToDelete == numDocs ? 0 : numDocsToDelete));
@@ -2307,7 +2318,11 @@ public void testDocStats() throws IOException {
         forceMergeRequest.maxNumSegments(1);
         indexShard.forceMerge(forceMergeRequest);
 
-        indexShard.refresh("test");
+        if (randomBoolean()) {
+            indexShard.refresh("test");
+        } else {
+            indexShard.flush(new FlushRequest());
+        }
         {
             final DocsStats docStats = indexShard.docStats();
             assertThat(docStats.getCount(), equalTo(numDocs));
@@ -2338,8 +2353,11 @@ public void testEstimateTotalDocSize() throws Exception {
         assertThat("Without flushing, segment sizes should be zero",
             indexShard.docStats().getTotalSizeInBytes(), equalTo(0L));
 
-        indexShard.flush(new FlushRequest());
-        indexShard.refresh("test");
+        if (randomBoolean()) {
+            indexShard.flush(new FlushRequest());
+        } else {
+            indexShard.refresh("test");
+        }
         {
             final DocsStats docsStats = indexShard.docStats();
             final StoreStats storeStats = indexShard.storeStats();
@@ -2359,9 +2377,11 @@ public void testEstimateTotalDocSize() throws Exception {
                 indexDoc(indexShard, "doc", Integer.toString(i), "{\"foo\": \"bar\"}");
             }
         }
-
-        indexShard.flush(new FlushRequest());
-        indexShard.refresh("test");
+        if (randomBoolean()) {
+            indexShard.flush(new FlushRequest());
+        } else {
+            indexShard.refresh("test");
+        }
         {
             final DocsStats docsStats = indexShard.docStats();
             final StoreStats storeStats = indexShard.storeStats();
