Skip to content

Ensure doc_stats are changing even if refresh is disabled #27505

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 25, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ protected static long guardedRamBytesUsed(Accountable a) {
* Tries to extract a segment reader from the given index reader.
* If no SegmentReader can be extracted an {@link IllegalStateException} is thrown.
*/
protected static SegmentReader segmentReader(LeafReader reader) {
public static SegmentReader segmentReader(LeafReader reader) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: If this utility method get more use-cases, I'd move it to a separate class, like maybe the "Lucene" class in that case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done, good point

if (reader instanceof SegmentReader) {
return (SegmentReader) reader;
} else if (reader instanceof FilterLeafReader) {
Expand Down
28 changes: 22 additions & 6 deletions core/src/main/java/org/elasticsearch/index/shard/IndexShard.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@

import com.carrotsearch.hppc.ObjectLongMap;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.QueryCachingPolicy;
import org.apache.lucene.search.ReferenceManager;
Expand Down Expand Up @@ -856,15 +860,27 @@ public FlushStats flushStats() {
}

public DocsStats docStats() {
// we calculate the doc stats based on the internal reader that is more up-to-date and not subject
// to external refreshes. For instance we don't refresh an external reader if we flush and indices with
// index.refresh_interval=-1 won't see any doc stats updates at all. This change will give more accurate statistics
// when indexing but not refreshing in general. Yet, if a refresh happens the internal reader is refresh as well so we are
// safe here.
long numDocs = 0;
long numDeletedDocs = 0;
long sizeInBytes = 0;
List<Segment> segments = segments(false);
for (Segment segment : segments) {
if (segment.search) {
numDocs += segment.getNumDocs();
numDeletedDocs += segment.getDeletedDocs();
sizeInBytes += segment.getSizeInBytes();
try (Engine.Searcher searcher = acquireSearcher("docStats", Engine.SearcherScope.INTERNAL)) {
for (LeafReaderContext reader : searcher.reader().leaves()) {
// we go on the segment level here to get accurate numbers
final SegmentReader segmentReader = Engine.segmentReader(reader.reader());
SegmentCommitInfo info = segmentReader.getSegmentInfo();
numDocs += reader.reader().numDocs();
numDeletedDocs += reader.reader().numDeletedDocs();
try {
sizeInBytes += info.sizeInBytes();
} catch (IOException e) {
logger.trace((org.apache.logging.log4j.util.Supplier<?>)
() -> new ParameterizedMessage("failed to get size for [{}]", info.info.name), e);
}
}
}
return new DocsStats(numDocs, numDeletedDocs, sizeInBytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2269,11 +2269,17 @@ public void testDocStats() throws IOException {
final String id = Integer.toString(i);
indexDoc(indexShard, "test", id);
}

indexShard.refresh("test");
if (randomBoolean()) {
indexShard.refresh("test");
} else {
indexShard.flush(new FlushRequest());
}
{
final DocsStats docsStats = indexShard.docStats();
assertThat(docsStats.getCount(), equalTo(numDocs));
try (Engine.Searcher searcher = indexShard.acquireSearcher("test")) {
assertTrue(searcher.reader().numDocs() <= docsStats.getCount());
}
assertThat(docsStats.getDeleted(), equalTo(0L));
assertThat(docsStats.getAverageSizeInBytes(), greaterThan(0L));
}
Expand All @@ -2293,9 +2299,14 @@ public void testDocStats() throws IOException {
flushRequest.waitIfOngoing(false);
indexShard.flush(flushRequest);

indexShard.refresh("test");
if (randomBoolean()) {
indexShard.refresh("test");
}
{
final DocsStats docStats = indexShard.docStats();
try (Engine.Searcher searcher = indexShard.acquireSearcher("test")) {
assertTrue(searcher.reader().numDocs() <= docStats.getCount());
}
assertThat(docStats.getCount(), equalTo(numDocs));
// Lucene will delete a segment if all docs are deleted from it; this means that we lose the deletes when deleting all docs
assertThat(docStats.getDeleted(), equalTo(numDocsToDelete == numDocs ? 0 : numDocsToDelete));
Expand All @@ -2307,7 +2318,11 @@ public void testDocStats() throws IOException {
forceMergeRequest.maxNumSegments(1);
indexShard.forceMerge(forceMergeRequest);

indexShard.refresh("test");
if (randomBoolean()) {
indexShard.refresh("test");
} else {
indexShard.flush(new FlushRequest());
}
{
final DocsStats docStats = indexShard.docStats();
assertThat(docStats.getCount(), equalTo(numDocs));
Expand Down Expand Up @@ -2338,8 +2353,11 @@ public void testEstimateTotalDocSize() throws Exception {
assertThat("Without flushing, segment sizes should be zero",
indexShard.docStats().getTotalSizeInBytes(), equalTo(0L));

indexShard.flush(new FlushRequest());
indexShard.refresh("test");
if (randomBoolean()) {
indexShard.flush(new FlushRequest());
} else {
indexShard.refresh("test");
}
{
final DocsStats docsStats = indexShard.docStats();
final StoreStats storeStats = indexShard.storeStats();
Expand All @@ -2359,9 +2377,11 @@ public void testEstimateTotalDocSize() throws Exception {
indexDoc(indexShard, "doc", Integer.toString(i), "{\"foo\": \"bar\"}");
}
}

indexShard.flush(new FlushRequest());
indexShard.refresh("test");
if (randomBoolean()) {
indexShard.flush(new FlushRequest());
} else {
indexShard.refresh("test");
}
{
final DocsStats docsStats = indexShard.docStats();
final StoreStats storeStats = indexShard.storeStats();
Expand Down