Skip to content

Commit 662f062

Browse files
committed
tracks total bytes, not average bytes
1 parent f38e957 commit 662f062

File tree

4 files changed

+71
-69
lines changed

4 files changed

+71
-69
lines changed

core/src/main/java/org/elasticsearch/index/shard/DocsStats.java

+20-12
Original file line numberDiff line numberDiff line change
@@ -25,35 +25,31 @@
2525
import org.elasticsearch.common.io.stream.Streamable;
2626
import org.elasticsearch.common.xcontent.ToXContentFragment;
2727
import org.elasticsearch.common.xcontent.XContentBuilder;
28+
import org.elasticsearch.index.store.StoreStats;
2829

2930
import java.io.IOException;
3031

3132
public class DocsStats implements Streamable, ToXContentFragment {
3233

3334
long count = 0;
3435
long deleted = 0;
35-
long averageSizeInBytes = 0;
36+
long totalSizeInBytes = 0;
3637

3738
public DocsStats() {
3839

3940
}
4041

41-
public DocsStats(long count, long deleted, long averageSizeInBytes) {
42+
public DocsStats(long count, long deleted, long totalSizeInBytes) {
4243
this.count = count;
4344
this.deleted = deleted;
44-
this.averageSizeInBytes = averageSizeInBytes;
45+
this.totalSizeInBytes = totalSizeInBytes;
4546
}
4647

4748
public void add(DocsStats that) {
4849
if (that == null) {
4950
return;
5051
}
51-
long totalBytes = this.averageSizeInBytes * (this.count + this.deleted)
52-
+ that.averageSizeInBytes * (that.count + that.deleted);
53-
long totalDocs = this.count + this.deleted + that.count + that.deleted;
54-
if (totalDocs > 0) {
55-
this.averageSizeInBytes = totalBytes / totalDocs;
56-
}
52+
this.totalSizeInBytes += that.totalSizeInBytes;
5753
this.count += that.count;
5854
this.deleted += that.deleted;
5955
}
@@ -66,16 +62,28 @@ public long getDeleted() {
6662
return this.deleted;
6763
}
6864

65+
/**
66+
* Returns the total size in bytes of all documents covered by these stats.
67+
* This value may be more reliable than {@link StoreStats#getSizeInBytes()} in estimating the index size.
68+
*/
69+
public long getTotalSizeInBytes() {
70+
return totalSizeInBytes;
71+
}
72+
73+
/**
74+
* Returns the average size in bytes of all documents covered by these stats.
75+
*/
6976
public long getAverageSizeInBytes() {
70-
return averageSizeInBytes;
77+
long totalDocs = count + deleted;
78+
return totalDocs == 0 ? 0 : totalSizeInBytes / totalDocs;
7179
}
7280

7381
@Override
7482
public void readFrom(StreamInput in) throws IOException {
7583
count = in.readVLong();
7684
deleted = in.readVLong();
7785
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
78-
averageSizeInBytes = in.readVLong();
86+
totalSizeInBytes = in.readVLong();
7987
}
8088
}
8189

@@ -84,7 +92,7 @@ public void writeTo(StreamOutput out) throws IOException {
8492
out.writeVLong(count);
8593
out.writeVLong(deleted);
8694
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
87-
out.writeVLong(averageSizeInBytes);
95+
out.writeVLong(totalSizeInBytes);
8896
}
8997
}
9098

core/src/main/java/org/elasticsearch/index/shard/IndexShard.java

+11-9
Original file line numberDiff line numberDiff line change
@@ -880,16 +880,18 @@ public FlushStats flushStats() {
880880
}
881881

882882
public DocsStats docStats() {
883-
long totalDocsInSegments = 0L;
884-
long totalBytesInSegments = 0L;
885-
for (Segment segment : segments(false)) {
886-
totalDocsInSegments += segment.getNumDocs();
887-
totalBytesInSegments += segment.getSizeInBytes();
888-
}
889-
final long avgDocSize = totalDocsInSegments > 0 ? totalBytesInSegments / totalDocsInSegments : 0;
890-
try (Engine.Searcher searcher = acquireSearcher("doc_stats")) {
891-
return new DocsStats(searcher.reader().numDocs(), searcher.reader().numDeletedDocs(), avgDocSize);
883+
long numDocs = 0;
884+
long numDeletedDocs = 0;
885+
long sizeInBytes = 0;
886+
List<Segment> segments = segments(false);
887+
for (Segment segment : segments) {
888+
if (segment.search) {
889+
numDocs += segment.getNumDocs();
890+
numDeletedDocs += segment.getDeletedDocs();
891+
sizeInBytes += segment.getSizeInBytes();
892+
}
892893
}
894+
return new DocsStats(numDocs, numDeletedDocs, sizeInBytes);
893895
}
894896

895897
/**

core/src/test/java/org/elasticsearch/index/shard/DocsStatsTests.java

+5-17
Original file line numberDiff line numberDiff line change
@@ -29,29 +29,17 @@
2929
public class DocsStatsTests extends ESTestCase {
3030

3131
public void testCalculateAverageDocSize() throws Exception {
32-
DocsStats stats = new DocsStats(10, 2, 10);
32+
DocsStats stats = new DocsStats(10, 2, 120);
3333
assertThat(stats.getAverageSizeInBytes(), equalTo(10L));
3434

35-
stats.add(new DocsStats(0, 0, randomNonNegativeLong()));
35+
stats.add(new DocsStats(0, 0, 0));
3636
assertThat(stats.getAverageSizeInBytes(), equalTo(10L));
3737

38-
// (38*900 + 12*10) / 50 = 686L
39-
stats.add(new DocsStats(8, 30, 900));
38+
stats.add(new DocsStats(8, 30, 480));
4039
assertThat(stats.getCount(), equalTo(18L));
4140
assertThat(stats.getDeleted(), equalTo(32L));
42-
assertThat(stats.getAverageSizeInBytes(), equalTo(686L));
43-
44-
// (50*686 + 40*120) / 90 = 434L
45-
stats.add(new DocsStats(0, 40, 120));
46-
assertThat(stats.getCount(), equalTo(18L));
47-
assertThat(stats.getDeleted(), equalTo(72L));
48-
assertThat(stats.getAverageSizeInBytes(), equalTo(434L));
49-
50-
// (90*434 + 35*99) / 125 = 340L
51-
stats.add(new DocsStats(35, 0, 99));
52-
assertThat(stats.getCount(), equalTo(53L));
53-
assertThat(stats.getDeleted(), equalTo(72L));
54-
assertThat(stats.getAverageSizeInBytes(), equalTo(340L));
41+
assertThat(stats.getTotalSizeInBytes(), equalTo(600L));
42+
assertThat(stats.getAverageSizeInBytes(), equalTo(12L));
5543
}
5644

5745
public void testSerialize() throws Exception {

core/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

+35-31
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
import org.elasticsearch.index.seqno.SequenceNumbers;
8989
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;
9090
import org.elasticsearch.index.store.Store;
91+
import org.elasticsearch.index.store.StoreStats;
9192
import org.elasticsearch.index.translog.Translog;
9293
import org.elasticsearch.index.translog.TranslogTests;
9394
import org.elasticsearch.indices.IndicesQueryCache;
@@ -2273,54 +2274,57 @@ public void testDocStats() throws IOException {
22732274
}
22742275
}
22752276

2276-
public void testEstimateAverageDocSize() throws Exception {
2277+
public void testEstimateTotalDocSize() throws Exception {
22772278
IndexShard indexShard = null;
22782279
try {
22792280
indexShard = newStartedShard(true);
2280-
int smallDocNum = randomIntBetween(5, 100);
2281-
for (int i = 0; i < smallDocNum; i++) {
2282-
indexDoc(indexShard, "test", "small-" + i);
2283-
}
2284-
// Average document size is estimated by sampling segments, thus it should be zero without flushing.
2285-
DocsStats withoutFlush = indexShard.docStats();
2286-
assertThat(withoutFlush.averageSizeInBytes, equalTo(0L));
22872281

2288-
indexShard.flush(new FlushRequest());
2289-
indexShard.refresh("test");
2290-
DocsStats smallStats = indexShard.docStats();
2291-
assertThat(smallStats.averageSizeInBytes, greaterThan(10L));
2292-
2293-
long storedAvgSize = indexShard.storeStats().sizeInBytes() / smallDocNum;
2294-
assertThat("Estimated average document size is too small compared with the average stored size",
2295-
smallStats.averageSizeInBytes, greaterThanOrEqualTo(storedAvgSize * 80/100));
2296-
assertThat("Estimated average document size is too large compared with the average stored size",
2297-
smallStats.averageSizeInBytes, lessThanOrEqualTo(storedAvgSize * 120/100));
2298-
2299-
// Indexing large documents should increase the average document size.
2300-
int largeDocNum = randomIntBetween(100, 200);
2301-
for (int i = 0; i < largeDocNum; i++) {
2282+
int numDoc = randomIntBetween(100, 200);
2283+
for (int i = 0; i < numDoc; i++) {
23022284
String doc = XContentFactory.jsonBuilder()
23032285
.startObject()
23042286
.field("count", randomInt())
23052287
.field("point", randomFloat())
23062288
.field("description", randomUnicodeOfCodepointLength(100))
23072289
.endObject().string();
2308-
indexDoc(indexShard, "test", "large-" + i, doc);
2290+
indexDoc(indexShard, "doc", Integer.toString(i), doc);
23092291
}
2292+
2293+
assertThat("Without flushing, segment sizes should be zero",
2294+
indexShard.docStats().getTotalSizeInBytes(), equalTo(0L));
2295+
23102296
indexShard.flush(new FlushRequest());
23112297
indexShard.refresh("test");
2312-
DocsStats largeStats = indexShard.docStats();
2313-
assertThat(largeStats.averageSizeInBytes, greaterThan(100L));
2314-
assertThat(largeStats.averageSizeInBytes, greaterThan(smallStats.averageSizeInBytes));
2298+
{
2299+
final DocsStats docsStats = indexShard.docStats();
2300+
final StoreStats storeStats = indexShard.storeStats();
2301+
assertThat(storeStats.sizeInBytes(), greaterThan(numDoc * 100L)); // A doc should be more than 100 bytes.
23152302

2316-
int deleteDocs = randomIntBetween(1, smallDocNum / 2);
2317-
for (int i = 0; i < deleteDocs; i++) {
2318-
deleteDoc(indexShard, "test", "small-" + i);
2303+
assertThat("Estimated total document size is too small compared with the stored size",
2304+
docsStats.getTotalSizeInBytes(), greaterThanOrEqualTo(storeStats.sizeInBytes() * 80/100));
2305+
assertThat("Estimated total document size is too large compared with the stored size",
2306+
docsStats.getTotalSizeInBytes(), lessThanOrEqualTo(storeStats.sizeInBytes() * 120/100));
23192307
}
2308+
2309+
// Do some updates and deletes, then recheck the correlation.
2310+
for (int i = 0; i < numDoc / 2; i++) {
2311+
if (randomBoolean()) {
2312+
deleteDoc(indexShard, "doc", Integer.toString(i));
2313+
} else {
2314+
indexDoc(indexShard, "doc", Integer.toString(i), "{\"foo\": \"bar\"}");
2315+
}
2316+
}
2317+
23202318
indexShard.flush(new FlushRequest());
23212319
indexShard.refresh("test");
2322-
DocsStats withDeletedStats = indexShard.docStats();
2323-
assertThat(withDeletedStats.averageSizeInBytes, greaterThan(largeStats.averageSizeInBytes));
2320+
{
2321+
final DocsStats docsStats = indexShard.docStats();
2322+
final StoreStats storeStats = indexShard.storeStats();
2323+
assertThat("Estimated total document size is too small compared with the stored size",
2324+
docsStats.getTotalSizeInBytes(), greaterThanOrEqualTo(storeStats.sizeInBytes() * 80/100));
2325+
assertThat("Estimated total document size is too large compared with the stored size",
2326+
docsStats.getTotalSizeInBytes(), lessThanOrEqualTo(storeStats.sizeInBytes() * 120/100));
2327+
}
23242328

23252329
} finally {
23262330
closeShards(indexShard);

0 commit comments

Comments
 (0)