-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Adds average document size to DocsStats #27117
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b964ac8
a381d98
f38e957
662f062
6cca080
96e9be7
ca3023b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,34 +19,39 @@ | |
|
||
package org.elasticsearch.index.shard; | ||
|
||
import org.elasticsearch.Version; | ||
import org.elasticsearch.common.io.stream.StreamInput; | ||
import org.elasticsearch.common.io.stream.StreamOutput; | ||
import org.elasticsearch.common.io.stream.Streamable; | ||
import org.elasticsearch.common.xcontent.ToXContentFragment; | ||
import org.elasticsearch.common.xcontent.XContentBuilder; | ||
import org.elasticsearch.index.store.StoreStats; | ||
|
||
import java.io.IOException; | ||
|
||
public class DocsStats implements Streamable, ToXContentFragment { | ||
|
||
long count = 0; | ||
long deleted = 0; | ||
long totalSizeInBytes = 0; | ||
|
||
/**
 * Creates an instance whose counters keep the values given by the field initializers.
 */
public DocsStats() {
    // nothing to initialise here
}
|
||
/**
 * Creates stats from explicit values.
 *
 * @param count            the document count
 * @param deleted          the deleted-document count
 * @param totalSizeInBytes the total size in bytes of the documents these stats describe
 */
public DocsStats(long count, long deleted, long totalSizeInBytes) {
    this.count = count;
    this.deleted = deleted;
    this.totalSizeInBytes = totalSizeInBytes;
}
|
||
public void add(DocsStats docsStats) { | ||
if (docsStats == null) { | ||
public void add(DocsStats other) { | ||
if (other == null) { | ||
return; | ||
} | ||
count += docsStats.count; | ||
deleted += docsStats.deleted; | ||
this.totalSizeInBytes += other.totalSizeInBytes; | ||
this.count += other.count; | ||
this.deleted += other.deleted; | ||
} | ||
|
||
public long getCount() { | ||
|
@@ -57,16 +62,40 @@ public long getDeleted() { | |
return this.deleted; | ||
} | ||
|
||
/** | ||
* Returns the total size in bytes of all documents in this stats. | ||
* This value may be more reliable than {@link StoreStats#getSizeInBytes()} in estimating the index size. | ||
*/ | ||
public long getTotalSizeInBytes() { | ||
return totalSizeInBytes; | ||
} | ||
|
||
/** | ||
* Returns the average size in bytes of all documents in this stats. | ||
*/ | ||
public long getAverageSizeInBytes() { | ||
long totalDocs = count + deleted; | ||
return totalDocs == 0 ? 0 : totalSizeInBytes / totalDocs; | ||
} | ||
|
||
@Override | ||
public void readFrom(StreamInput in) throws IOException { | ||
count = in.readVLong(); | ||
deleted = in.readVLong(); | ||
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { | ||
totalSizeInBytes = in.readVLong(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we set `totalSizeInBytes` to `-1` in an `else` branch when the stream comes from an older version? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done in 96e9be7. |
||
} else { | ||
totalSizeInBytes = -1; | ||
} | ||
} | ||
|
||
@Override | ||
public void writeTo(StreamOutput out) throws IOException { | ||
out.writeVLong(count); | ||
out.writeVLong(deleted); | ||
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here: this should be `V_6_1_0`. |
||
out.writeVLong(totalSizeInBytes); | ||
} | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -880,9 +880,18 @@ public FlushStats flushStats() { | |
} | ||
|
||
public DocsStats docStats() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this should rather be something like this:

```java
public DocsStats docStats() {
    long numDocs = 0;
    long numDeletedDocs = 0;
    long sizeInByte = 0;
    List<Segment> segments = segments(false);
    for (Segment segment : segments) {
        if (segment.search) {
            numDocs += segment.getNumDocs();
            numDeletedDocs += segment.getDeletedDocs();
            sizeInByte += segment.getSizeInBytes();
        }
    }
    return new DocsStats(numDocs, numDeletedDocs, sizeInByte);
}
```

That way we maintain a consistent total, we can calculate the average at read time, and aggregation of doc stats will be much simpler? I also think we should make sure the size in bytes is based on the currently used reader, which is guaranteed by the `segment.search` check.

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this makes the DocsStats simpler and the average value more accurate. I have updated this in 662f062. |
||
try (Engine.Searcher searcher = acquireSearcher("doc_stats")) { | ||
return new DocsStats(searcher.reader().numDocs(), searcher.reader().numDeletedDocs()); | ||
long numDocs = 0; | ||
long numDeletedDocs = 0; | ||
long sizeInBytes = 0; | ||
List<Segment> segments = segments(false); | ||
for (Segment segment : segments) { | ||
if (segment.search) { | ||
numDocs += segment.getNumDocs(); | ||
numDeletedDocs += segment.getDeletedDocs(); | ||
sizeInBytes += segment.getSizeInBytes(); | ||
} | ||
} | ||
return new DocsStats(numDocs, numDeletedDocs, sizeInBytes); | ||
} | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.index.shard; | ||
|
||
import org.elasticsearch.common.bytes.BytesReference; | ||
import org.elasticsearch.common.io.stream.BytesStreamOutput; | ||
import org.elasticsearch.common.io.stream.StreamInput; | ||
import org.elasticsearch.test.ESTestCase; | ||
|
||
import static org.hamcrest.Matchers.equalTo; | ||
|
||
public class DocsStatsTests extends ESTestCase { | ||
|
||
public void testCalculateAverageDocSize() throws Exception { | ||
DocsStats stats = new DocsStats(10, 2, 120); | ||
assertThat(stats.getAverageSizeInBytes(), equalTo(10L)); | ||
|
||
stats.add(new DocsStats(0, 0, 0)); | ||
assertThat(stats.getAverageSizeInBytes(), equalTo(10L)); | ||
|
||
stats.add(new DocsStats(8, 30, 480)); | ||
assertThat(stats.getCount(), equalTo(18L)); | ||
assertThat(stats.getDeleted(), equalTo(32L)); | ||
assertThat(stats.getTotalSizeInBytes(), equalTo(600L)); | ||
assertThat(stats.getAverageSizeInBytes(), equalTo(12L)); | ||
} | ||
|
||
public void testSerialize() throws Exception { | ||
DocsStats originalStats = new DocsStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()); | ||
try (BytesStreamOutput out = new BytesStreamOutput()) { | ||
originalStats.writeTo(out); | ||
BytesReference bytes = out.bytes(); | ||
try (StreamInput in = bytes.streamInput()) { | ||
DocsStats cloneStats = new DocsStats(); | ||
cloneStats.readFrom(in); | ||
assertThat(cloneStats.getCount(), equalTo(originalStats.getCount())); | ||
assertThat(cloneStats.getDeleted(), equalTo(originalStats.getDeleted())); | ||
assertThat(cloneStats.getAverageSizeInBytes(), equalTo(originalStats.getAverageSizeInBytes())); | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, this should be `V_6_1_0`, since you will be backporting this to the 6.x branch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jpountz recommended to make this for v7, then change for the backport later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is correct, it should be 7.0.0. Then when you backport set it to 6.1.0 in the 6.x branch and make sure that the BWC tests in master against 6.x pass (you might have to skip some of them). Then push a commit to master flipping the version to 6.1.0 and removing the skips.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ahh okay, I hadn't realized we were doing it the reverse way now :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure what you mean by the reverse way, these are part of the steps to have green CI on all branches every step of the way.