Skip to content

Commit 0f28e97

Browse files
Total data set size in stats (#70625)
With shared cache searchable snapshots we have shards that have a size in S3 that differs from the locally occupied disk space. This commit introduces `store.total_data_set_size` to node and indices stats, allowing to differ between the two. Relates #69820
1 parent 9addf0b commit 0f28e97

File tree

16 files changed

+182
-45
lines changed

16 files changed

+182
-45
lines changed

docs/reference/cluster/nodes-stats.asciidoc

+10
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,16 @@ Total size of all shards assigned to the node.
256256
(integer)
257257
Total size, in bytes, of all shards assigned to the node.
258258

259+
`total_data_set_size`::
260+
(<<byte-units,byte value>>)
261+
Total data set size of all shards assigned to the node.
262+
This includes the size of shards not stored fully on the node (shared cache searchable snapshots).
263+
264+
`total_data_set_size_in_bytes`::
265+
(integer)
266+
Total data set size, in bytes, of all shards assigned to the node.
267+
This includes the size of shards not stored fully on the node (shared cache searchable snapshots).
268+
259269
`reserved`::
260270
(<<byte-units,byte value>>)
261271
A prediction of how much larger the shard stores on this node will eventually

docs/reference/cluster/stats.asciidoc

+12
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,16 @@ Total size of all shards assigned to selected nodes.
240240
(integer)
241241
Total size, in bytes, of all shards assigned to selected nodes.
242242

243+
`total_data_set_size`::
244+
(<<byte-units, byte units>>)
245+
Total data set size of all shards assigned to selected nodes.
246+
This includes the size of shards not stored fully on the nodes (shared cache searchable snapshots).
247+
248+
`total_data_set_size_in_bytes`::
249+
(integer)
250+
Total data set size, in bytes, of all shards assigned to selected nodes.
251+
This includes the size of shards not stored fully on the nodes (shared cache searchable snapshots).
252+
243253
`reserved`::
244254
(<<byte-units,byte value>>)
245255
A prediction of how much larger the shard stores will eventually grow due to
@@ -1238,6 +1248,8 @@ The API returns the following response:
12381248
"store": {
12391249
"size": "16.2kb",
12401250
"size_in_bytes": 16684,
1251+
"total_data_set_size": "16.2kb",
1252+
"total_data_set_size_in_bytes": 16684,
12411253
"reserved": "0b",
12421254
"reserved_in_bytes": 0
12431255
},

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/40_store_stats.yml

+28
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,33 @@
11
---
22
"Store stats":
3+
- skip:
4+
features: [arbitrary_key]
5+
# todo change after backport
6+
version: " - 7.99.99"
7+
reason: "total_data_set_size added in 7.13"
8+
9+
- do:
10+
nodes.info:
11+
node_id: _master
12+
- set:
13+
nodes._arbitrary_key_: master
14+
15+
- do:
16+
nodes.stats:
17+
metric: [ indices ]
18+
index_metric: [ store ]
19+
20+
- is_false: nodes.$master.discovery
21+
- is_true: nodes.$master.indices.store
22+
- gte: { nodes.$master.indices.store.size_in_bytes: 0 }
23+
- gte: { nodes.$master.indices.store.reserved_in_bytes: -1 }
24+
- set:
25+
nodes.$master.indices.store.size_in_bytes: size_in_bytes
26+
- match: { nodes.$master.indices.store.total_data_set_size_in_bytes: $size_in_bytes }
27+
28+
---
29+
#remove when 7.13 is released
30+
"Store stats bwc":
331
- skip:
432
features: [arbitrary_key]
533

server/src/main/java/org/elasticsearch/index/IndexService.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
import java.util.function.Consumer;
9393
import java.util.function.Function;
9494
import java.util.function.LongSupplier;
95+
import java.util.function.LongUnaryOperator;
9596
import java.util.function.Supplier;
9697

9798
import static java.util.Collections.emptyMap;
@@ -372,7 +373,7 @@ private long getAvgShardSizeInBytes() throws IOException {
372373
long sum = 0;
373374
int count = 0;
374375
for (IndexShard indexShard : this) {
375-
sum += indexShard.store().stats(0L).sizeInBytes();
376+
sum += indexShard.store().stats(0L, LongUnaryOperator.identity()).sizeInBytes();
376377
count++;
377378
}
378379
if (count == 0) {

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

+9-7
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@
178178
import java.util.function.Consumer;
179179
import java.util.function.Function;
180180
import java.util.function.LongSupplier;
181+
import java.util.function.LongUnaryOperator;
181182
import java.util.function.Supplier;
182183
import java.util.stream.Collectors;
183184
import java.util.stream.StreamSupport;
@@ -1054,15 +1055,16 @@ public GetStats getStats() {
10541055
}
10551056

10561057
public StoreStats storeStats() {
1057-
if (DiskThresholdDecider.SETTING_IGNORE_DISK_WATERMARKS.get(indexSettings.getSettings())) {
1058-
// if this shard has no disk footprint then its size is reported as 0
1059-
return new StoreStats(0, 0);
1060-
}
10611058
try {
10621059
final RecoveryState recoveryState = this.recoveryState;
1063-
final long bytesStillToRecover = recoveryState == null ? -1L : recoveryState.getIndex().bytesStillToRecover();
1064-
final long reservedBytes = bytesStillToRecover == -1 ? StoreStats.UNKNOWN_RESERVED_BYTES : bytesStillToRecover;
1065-
return store.stats(reservedBytes);
1060+
if (DiskThresholdDecider.SETTING_IGNORE_DISK_WATERMARKS.get(indexSettings.getSettings())) {
1061+
// if this shard has no disk footprint then its local size is reported as 0
1062+
return store.stats(0, size -> 0);
1063+
} else {
1064+
final long bytesStillToRecover = recoveryState == null ? -1L : recoveryState.getIndex().bytesStillToRecover();
1065+
final long reservedBytes = bytesStillToRecover == -1 ? StoreStats.UNKNOWN_RESERVED_BYTES : bytesStillToRecover;
1066+
return store.stats(reservedBytes, LongUnaryOperator.identity());
1067+
}
10661068
} catch (IOException e) {
10671069
failShard("Failing shard because of exception during storeStats", e);
10681070
throw new ElasticsearchException("io exception while building 'store stats'", e);

server/src/main/java/org/elasticsearch/index/store/Store.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
import java.util.concurrent.atomic.AtomicBoolean;
9191
import java.util.concurrent.locks.ReentrantReadWriteLock;
9292
import java.util.function.Consumer;
93+
import java.util.function.LongUnaryOperator;
9394
import java.util.zip.CRC32;
9495
import java.util.zip.Checksum;
9596

@@ -337,10 +338,12 @@ public CheckIndex.Status checkIndex(PrintStream out) throws IOException {
337338

338339
/**
339340
* @param reservedBytes a prediction of how much larger the store is expected to grow, or {@link StoreStats#UNKNOWN_RESERVED_BYTES}.
341+
* @param localSizeFunction to calculate the local size of the shard based on the shard size.
340342
*/
341-
public StoreStats stats(long reservedBytes) throws IOException {
343+
public StoreStats stats(long reservedBytes, LongUnaryOperator localSizeFunction) throws IOException {
342344
ensureOpen();
343-
return new StoreStats(directory.estimateSize(), reservedBytes);
345+
long sizeInBytes = directory.estimateSize();
346+
return new StoreStats(localSizeFunction.applyAsLong(sizeInBytes), sizeInBytes, reservedBytes);
344347
}
345348

346349
/**

server/src/main/java/org/elasticsearch/index/store/StoreStats.java

+26-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ public class StoreStats implements Writeable, ToXContentFragment {
2727
public static final long UNKNOWN_RESERVED_BYTES = -1L;
2828

2929
public static final Version RESERVED_BYTES_VERSION = Version.V_7_9_0;
30+
public static final Version TOTAL_DATA_SET_SIZE_SIZE_VERSION = Version.V_8_0_0; // todo: Version.V_7_13_0;
3031

3132
private long sizeInBytes;
33+
private long totalDataSetSizeInBytes;
3234
private long reservedSize;
3335

3436
public StoreStats() {
@@ -37,6 +39,11 @@ public StoreStats() {
3739

3840
public StoreStats(StreamInput in) throws IOException {
3941
sizeInBytes = in.readVLong();
42+
if (in.getVersion().onOrAfter(TOTAL_DATA_SET_SIZE_SIZE_VERSION)) {
43+
totalDataSetSizeInBytes = in.readVLong();
44+
} else {
45+
totalDataSetSizeInBytes = sizeInBytes;
46+
}
4047
if (in.getVersion().onOrAfter(RESERVED_BYTES_VERSION)) {
4148
reservedSize = in.readZLong();
4249
} else {
@@ -46,19 +53,22 @@ public StoreStats(StreamInput in) throws IOException {
4653

4754
/**
4855
* @param sizeInBytes the size of the store in bytes
56+
* @param totalDataSetSizeInBytes the size of the total data set in bytes, can differ from sizeInBytes for shards using shared cache
57+
* storage
4958
* @param reservedSize a prediction of how much larger the store is expected to grow, or {@link StoreStats#UNKNOWN_RESERVED_BYTES}.
5059
*/
51-
public StoreStats(long sizeInBytes, long reservedSize) {
60+
public StoreStats(long sizeInBytes, long totalDataSetSizeInBytes, long reservedSize) {
5261
assert reservedSize == UNKNOWN_RESERVED_BYTES || reservedSize >= 0 : reservedSize;
5362
this.sizeInBytes = sizeInBytes;
63+
this.totalDataSetSizeInBytes = totalDataSetSizeInBytes;
5464
this.reservedSize = reservedSize;
5565
}
56-
5766
public void add(StoreStats stats) {
5867
if (stats == null) {
5968
return;
6069
}
6170
sizeInBytes += stats.sizeInBytes;
71+
totalDataSetSizeInBytes += stats.totalDataSetSizeInBytes;
6272
reservedSize = ignoreIfUnknown(reservedSize) + ignoreIfUnknown(stats.reservedSize);
6373
}
6474

@@ -82,6 +92,14 @@ public ByteSizeValue getSize() {
8292
return size();
8393
}
8494

95+
public ByteSizeValue totalDataSetSize() {
96+
return new ByteSizeValue(totalDataSetSizeInBytes);
97+
}
98+
99+
public ByteSizeValue getTotalDataSetSize() {
100+
return totalDataSetSize();
101+
}
102+
85103
/**
86104
* A prediction of how much larger this store will eventually grow. For instance, if we are currently doing a peer recovery or restoring
87105
* a snapshot into this store then we can account for the rest of the recovery using this field. A value of {@code -1B} indicates that
@@ -94,6 +112,9 @@ public ByteSizeValue getReservedSize() {
94112
@Override
95113
public void writeTo(StreamOutput out) throws IOException {
96114
out.writeVLong(sizeInBytes);
115+
if (out.getVersion().onOrAfter(TOTAL_DATA_SET_SIZE_SIZE_VERSION)) {
116+
out.writeVLong(totalDataSetSizeInBytes);
117+
}
97118
if (out.getVersion().onOrAfter(RESERVED_BYTES_VERSION)) {
98119
out.writeZLong(reservedSize);
99120
}
@@ -103,6 +124,7 @@ public void writeTo(StreamOutput out) throws IOException {
103124
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
104125
builder.startObject(Fields.STORE);
105126
builder.humanReadableField(Fields.SIZE_IN_BYTES, Fields.SIZE, size());
127+
builder.humanReadableField(Fields.TOTAL_DATA_SET_SIZE_IN_BYTES, Fields.TOTAL_DATA_SET_SIZE, totalDataSetSize());
106128
builder.humanReadableField(Fields.RESERVED_IN_BYTES, Fields.RESERVED, getReservedSize());
107129
builder.endObject();
108130
return builder;
@@ -112,6 +134,8 @@ static final class Fields {
112134
static final String STORE = "store";
113135
static final String SIZE = "size";
114136
static final String SIZE_IN_BYTES = "size_in_bytes";
137+
static final String TOTAL_DATA_SET_SIZE = "total_data_set_size";
138+
static final String TOTAL_DATA_SET_SIZE_IN_BYTES = "total_data_set_size_in_bytes";
115139
static final String RESERVED = "reserved";
116140
static final String RESERVED_IN_BYTES = "reserved_in_bytes";
117141
}

server/src/test/java/org/elasticsearch/action/admin/cluster/stats/VersionStatsTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ public void testCreation() {
9999
Path path = createTempDir().resolve("indices").resolve(shardRouting.shardId().getIndex().getUUID())
100100
.resolve(String.valueOf(shardRouting.shardId().id()));
101101
IndexShard indexShard = mock(IndexShard.class);
102-
StoreStats storeStats = new StoreStats(100, 200);
102+
StoreStats storeStats = new StoreStats(100, 150, 200);
103103
when(indexShard.storeStats()).thenReturn(storeStats);
104104
ShardStats shardStats = new ShardStats(shardRouting, new ShardPath(false, path, path, shardRouting.shardId()),
105105
new CommonStats(null, indexShard, new CommonStatsFlags(CommonStatsFlags.Flag.Store)),

server/src/test/java/org/elasticsearch/action/admin/indices/shrink/TransportResizeActionTests.java

+17-12
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ public void testErrorCondition() {
6868
assertTrue(
6969
expectThrows(IllegalStateException.class, () ->
7070
TransportResizeAction.prepareCreateIndexRequest(new ResizeRequest("target", "source"), state,
71-
new StoreStats(between(1, 100), between(1, 100)), (i) -> new DocsStats(Integer.MAX_VALUE, between(1, 1000),
71+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)),
72+
(i) -> new DocsStats(Integer.MAX_VALUE, between(1, 1000),
7273
between(1, 100)), "target")
7374
).getMessage().startsWith("Can't merge index with more than [2147483519] docs - too many documents in shards "));
7475

@@ -80,7 +81,7 @@ public void testErrorCondition() {
8081
TransportResizeAction.prepareCreateIndexRequest(req,
8182
createClusterState("source", 8, 1,
8283
Settings.builder().put("index.blocks.write", true).build()).metadata().index("source"),
83-
new StoreStats(between(1, 100), between(1, 100)),
84+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)),
8485
(i) -> i == 2 || i == 3 ? new DocsStats(Integer.MAX_VALUE / 2, between(1, 1000), between(1, 10000)) : null
8586
, "target");
8687
}
@@ -94,7 +95,7 @@ public void testErrorCondition() {
9495
createClusterState("source", 8, 1,
9596
Settings.builder().put("index.blocks.write", true).put("index.soft_deletes.enabled", true).build())
9697
.metadata().index("source"),
97-
new StoreStats(between(1, 100), between(1, 100)),
98+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)),
9899
(i) -> new DocsStats(between(10, 1000), between(1, 10), between(1, 10000)), "target");
99100
});
100101
assertThat(softDeletesError.getMessage(), equalTo("Can't disable [index.soft_deletes.enabled] setting on resize"));
@@ -115,7 +116,7 @@ public void testErrorCondition() {
115116
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
116117

117118
TransportResizeAction.prepareCreateIndexRequest(new ResizeRequest("target", "source"), clusterState.metadata().index("source"),
118-
new StoreStats(between(1, 100), between(1, 100)),
119+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)),
119120
(i) -> new DocsStats(between(1, 1000), between(1, 1000), between(0, 10000)), "target");
120121
}
121122

@@ -139,15 +140,17 @@ public void testPassNumRoutingShards() {
139140
resizeRequest.getTargetIndexRequest()
140141
.settings(Settings.builder().put("index.number_of_shards", 2).build());
141142
IndexMetadata indexMetadata = clusterState.metadata().index("source");
142-
TransportResizeAction.prepareCreateIndexRequest(resizeRequest, indexMetadata, new StoreStats(between(1, 100), between(1, 100)),
143+
TransportResizeAction.prepareCreateIndexRequest(resizeRequest, indexMetadata,
144+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)),
143145
null, "target");
144146

145147
resizeRequest.getTargetIndexRequest()
146148
.settings(Settings.builder()
147149
.put("index.number_of_routing_shards", randomIntBetween(2, 10))
148150
.put("index.number_of_shards", 2)
149151
.build());
150-
TransportResizeAction.prepareCreateIndexRequest(resizeRequest, indexMetadata, new StoreStats(between(1, 100), between(1, 100)),
152+
TransportResizeAction.prepareCreateIndexRequest(resizeRequest, indexMetadata,
153+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)),
151154
null, "target");
152155
}
153156

@@ -172,7 +175,7 @@ public void testPassNumRoutingShardsAndFail() {
172175
resizeRequest.getTargetIndexRequest()
173176
.settings(Settings.builder().put("index.number_of_shards", numShards * 2).build());
174177
TransportResizeAction.prepareCreateIndexRequest(resizeRequest, clusterState.metadata().index("source"),
175-
new StoreStats(between(1, 100), between(1, 100)), null, "target");
178+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)), null, "target");
176179

177180
resizeRequest.getTargetIndexRequest()
178181
.settings(Settings.builder()
@@ -181,7 +184,7 @@ public void testPassNumRoutingShardsAndFail() {
181184
ClusterState finalState = clusterState;
182185
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class,
183186
() -> TransportResizeAction.prepareCreateIndexRequest(resizeRequest, finalState.metadata().index("source"),
184-
new StoreStats(between(1, 100), between(1, 100)), null, "target"));
187+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)), null, "target"));
185188
assertEquals("cannot provide index.number_of_routing_shards on resize", iae.getMessage());
186189
}
187190

@@ -209,7 +212,8 @@ public void testShrinkIndexSettings() {
209212
final ActiveShardCount activeShardCount = randomBoolean() ? ActiveShardCount.ALL : ActiveShardCount.ONE;
210213
target.setWaitForActiveShards(activeShardCount);
211214
CreateIndexClusterStateUpdateRequest request = TransportResizeAction.prepareCreateIndexRequest(
212-
target, clusterState.metadata().index(indexName), new StoreStats(between(1, 100), between(1, 100)), (i) -> stats, "target");
215+
target, clusterState.metadata().index(indexName),
216+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)), (i) -> stats, "target");
213217
assertNotNull(request.recoverFrom());
214218
assertEquals(indexName, request.recoverFrom().getName());
215219
assertEquals("1", request.settings().get("index.number_of_shards"));
@@ -240,7 +244,8 @@ public void testShrinkWithMaxPrimaryShardSize() {
240244
.settings(Settings.builder().put("index.number_of_shards", 2).build());
241245
assertTrue(
242246
expectThrows(IllegalArgumentException.class, () ->
243-
TransportResizeAction.prepareCreateIndexRequest(resizeRequest, state, new StoreStats(between(1, 100), between(1, 100)),
247+
TransportResizeAction.prepareCreateIndexRequest(resizeRequest, state,
248+
new StoreStats(between(1, 100), between(0, 100), between(1, 100)),
244249
(i) -> new DocsStats(Integer.MAX_VALUE, between(1, 1000), between(1, 100)), "target")
245250
).getMessage().startsWith("Cannot set both index.number_of_shards and max_primary_shard_size for the target index"));
246251

@@ -266,7 +271,7 @@ public void testShrinkWithMaxPrimaryShardSize() {
266271
// each shard's storage will not be greater than the `max_primary_shard_size`
267272
ResizeRequest target1 = new ResizeRequest("target", "source");
268273
target1.setMaxPrimaryShardSize(new ByteSizeValue(2));
269-
StoreStats storeStats = new StoreStats(10, between(1, 100));
274+
StoreStats storeStats = new StoreStats(10, between(0, 100), between(1, 100));
270275
final int targetIndexShardsNum1 = 5;
271276
final ActiveShardCount activeShardCount1 = ActiveShardCount.from(targetIndexShardsNum1);
272277
target1.setWaitForActiveShards(targetIndexShardsNum1);
@@ -283,7 +288,7 @@ public void testShrinkWithMaxPrimaryShardSize() {
283288
// the shards number of the target index will be equal to the source index's shards number
284289
ResizeRequest target2 = new ResizeRequest("target2", "source");
285290
target2.setMaxPrimaryShardSize(new ByteSizeValue(1));
286-
StoreStats storeStats2 = new StoreStats(100, between(1, 100));
291+
StoreStats storeStats2 = new StoreStats(100, between(0, 100), between(1, 100));
287292
final int targetIndexShardsNum2 = 10;
288293
final ActiveShardCount activeShardCount2 = ActiveShardCount.from(targetIndexShardsNum2);
289294
target2.setWaitForActiveShards(activeShardCount2);

server/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,14 @@ public void testFillShardLevelInfo() {
9494
test_0 = ShardRoutingHelper.moveToStarted(test_0);
9595
Path test0Path = createTempDir().resolve("indices").resolve(index.getUUID()).resolve("0");
9696
CommonStats commonStats0 = new CommonStats();
97-
commonStats0.store = new StoreStats(100, 0L);
97+
commonStats0.store = new StoreStats(100, 100, 0L);
9898
ShardRouting test_1 = ShardRouting.newUnassigned(new ShardId(index, 1), false, PeerRecoverySource.INSTANCE,
9999
new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
100100
test_1 = ShardRoutingHelper.initialize(test_1, "node2");
101101
test_1 = ShardRoutingHelper.moveToStarted(test_1);
102102
Path test1Path = createTempDir().resolve("indices").resolve(index.getUUID()).resolve("1");
103103
CommonStats commonStats1 = new CommonStats();
104-
commonStats1.store = new StoreStats(1000, 0L);
104+
commonStats1.store = new StoreStats(1000, 1000, 0L);
105105
ShardStats[] stats = new ShardStats[] {
106106
new ShardStats(test_0, new ShardPath(false, test0Path, test0Path, test_0.shardId()), commonStats0 , null, null, null),
107107
new ShardStats(test_1, new ShardPath(false, test1Path, test1Path, test_1.shardId()), commonStats1 , null, null, null)

0 commit comments

Comments
 (0)