Skip to content

Commit 06a50fa

Browse files
committed
ShardActiveResponseHandler shouldn't hold to an entire cluster state
ShardActiveResponseHandler doesn't need to hold to an entire cluster state since it only needs to know the cluster state version. It seems that on overloaded systems where nodes are unresponsive holding onto a lot of different cluster states can make the situation worse. Closes #21394
1 parent 3001b63 commit 06a50fa

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

core/src/main/java/org/elasticsearch/indices/store/IndicesStore.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,8 @@ private void deleteShardIfExistElseWhere(ClusterState state, IndexShardRoutingTa
191191
}
192192
}
193193

194-
ShardActiveResponseHandler responseHandler = new ShardActiveResponseHandler(indexShardRoutingTable.shardId(), state, requests.size());
194+
ShardActiveResponseHandler responseHandler = new ShardActiveResponseHandler(indexShardRoutingTable.shardId(), state.getVersion(),
195+
requests.size());
195196
for (Tuple<DiscoveryNode, ShardActiveRequest> request : requests) {
196197
logger.trace("{} sending shard active check to {}", request.v2().shardId, request.v1());
197198
transportService.sendRequest(request.v1(), ACTION_SHARD_EXISTS, request.v2(), responseHandler);
@@ -202,14 +203,14 @@ private class ShardActiveResponseHandler implements TransportResponseHandler<Sha
202203

203204
private final ShardId shardId;
204205
private final int expectedActiveCopies;
205-
private final ClusterState clusterState;
206+
private final long clusterStateVersion;
206207
private final AtomicInteger awaitingResponses;
207208
private final AtomicInteger activeCopies;
208209

209-
public ShardActiveResponseHandler(ShardId shardId, ClusterState clusterState, int expectedActiveCopies) {
210+
public ShardActiveResponseHandler(ShardId shardId, long clusterStateVersion, int expectedActiveCopies) {
210211
this.shardId = shardId;
211212
this.expectedActiveCopies = expectedActiveCopies;
212-
this.clusterState = clusterState;
213+
this.clusterStateVersion = clusterStateVersion;
213214
this.awaitingResponses = new AtomicInteger(expectedActiveCopies);
214215
this.activeCopies = new AtomicInteger();
215216
}
@@ -251,8 +252,8 @@ private void allNodesResponded() {
251252
}
252253

253254
ClusterState latestClusterState = clusterService.state();
254-
if (clusterState.getVersion() != latestClusterState.getVersion()) {
255-
logger.trace("not deleting shard {}, the latest cluster state version[{}] is not equal to cluster state before shard active api call [{}]", shardId, latestClusterState.getVersion(), clusterState.getVersion());
255+
if (clusterStateVersion != latestClusterState.getVersion()) {
256+
logger.trace("not deleting shard {}, the latest cluster state version[{}] is not equal to cluster state before shard active api call [{}]", shardId, latestClusterState.getVersion(), clusterStateVersion);
256257
return;
257258
}
258259

@@ -264,8 +265,8 @@ public boolean runOnlyOnMaster() {
264265

265266
@Override
266267
public ClusterState execute(ClusterState currentState) throws Exception {
267-
if (clusterState.getVersion() != currentState.getVersion()) {
268-
logger.trace("not deleting shard {}, the update task state version[{}] is not equal to cluster state before shard active api call [{}]", shardId, currentState.getVersion(), clusterState.getVersion());
268+
if (clusterStateVersion != currentState.getVersion()) {
269+
logger.trace("not deleting shard {}, the update task state version[{}] is not equal to cluster state before shard active api call [{}]", shardId, currentState.getVersion(), clusterStateVersion);
269270
return currentState;
270271
}
271272
try {

0 commit comments

Comments
 (0)