Skip to content

Commit 9a77f41

Browse files
Fix cluster health when closing (elastic#61709)
When the master shuts down its cluster service, a waiting health request would fail rather than fail over to a new master.
1 parent 2eeb1bd commit 9a77f41

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterHealthIT.java

+17-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import org.elasticsearch.action.ActionFuture;
2323
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
24+
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
2425
import org.elasticsearch.action.support.IndicesOptions;
2526
import org.elasticsearch.action.support.PlainActionFuture;
2627
import org.elasticsearch.cluster.health.ClusterHealthStatus;
@@ -309,14 +310,29 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS
309310

310311
public void testHealthOnMasterFailover() throws Exception {
311312
final String node = internalCluster().startDataOnlyNode();
313+
boolean withIndex = randomBoolean();
314+
if (withIndex) {
315+
// Create index with many shards to provoke the health request to wait (for green) while master is being shut down.
316+
// Note that the number of replicas is set to 0 once the test has finished starting a number of health requests and master restarts.
317+
// This ensures that the cluster is yellow when the health request is made, making the health request wait on the observer,
318+
// triggering a call to observer.onClusterServiceClose when the master is shut down.
319+
createIndex("test", Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, randomIntBetween(0, 10)).build());
320+
}
312321
final List<ActionFuture<ClusterHealthResponse>> responseFutures = new ArrayList<>();
313322
// Run a few health requests concurrent to master fail-overs against a data-node to make sure master failover is handled
314323
// without exceptions
315324
for (int i = 0; i < 20; ++i) {
316325
responseFutures.add(client(node).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID)
317-
.setWaitForGreenStatus().execute());
326+
.setWaitForGreenStatus().setMasterNodeTimeout(TimeValue.timeValueMinutes(1)).execute());
318327
internalCluster().restartNode(internalCluster().getMasterName(), InternalTestCluster.EMPTY_CALLBACK);
319328
}
329+
if (withIndex) {
330+
assertAcked(
331+
client().admin().indices()
332+
.updateSettings(new UpdateSettingsRequest("test")
333+
.settings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0))).get()
334+
);
335+
}
320336
for (ActionFuture<ClusterHealthResponse> responseFuture : responseFutures) {
321337
assertSame(responseFuture.get().getStatus(), ClusterHealthStatus.GREEN);
322338
}

server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.elasticsearch.common.unit.TimeValue;
4646
import org.elasticsearch.common.util.CollectionUtils;
4747
import org.elasticsearch.index.IndexNotFoundException;
48+
import org.elasticsearch.node.NodeClosedException;
4849
import org.elasticsearch.tasks.Task;
4950
import org.elasticsearch.threadpool.ThreadPool;
5051
import org.elasticsearch.transport.TransportService;
@@ -211,7 +212,7 @@ public void onNewClusterState(ClusterState newState) {
211212

212213
@Override
213214
public void onClusterServiceClose() {
214-
listener.onFailure(new IllegalStateException("ClusterService was close during health call"));
215+
listener.onFailure(new NodeClosedException(clusterService.localNode()));
215216
}
216217

217218
@Override

0 commit comments

Comments
 (0)