Skip to content

Commit d8c1428

Browse files
authored
[7.x] Use query param instead of a system property for opting in for new cluster health response code (#79397) (#79435)
Backport #79351 to 7.x: The original change was implemented in #78940, but we have decided to move from a system property to a request parameter, so Cloud users/clients have an easier way to opt-in for the new status code. Relates #70849
1 parent 68a6ce0 commit d8c1428

File tree

14 files changed

+113
-59
lines changed

14 files changed

+113
-59
lines changed

client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@
3737
import org.elasticsearch.common.compress.CompressedXContent;
3838
import org.elasticsearch.common.settings.Settings;
3939
import org.elasticsearch.common.unit.ByteSizeUnit;
40-
import org.elasticsearch.core.TimeValue;
41-
import org.elasticsearch.xcontent.XContentType;
4240
import org.elasticsearch.common.xcontent.support.XContentMapValues;
41+
import org.elasticsearch.core.TimeValue;
4342
import org.elasticsearch.indices.recovery.RecoverySettings;
4443
import org.elasticsearch.rest.RestStatus;
4544
import org.elasticsearch.transport.RemoteClusterService;
4645
import org.elasticsearch.transport.SniffConnectionStrategy;
46+
import org.elasticsearch.xcontent.XContentType;
4747

4848
import java.io.IOException;
4949
import java.util.Collections;
@@ -317,7 +317,7 @@ public void testClusterHealthNotFoundIndex() throws IOException {
317317
assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED));
318318
assertNoIndices(response);
319319
assertWarnings("The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a " +
320-
"future version. Set the [es.cluster_health.request_timeout_200] system property to [true] to suppress this message and " +
320+
"future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and " +
321321
"opt in to the future behaviour now.");
322322
}
323323

docs/reference/cluster/health.asciidoc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms]
9797
provided or better, i.e. `green` > `yellow` > `red`. By default, will not
9898
wait for any status.
9999

100+
`return_200_for_cluster_health_timeout`::
101+
(Optional, Boolean) A boolean value which controls whether to return HTTP 200
102+
status code instead of HTTP 408 in case of a cluster health timeout from
103+
the server side. Defaults to false.
104+
100105
[[cluster-health-api-response-body]]
101106
==== {api-response-body-title}
102107

rest-api-spec/src/main/resources/rest-api-spec/api/cluster.health.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@
102102
"red"
103103
],
104104
"description":"Wait until cluster is in a specific state"
105+
},
106+
"return_200_for_cluster_health_timeout":{
107+
"type":"boolean",
108+
"description":"Whether to return HTTP 200 instead of 408 in case of a cluster health timeout from the server side"
105109
}
106110
}
107111
}

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,25 @@
3535
- match: { initializing_shards: 0 }
3636
- match: { unassigned_shards: 0 }
3737
- gte: { number_of_pending_tasks: 0 }
38+
39+
---
40+
"cluster health request timeout with 200 response code":
41+
- skip:
42+
version: " - 7.15.99"
43+
reason: "return_200_for_cluster_health_timeout was added in 7.16"
44+
- do:
45+
cluster.health:
46+
timeout: 1ms
47+
wait_for_active_shards: 5
48+
return_200_for_cluster_health_timeout: true
49+
50+
- is_true: cluster_name
51+
- is_true: timed_out
52+
- gte: { number_of_nodes: 1 }
53+
- gte: { number_of_data_nodes: 1 }
54+
- match: { active_primary_shards: 0 }
55+
- match: { active_shards: 0 }
56+
- match: { relocating_shards: 0 }
57+
- match: { initializing_shards: 0 }
58+
- match: { unassigned_shards: 0 }
59+
- gte: { number_of_pending_tasks: 0 }

server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequest.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ public class ClusterHealthRequest extends MasterNodeReadRequest<ClusterHealthReq
3535
private ActiveShardCount waitForActiveShards = ActiveShardCount.NONE;
3636
private String waitForNodes = "";
3737
private Priority waitForEvents = null;
38+
private boolean return200ForClusterHealthTimeout;
39+
3840
/**
3941
* Only used by the high-level REST Client. Controls the details level of the health information returned.
4042
* The default value is 'cluster'.
@@ -69,6 +71,9 @@ public ClusterHealthRequest(StreamInput in) throws IOException {
6971
} else {
7072
indicesOptions = IndicesOptions.lenientExpandOpen();
7173
}
74+
if (in.getVersion().onOrAfter(Version.V_7_16_0)) {
75+
return200ForClusterHealthTimeout = in.readBoolean();
76+
}
7277
}
7378

7479
@Override
@@ -101,6 +106,11 @@ public void writeTo(StreamOutput out) throws IOException {
101106
if (out.getVersion().onOrAfter(Version.V_7_2_0)) {
102107
indicesOptions.writeIndicesOptions(out);
103108
}
109+
if (out.getVersion().onOrAfter(Version.V_7_16_0)) {
110+
out.writeBoolean(return200ForClusterHealthTimeout);
111+
} else if (return200ForClusterHealthTimeout) {
112+
throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion());
113+
}
104114
}
105115

106116
@Override
@@ -244,6 +254,18 @@ public Priority waitForEvents() {
244254
return this.waitForEvents;
245255
}
246256

257+
public boolean doesReturn200ForClusterHealthTimeout() {
258+
return return200ForClusterHealthTimeout;
259+
}
260+
261+
/**
262+
* Sets whether to return HTTP 200 status code instead of HTTP 408 in case of a
263+
* cluster health timeout from the server side.
264+
*/
265+
public void return200ForClusterHealthTimeout(boolean return200ForClusterHealthTimeout) {
266+
this.return200ForClusterHealthTimeout = return200ForClusterHealthTimeout;
267+
}
268+
247269
/**
248270
* Set the level of detail for the health information to be returned.
249271
* Only used by the high-level REST Client.

server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,26 @@
88

99
package org.elasticsearch.action.admin.cluster.health;
1010

11+
import org.elasticsearch.Version;
1112
import org.elasticsearch.action.ActionResponse;
1213
import org.elasticsearch.cluster.ClusterState;
1314
import org.elasticsearch.cluster.health.ClusterHealthStatus;
1415
import org.elasticsearch.cluster.health.ClusterIndexHealth;
1516
import org.elasticsearch.cluster.health.ClusterStateHealth;
16-
import org.elasticsearch.common.logging.DeprecationCategory;
17-
import org.elasticsearch.common.logging.DeprecationLogger;
18-
import org.elasticsearch.xcontent.ParseField;
1917
import org.elasticsearch.common.Strings;
2018
import org.elasticsearch.common.io.stream.StreamInput;
2119
import org.elasticsearch.common.io.stream.StreamOutput;
20+
import org.elasticsearch.common.logging.DeprecationCategory;
21+
import org.elasticsearch.common.logging.DeprecationLogger;
22+
import org.elasticsearch.common.xcontent.StatusToXContentObject;
2223
import org.elasticsearch.core.TimeValue;
24+
import org.elasticsearch.rest.RestStatus;
25+
import org.elasticsearch.rest.action.search.RestSearchAction;
2326
import org.elasticsearch.xcontent.ConstructingObjectParser;
2427
import org.elasticsearch.xcontent.ObjectParser;
25-
import org.elasticsearch.common.xcontent.StatusToXContentObject;
28+
import org.elasticsearch.xcontent.ParseField;
2629
import org.elasticsearch.xcontent.XContentBuilder;
2730
import org.elasticsearch.xcontent.XContentParser;
28-
import org.elasticsearch.rest.RestStatus;
29-
import org.elasticsearch.rest.action.search.RestSearchAction;
3031

3132
import java.io.IOException;
3233
import java.util.HashMap;
@@ -102,10 +103,10 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
102103

103104
private static final ObjectParser.NamedObjectParser<ClusterIndexHealth, Void> INDEX_PARSER =
104105
(XContentParser parser, Void context, String index) -> ClusterIndexHealth.innerFromXContent(parser, index);
105-
private static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "es.cluster_health.request_timeout_200";
106+
static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout";
106107
static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout " +
107108
"will be changed from 408 to 200 in a future version. Set the [" + ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + "] " +
108-
"system property to [true] to suppress this message and opt in to the future behaviour now.";
109+
"query parameter to [true] to suppress this message and opt in to the future behaviour now.";
109110

110111
static {
111112
// ClusterStateHealth fields
@@ -138,15 +139,7 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
138139
private boolean timedOut = false;
139140
private ClusterStateHealth clusterStateHealth;
140141
private ClusterHealthStatus clusterHealthStatus;
141-
private boolean esClusterHealthRequestTimeout200 = readEsClusterHealthRequestTimeout200FromProperty();
142-
143-
public ClusterHealthResponse() {
144-
}
145-
146-
/** For the testing of opting in for the 200 status code without setting a system property */
147-
ClusterHealthResponse(boolean esClusterHealthRequestTimeout200) {
148-
this.esClusterHealthRequestTimeout200 = esClusterHealthRequestTimeout200;
149-
}
142+
private boolean return200ForClusterHealthTimeout;
150143

151144
public ClusterHealthResponse(StreamInput in) throws IOException {
152145
super(in);
@@ -158,22 +151,29 @@ public ClusterHealthResponse(StreamInput in) throws IOException {
158151
numberOfInFlightFetch = in.readInt();
159152
delayedUnassignedShards= in.readInt();
160153
taskMaxWaitingTime = in.readTimeValue();
154+
if (in.getVersion().onOrAfter(Version.V_7_16_0)) {
155+
return200ForClusterHealthTimeout = in.readBoolean();
156+
}
161157
}
162158

163159
/** needed for plugins BWC */
164-
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState) {
165-
this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0));
160+
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState,
161+
boolean return200ForServerTimeout) {
162+
this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0),
163+
return200ForServerTimeout);
166164
}
167165

168166
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState, int numberOfPendingTasks,
169-
int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime) {
167+
int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime,
168+
boolean return200ForServerTimeout) {
170169
this.clusterName = clusterName;
171170
this.numberOfPendingTasks = numberOfPendingTasks;
172171
this.numberOfInFlightFetch = numberOfInFlightFetch;
173172
this.delayedUnassignedShards = delayedUnassignedShards;
174173
this.taskMaxWaitingTime = taskMaxWaitingTime;
175174
this.clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices);
176175
this.clusterHealthStatus = clusterStateHealth.getStatus();
176+
this.return200ForClusterHealthTimeout = return200ForServerTimeout;
177177
}
178178

179179
/**
@@ -305,6 +305,11 @@ public void writeTo(StreamOutput out) throws IOException {
305305
out.writeInt(numberOfInFlightFetch);
306306
out.writeInt(delayedUnassignedShards);
307307
out.writeTimeValue(taskMaxWaitingTime);
308+
if (out.getVersion().onOrAfter(Version.V_7_16_0)) {
309+
out.writeBoolean(return200ForClusterHealthTimeout);
310+
} else if (return200ForClusterHealthTimeout) {
311+
throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion());
312+
}
308313
}
309314

310315
@Override
@@ -317,7 +322,7 @@ public RestStatus status() {
317322
if (isTimedOut() == false) {
318323
return RestStatus.OK;
319324
}
320-
if (esClusterHealthRequestTimeout200) {
325+
if (return200ForClusterHealthTimeout) {
321326
return RestStatus.OK;
322327
} else {
323328
deprecationLogger.critical(DeprecationCategory.API,"cluster_health_request_timeout",
@@ -383,17 +388,4 @@ public int hashCode() {
383388
return Objects.hash(clusterName, numberOfPendingTasks, numberOfInFlightFetch, delayedUnassignedShards, taskMaxWaitingTime,
384389
timedOut, clusterStateHealth, clusterHealthStatus);
385390
}
386-
387-
private static boolean readEsClusterHealthRequestTimeout200FromProperty() {
388-
String property = System.getProperty(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY);
389-
if (property == null) {
390-
return false;
391-
}
392-
if (Boolean.parseBoolean(property)) {
393-
return true;
394-
} else {
395-
throw new IllegalArgumentException(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + " can only be unset or [true] but was ["
396-
+ property + "]");
397-
}
398-
}
399391
}

server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
import org.elasticsearch.cluster.service.ClusterService;
3131
import org.elasticsearch.common.Strings;
3232
import org.elasticsearch.common.inject.Inject;
33-
import org.elasticsearch.core.TimeValue;
3433
import org.elasticsearch.common.util.CollectionUtils;
34+
import org.elasticsearch.core.TimeValue;
3535
import org.elasticsearch.index.IndexNotFoundException;
3636
import org.elasticsearch.node.NodeClosedException;
3737
import org.elasticsearch.tasks.Task;
@@ -232,7 +232,8 @@ private enum TimeoutState {
232232

233233
private ClusterHealthResponse getResponse(final ClusterHealthRequest request, ClusterState clusterState,
234234
final int waitFor, TimeoutState timeoutState) {
235-
ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.getMasterService().numberOfPendingTasks(),
235+
ClusterHealthResponse response = clusterHealth(request, clusterState,
236+
clusterService.getMasterService().numberOfPendingTasks(),
236237
allocationService.getNumberOfInFlightFetches(), clusterService.getMasterService().getMaxTaskWaitTime());
237238
int readyCounter = prepareResponse(request, response, clusterState, indexNameExpressionResolver);
238239
boolean valid = (readyCounter == waitFor);
@@ -331,8 +332,8 @@ static int prepareResponse(final ClusterHealthRequest request, final ClusterHeal
331332
}
332333

333334

334-
private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, int numberOfPendingTasks,
335-
int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) {
335+
private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState,
336+
int numberOfPendingTasks, int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) {
336337
if (logger.isTraceEnabled()) {
337338
logger.trace("Calculating health based on state version [{}]", clusterState.version());
338339
}
@@ -344,12 +345,13 @@ private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, Cluste
344345
// one of the specified indices is not there - treat it as RED.
345346
ClusterHealthResponse response = new ClusterHealthResponse(clusterState.getClusterName().value(), Strings.EMPTY_ARRAY,
346347
clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState),
347-
pendingTaskTimeInQueue);
348+
pendingTaskTimeInQueue, request.doesReturn200ForClusterHealthTimeout());
348349
response.setStatus(ClusterHealthStatus.RED);
349350
return response;
350351
}
351352

352-
return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState, numberOfPendingTasks,
353-
numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue);
353+
return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState,
354+
numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue,
355+
request.doesReturn200ForClusterHealthTimeout());
354356
}
355357
}

server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ public static ClusterHealthRequest fromRequest(final RestRequest request) {
8383
if (request.param("wait_for_events") != null) {
8484
clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT)));
8585
}
86+
clusterHealthRequest.return200ForClusterHealthTimeout(request.paramAsBoolean(
87+
"return_200_for_cluster_health_timeout",
88+
clusterHealthRequest.doesReturn200ForClusterHealthTimeout()));
8689
return clusterHealthRequest;
8790
}
8891

server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequestTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public void testSerialize() throws Exception {
4242
assertThat(cloneRequest.waitForEvents(), equalTo(originalRequest.waitForEvents()));
4343
assertIndicesEquals(cloneRequest.indices(), originalRequest.indices());
4444
assertThat(cloneRequest.indicesOptions(), equalTo(originalRequest.indicesOptions()));
45+
assertThat(cloneRequest.doesReturn200ForClusterHealthTimeout(), equalTo(originalRequest.doesReturn200ForClusterHealthTimeout()));
4546
}
4647

4748
public void testRequestReturnsHiddenIndicesByDefault() {

server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
import org.elasticsearch.common.io.stream.Writeable;
2121
import org.elasticsearch.common.settings.Settings;
2222
import org.elasticsearch.core.TimeValue;
23-
import org.elasticsearch.xcontent.ToXContent;
24-
import org.elasticsearch.xcontent.XContentParser;
2523
import org.elasticsearch.rest.RestStatus;
2624
import org.elasticsearch.test.AbstractSerializingTestCase;
25+
import org.elasticsearch.xcontent.ToXContent;
26+
import org.elasticsearch.xcontent.XContentParser;
2727
import org.hamcrest.Matchers;
2828

2929
import java.io.IOException;
@@ -43,7 +43,7 @@ public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<Clu
4343
private final ClusterHealthRequest.Level level = randomFrom(ClusterHealthRequest.Level.values());
4444

4545
public void testIsTimeout() {
46-
ClusterHealthResponse res = new ClusterHealthResponse();
46+
ClusterHealthResponse res = new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, false);
4747
for (int i = 0; i < 5; i++) {
4848
res.setTimedOut(randomBoolean());
4949
if (res.isTimedOut()) {
@@ -56,7 +56,7 @@ public void testIsTimeout() {
5656
}
5757

5858
public void testTimeoutReturns200IfOptedIn() {
59-
ClusterHealthResponse res = new ClusterHealthResponse(true);
59+
ClusterHealthResponse res = new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, true);
6060
for (int i = 0; i < 5; i++) {
6161
res.setTimedOut(randomBoolean());
6262
assertEquals(RestStatus.OK, res.status());
@@ -70,7 +70,7 @@ public void testClusterHealth() throws IOException {
7070
int delayedUnassigned = randomIntBetween(0, 200);
7171
TimeValue pendingTaskInQueueTime = TimeValue.timeValueMillis(randomIntBetween(1000, 100000));
7272
ClusterHealthResponse clusterHealth = new ClusterHealthResponse("bla", new String[] {Metadata.ALL},
73-
clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime);
73+
clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime, false);
7474
clusterHealth = maybeSerialize(clusterHealth);
7575
assertClusterHealth(clusterHealth);
7676
assertThat(clusterHealth.getNumberOfPendingTasks(), Matchers.equalTo(pendingTasks));

0 commit comments

Comments
 (0)