Skip to content

Commit 417bd0c

Browse files
author
Ali Beyad
committed
Index creation does not cause the cluster health to go RED
Previously, index creation would momentarily cause the cluster health to go RED, because the primaries were still being assigned and activated. This commit ensures that when an index is created or an index is being recovered during cluster recovery and it does not have any active allocation ids, then the cluster health status will not go RED, but instead be YELLOW. Relates elastic#9126
1 parent 47bd2f9 commit 417bd0c

File tree

9 files changed

+555
-28
lines changed

9 files changed

+555
-28
lines changed

core/src/main/java/org/elasticsearch/action/admin/indices/shards/TransportIndicesShardStoresAction.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.elasticsearch.cluster.block.ClusterBlockLevel;
2929
import org.elasticsearch.cluster.health.ClusterHealthStatus;
3030
import org.elasticsearch.cluster.health.ClusterShardHealth;
31+
import org.elasticsearch.cluster.metadata.IndexMetaData;
3132
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
3233
import org.elasticsearch.cluster.node.DiscoveryNode;
3334
import org.elasticsearch.cluster.node.DiscoveryNodes;
@@ -93,12 +94,16 @@ protected void masterOperation(IndicesShardStoresRequest request, ClusterState s
9394
logger.trace("using cluster state version [{}] to determine shards", state.version());
9495
// collect relevant shard ids of the requested indices for fetching store infos
9596
for (String index : concreteIndices) {
97+
IndexMetaData indexMetaData = state.metaData().index(index);
9698
IndexRoutingTable indexShardRoutingTables = routingTables.index(index);
9799
if (indexShardRoutingTables == null) {
98100
continue;
99101
}
100102
for (IndexShardRoutingTable routing : indexShardRoutingTables) {
101-
ClusterShardHealth shardHealth = new ClusterShardHealth(routing.shardId().id(), routing);
103+
final int shardId = routing.shardId().id();
104+
ClusterShardHealth shardHealth = new ClusterShardHealth(shardId,
105+
routing,
106+
indexMetaData.activeAllocationIds(shardId).isEmpty());
102107
if (request.shardStatuses().contains(shardHealth.getStatus())) {
103108
shardIdsToFetch.add(routing.shardId());
104109
}

core/src/main/java/org/elasticsearch/cluster/health/ClusterIndexHealth.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ public ClusterIndexHealth(final IndexMetaData indexMetaData, final IndexRoutingT
5454

5555
for (IndexShardRoutingTable shardRoutingTable : indexRoutingTable) {
5656
int shardId = shardRoutingTable.shardId().id();
57-
shards.put(shardId, new ClusterShardHealth(shardId, shardRoutingTable));
57+
shards.put(shardId, new ClusterShardHealth(shardId, shardRoutingTable, indexMetaData.activeAllocationIds(shardId).isEmpty()));
5858
}
5959

6060
// update the index status

core/src/main/java/org/elasticsearch/cluster/health/ClusterShardHealth.java

+7-8
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
2323
import org.elasticsearch.cluster.routing.ShardRouting;
24+
import org.elasticsearch.cluster.routing.UnassignedInfo;
2425
import org.elasticsearch.common.io.stream.StreamInput;
2526
import org.elasticsearch.common.io.stream.StreamOutput;
2627
import org.elasticsearch.common.io.stream.Writeable;
@@ -37,45 +38,42 @@ public final class ClusterShardHealth implements Writeable {
3738
private final int unassignedShards;
3839
private final boolean primaryActive;
3940

40-
public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardRoutingTable) {
41+
public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardRoutingTable, final boolean noActiveAllocationIds) {
4142
this.shardId = shardId;
4243
int computeActiveShards = 0;
4344
int computeRelocatingShards = 0;
4445
int computeInitializingShards = 0;
4546
int computeUnassignedShards = 0;
46-
boolean computePrimaryActive = false;
4747
for (ShardRouting shardRouting : shardRoutingTable) {
4848
if (shardRouting.active()) {
4949
computeActiveShards++;
5050
if (shardRouting.relocating()) {
5151
// the shard is relocating, the one it is relocating to will be in initializing state, so we don't count it
5252
computeRelocatingShards++;
5353
}
54-
if (shardRouting.primary()) {
55-
computePrimaryActive = true;
56-
}
5754
} else if (shardRouting.initializing()) {
5855
computeInitializingShards++;
5956
} else if (shardRouting.unassigned()) {
6057
computeUnassignedShards++;
6158
}
6259
}
6360
ClusterHealthStatus computeStatus;
64-
if (computePrimaryActive) {
61+
final ShardRouting primaryRouting = shardRoutingTable.primaryShard();
62+
if (primaryRouting.active()) {
6563
if (computeActiveShards == shardRoutingTable.size()) {
6664
computeStatus = ClusterHealthStatus.GREEN;
6765
} else {
6866
computeStatus = ClusterHealthStatus.YELLOW;
6967
}
7068
} else {
71-
computeStatus = ClusterHealthStatus.RED;
69+
computeStatus = UnassignedInfo.unassignedPrimaryShardHealth(primaryRouting.unassignedInfo(), noActiveAllocationIds);
7270
}
7371
this.status = computeStatus;
7472
this.activeShards = computeActiveShards;
7573
this.relocatingShards = computeRelocatingShards;
7674
this.initializingShards = computeInitializingShards;
7775
this.unassignedShards = computeUnassignedShards;
78-
this.primaryActive = computePrimaryActive;
76+
this.primaryActive = primaryRouting.active();
7977
}
8078

8179
public ClusterShardHealth(final StreamInput in) throws IOException {
@@ -126,4 +124,5 @@ public void writeTo(final StreamOutput out) throws IOException {
126124
out.writeVInt(unassignedShards);
127125
out.writeBoolean(primaryActive);
128126
}
127+
129128
}

core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java

+31
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import org.elasticsearch.ExceptionsHelper;
2323
import org.elasticsearch.cluster.ClusterState;
24+
import org.elasticsearch.cluster.health.ClusterHealthStatus;
2425
import org.elasticsearch.cluster.metadata.MetaData;
2526
import org.elasticsearch.common.Nullable;
2627
import org.elasticsearch.common.io.stream.StreamInput;
@@ -289,6 +290,35 @@ public static long findNextDelayedAllocation(long currentNanoTime, ClusterState
289290
return nextDelayNanos == Long.MAX_VALUE ? -1L : nextDelayNanos;
290291
}
291292

293+
/**
294+
* Returns the appropriate {@link ClusterHealthStatus} for an unassigned primary shard
295+
* based on its unassigned information and allocation id history.
296+
*/
297+
public static ClusterHealthStatus unassignedPrimaryShardHealth(final UnassignedInfo unassignedInfo,
298+
final boolean noActiveAllocationIds) {
299+
assert unassignedInfo != null;
300+
final UnassignedInfo.Reason reason = unassignedInfo.getReason();
301+
/**
302+
* Normally, an inactive primary shard in an index should cause the cluster health to be RED. However,
303+
* there are exceptions where a health status of RED is inappropriate, namely in these scenarios:
304+
* 1. Index Creation. When an index is first created, the primary shards are in the initializing state, so
305+
* there is a small window where the cluster health is RED due to the primaries not being activated yet.
306+
* However, this leads to a false sense that the cluster is in an unhealthy state, when in reality, its
307+
* simply a case of needing to wait for the primaries to initialize.
308+
* 2. When a cluster is in the recovery state, and the shard never had any allocation ids assigned to it,
309+
* which indicates the index was created and before allocation of the primary occurred for this shard,
310+
* a cluster restart happened.
311+
*
312+
* Here, we check for these scenarios and set the cluster health to YELLOW if any are applicable.
313+
*/
314+
if (reason == UnassignedInfo.Reason.INDEX_CREATED
315+
|| (reason == UnassignedInfo.Reason.CLUSTER_RECOVERED && noActiveAllocationIds)) {
316+
return ClusterHealthStatus.YELLOW;
317+
} else {
318+
return ClusterHealthStatus.RED;
319+
}
320+
}
321+
292322
public String shortSummary() {
293323
StringBuilder sb = new StringBuilder();
294324
sb.append("[reason=").append(reason).append("]");
@@ -366,4 +396,5 @@ public int hashCode() {
366396
result = 31 * result + (failure != null ? failure.hashCode() : 0);
367397
return result;
368398
}
399+
369400
}

core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public class ThrottlingAllocationDecider extends AllocationDecider {
5959
Integer.toString(DEFAULT_CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES),
6060
(s) -> Setting.parseInt(s, 0, "cluster.routing.allocation.node_concurrent_recoveries"),
6161
Property.Dynamic, Property.NodeScope);
62-
public static final Setting<Integer> CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING =
62+
public static final Setting<Integer> CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING =
6363
Setting.intSetting("cluster.routing.allocation.node_initial_primaries_recoveries",
6464
DEFAULT_CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES, 0,
6565
Property.Dynamic, Property.NodeScope);

core/src/test/java/org/elasticsearch/cluster/ClusterHealthIT.java

+120-4
Original file line numberDiff line numberDiff line change
@@ -20,28 +20,63 @@
2020
package org.elasticsearch.cluster;
2121

2222
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
23+
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
24+
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
2325
import org.elasticsearch.cluster.health.ClusterHealthStatus;
26+
import org.elasticsearch.cluster.metadata.IndexMetaData;
27+
import org.elasticsearch.cluster.routing.ShardRouting;
28+
import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
29+
import org.elasticsearch.cluster.service.ClusterService;
2430
import org.elasticsearch.common.Priority;
31+
import org.elasticsearch.common.settings.Settings;
32+
import org.elasticsearch.common.unit.TimeValue;
2533
import org.elasticsearch.test.ESIntegTestCase;
34+
import org.elasticsearch.threadpool.ThreadPool;
35+
import org.junit.AfterClass;
36+
import org.junit.BeforeClass;
37+
38+
import java.util.concurrent.TimeUnit;
2639

2740
import static org.hamcrest.Matchers.equalTo;
2841

2942
public class ClusterHealthIT extends ESIntegTestCase {
43+
44+
private static ThreadPool threadPool;
45+
46+
@BeforeClass
47+
public static void customBeforeClass() throws Exception {
48+
threadPool = new ThreadPool("ClusterHealthIT");
49+
}
50+
51+
@AfterClass
52+
public static void customAfterClass() throws Exception {
53+
ThreadPool.terminate(threadPool, 10L, TimeUnit.SECONDS);
54+
threadPool = null;
55+
}
56+
3057
public void testSimpleLocalHealth() {
3158
createIndex("test");
3259
ensureGreen(); // master should thing it's green now.
3360

3461
for (String node : internalCluster().getNodeNames()) {
3562
// a very high time out, which should never fire due to the local flag
36-
ClusterHealthResponse health = client(node).admin().cluster().prepareHealth().setLocal(true).setWaitForEvents(Priority.LANGUID).setTimeout("30s").get("10s");
63+
ClusterHealthResponse health = client(node).admin().cluster().prepareHealth()
64+
.setLocal(true)
65+
.setWaitForEvents(Priority.LANGUID)
66+
.setTimeout("30s")
67+
.get("10s");
3768
assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
3869
assertThat(health.isTimedOut(), equalTo(false));
3970
}
4071
}
4172

4273
public void testHealth() {
4374
logger.info("--> running cluster health on an index that does not exists");
44-
ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth("test1").setWaitForYellowStatus().setTimeout("1s").execute().actionGet();
75+
ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth("test1")
76+
.setWaitForYellowStatus()
77+
.setTimeout("1s")
78+
.execute()
79+
.actionGet();
4580
assertThat(healthResponse.isTimedOut(), equalTo(true));
4681
assertThat(healthResponse.getStatus(), equalTo(ClusterHealthStatus.RED));
4782
assertThat(healthResponse.getIndices().isEmpty(), equalTo(true));
@@ -62,10 +97,91 @@ public void testHealth() {
6297
assertThat(healthResponse.getIndices().get("test1").getStatus(), equalTo(ClusterHealthStatus.GREEN));
6398

6499
logger.info("--> running cluster health on an index that does exists and an index that doesn't exists");
65-
healthResponse = client().admin().cluster().prepareHealth("test1", "test2").setWaitForYellowStatus().setTimeout("1s").execute().actionGet();
100+
healthResponse = client().admin().cluster().prepareHealth("test1", "test2")
101+
.setWaitForYellowStatus()
102+
.setTimeout("1s")
103+
.execute()
104+
.actionGet();
66105
assertThat(healthResponse.isTimedOut(), equalTo(true));
67106
assertThat(healthResponse.getStatus(), equalTo(ClusterHealthStatus.RED));
68107
assertThat(healthResponse.getIndices().get("test1").getStatus(), equalTo(ClusterHealthStatus.GREEN));
69108
assertThat(healthResponse.getIndices().size(), equalTo(1));
70109
}
71-
}
110+
111+
public void testHealthOnIndexCreation() throws Exception {
112+
final int numNodes = randomIntBetween(2, 5);
113+
logger.info("--> starting {} nodes", numNodes);
114+
final Settings nodeSettings = Settings.builder().put(
115+
ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING.getKey(), numNodes
116+
).build();
117+
internalCluster().ensureAtLeastNumDataNodes(numNodes, nodeSettings);
118+
119+
ClusterHealthResponse healthResponse = client().admin().cluster()
120+
.prepareHealth()
121+
.setWaitForGreenStatus()
122+
.setTimeout("10s")
123+
.execute()
124+
.actionGet();
125+
assertThat(healthResponse.isTimedOut(), equalTo(false));
126+
assertThat(healthResponse.getStatus(), equalTo(ClusterHealthStatus.GREEN));
127+
128+
// first, register a cluster state observer that checks cluster health
129+
// upon index creation in the cluster state
130+
final String masterNode = internalCluster().getMasterName();
131+
final String indexName = "test-idx";
132+
final ClusterService clusterService = internalCluster().clusterService(masterNode);
133+
final ClusterStateObserver observer = new ClusterStateObserver(clusterService, logger, threadPool.getThreadContext());
134+
final ClusterStateObserver.ChangePredicate validationPredicate = new ClusterStateObserver.ValidationPredicate() {
135+
@Override
136+
protected boolean validate(ClusterState newState) {
137+
return newState.status() == ClusterState.ClusterStateStatus.APPLIED
138+
&& newState.metaData().hasIndex(indexName);
139+
}
140+
};
141+
142+
final ClusterStateObserver.Listener stateListener = new ClusterStateObserver.Listener() {
143+
@Override
144+
public void onNewClusterState(ClusterState clusterState) {
145+
// make sure we have inactive primaries
146+
// see if we can terminate observing on the cluster state
147+
final ClusterStateResponse csResponse = client().admin().cluster().prepareState().execute().actionGet();
148+
boolean inactivePrimaries = false;
149+
for (ShardRouting shardRouting : csResponse.getState().routingTable().allShards(indexName)) {
150+
if (shardRouting.primary() == false) {
151+
continue;
152+
}
153+
if (shardRouting.active() == false) {
154+
inactivePrimaries = true;
155+
break;
156+
}
157+
}
158+
assertTrue(inactivePrimaries);
159+
// verify cluster health is YELLOW (even though primaries are still being allocated)
160+
final ClusterHealthResponse response = client().admin().cluster().prepareHealth(indexName).get();
161+
assertThat(response.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
162+
}
163+
@Override
164+
public void onClusterServiceClose() {
165+
fail("cluster service should not have closed");
166+
}
167+
@Override
168+
public void onTimeout(TimeValue timeout) {
169+
fail("timeout on cluster state observer");
170+
}
171+
};
172+
observer.waitForNextChange(stateListener, validationPredicate, TimeValue.timeValueSeconds(30L));
173+
final Settings settings = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numNodes)
174+
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, numNodes - 1)
175+
.build();
176+
CreateIndexResponse response = client().admin().indices().prepareCreate(indexName)
177+
.setSettings(settings)
178+
.execute()
179+
.actionGet();
180+
assertTrue(response.isAcknowledged());
181+
182+
// now, make sure we eventually get to the green state,
183+
// we have at least two nodes so this should happen
184+
ensureGreen(indexName);
185+
}
186+
187+
}

0 commit comments

Comments
 (0)