Skip to content

Commit 97df136

Browse files
Optimize log cluster health performance. (#87723)
Optimize log cluster health performance by not building cluster health instances needlessly. Relates to #77466.
1 parent b672194 commit 97df136

File tree

3 files changed

+56
-10
lines changed

3 files changed

+56
-10
lines changed

docs/changelog/87723.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 87723
2+
summary: Optimize log cluster health performance.
3+
area: Allocation
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,21 @@ public class IndexRoutingTable implements SimpleDiffable<IndexRoutingTable> {
6161
// note, we assume that when the index routing is created, ShardRoutings are created for all possible number of
6262
// shards with state set to UNASSIGNED
6363
private final IndexShardRoutingTable[] shards;
64-
64+
private final boolean allShardsActive;
6565
private final List<ShardRouting> allActiveShards;
6666

6767
IndexRoutingTable(Index index, IndexShardRoutingTable[] shards) {
6868
this.index = index;
6969
this.shuffler = new RotationShardShuffler(Randomness.get().nextInt());
7070
this.shards = shards;
71+
int totalShardCount = 0;
7172
List<ShardRouting> allActiveShards = new ArrayList<>();
7273
for (IndexShardRoutingTable shard : shards) {
7374
allActiveShards.addAll(shard.activeShards());
75+
totalShardCount += shard.size();
7476
}
7577
this.allActiveShards = CollectionUtils.wrapUnmodifiableOrEmptySingleton(allActiveShards);
78+
this.allShardsActive = totalShardCount == allActiveShards.size();
7679
}
7780

7881
/**
@@ -217,6 +220,10 @@ public boolean allPrimaryShardsActive() {
217220
return primaryShardsActive() == shards.length;
218221
}
219222

223+
public boolean allShardsActive() {
224+
return this.allShardsActive;
225+
}
226+
220227
/**
221228
* Calculates the number of primary shards in active state in routing table
222229
*

server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@
1414
import org.elasticsearch.cluster.ClusterState;
1515
import org.elasticsearch.cluster.RestoreInProgress;
1616
import org.elasticsearch.cluster.health.ClusterHealthStatus;
17-
import org.elasticsearch.cluster.health.ClusterStateHealth;
1817
import org.elasticsearch.cluster.metadata.AutoExpandReplicas;
1918
import org.elasticsearch.cluster.metadata.IndexMetadata;
2019
import org.elasticsearch.cluster.metadata.Metadata;
2120
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
2221
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Type;
2322
import org.elasticsearch.cluster.node.DiscoveryNode;
23+
import org.elasticsearch.cluster.routing.IndexRoutingTable;
24+
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
2425
import org.elasticsearch.cluster.routing.RerouteService;
2526
import org.elasticsearch.cluster.routing.RoutingNode;
2627
import org.elasticsearch.cluster.routing.RoutingNodes;
@@ -39,6 +40,7 @@
3940
import org.elasticsearch.common.util.set.Sets;
4041
import org.elasticsearch.gateway.GatewayAllocator;
4142
import org.elasticsearch.gateway.PriorityComparator;
43+
import org.elasticsearch.rest.RestStatus;
4244
import org.elasticsearch.snapshots.SnapshotsInfoService;
4345

4446
import java.util.ArrayList;
@@ -55,6 +57,7 @@
5557

5658
import static java.util.Collections.emptyList;
5759
import static java.util.Collections.singletonList;
60+
import static org.elasticsearch.cluster.health.ClusterShardHealth.getInactivePrimaryHealth;
5861
import static org.elasticsearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING;
5962

6063
/**
@@ -168,7 +171,7 @@ private static ClusterState buildResultAndLogHealthChange(ClusterState oldState,
168171
}
169172
final ClusterState newState = newStateBuilder.build();
170173

171-
logClusterHealthStateChange(new ClusterStateHealth(oldState), new ClusterStateHealth(newState), reason);
174+
logClusterHealthStateChange(oldState, newState, reason);
172175

173176
return newState;
174177
}
@@ -495,13 +498,10 @@ public ClusterState reroute(ClusterState clusterState, String reason) {
495498
return buildResultAndLogHealthChange(clusterState, allocation, reason);
496499
}
497500

498-
private static void logClusterHealthStateChange(
499-
ClusterStateHealth previousStateHealth,
500-
ClusterStateHealth newStateHealth,
501-
String reason
502-
) {
503-
ClusterHealthStatus previousHealth = previousStateHealth.getStatus();
504-
ClusterHealthStatus currentHealth = newStateHealth.getStatus();
501+
private static void logClusterHealthStateChange(final ClusterState previousState, final ClusterState newState, String reason) {
502+
ClusterHealthStatus previousHealth = getHealthStatus(previousState);
503+
ClusterHealthStatus currentHealth = getHealthStatus(newState);
504+
505505
if (previousHealth.equals(currentHealth) == false) {
506506
logger.info(
507507
new ESLogMessage("Cluster health status changed from [{}] to [{}] (reason: [{}]).").argAndField(
@@ -513,6 +513,40 @@ private static void logClusterHealthStateChange(
513513
}
514514
}
515515

516+
public static ClusterHealthStatus getHealthStatus(final ClusterState clusterState) {
517+
if (clusterState.blocks().hasGlobalBlockWithStatus(RestStatus.SERVICE_UNAVAILABLE)) {
518+
return ClusterHealthStatus.RED;
519+
}
520+
521+
ClusterHealthStatus computeStatus = ClusterHealthStatus.GREEN;
522+
for (String index : clusterState.metadata().getConcreteAllIndices()) {
523+
IndexRoutingTable indexRoutingTable = clusterState.routingTable().index(index);
524+
if (indexRoutingTable == null) {
525+
continue;
526+
}
527+
if (indexRoutingTable.allShardsActive()) {
528+
// GREEN index
529+
continue;
530+
}
531+
532+
for (int i = 0; i < indexRoutingTable.size(); i++) {
533+
IndexShardRoutingTable indexShardRoutingTable = indexRoutingTable.shard(i);
534+
ShardRouting primary = indexShardRoutingTable.primaryShard();
535+
if (primary.active()) {
536+
// index has inactive replicas
537+
computeStatus = ClusterHealthStatus.YELLOW;
538+
continue;
539+
}
540+
computeStatus = getInactivePrimaryHealth(primary);
541+
if (computeStatus == ClusterHealthStatus.RED) {
542+
logger.debug("One of inactive primary shard {} causes cluster state RED.", primary.shardId());
543+
return ClusterHealthStatus.RED;
544+
}
545+
}
546+
}
547+
return computeStatus;
548+
}
549+
516550
private static boolean hasDeadNodes(RoutingAllocation allocation) {
517551
for (RoutingNode routingNode : allocation.routingNodes()) {
518552
if (allocation.nodes().getDataNodes().containsKey(routingNode.nodeId()) == false) {

0 commit comments

Comments
 (0)