Skip to content

Commit b31fa7e

Browse files
authored
Merge pull request #7 from gmarouli/disk-indicator-review
Disk indicator review
2 parents c0c284c + e96ffa7 commit b31fa7e

File tree

2 files changed

+41
-35
lines changed

2 files changed

+41
-35
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/StableMasterHealthIndicatorService.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public class StableMasterHealthIndicatorService implements HealthIndicatorServic
7272
/**
7373
* This is the list of the impacts to be reported when the master node is determined to be unstable.
7474
*/
75-
public static final List<HealthIndicatorImpact> UNSTABLE_MASTER_IMPACTS = List.of(
75+
private static final List<HealthIndicatorImpact> UNSTABLE_MASTER_IMPACTS = List.of(
7676
new HealthIndicatorImpact(1, UNSTABLE_MASTER_INGEST_IMPACT, List.of(ImpactArea.INGEST)),
7777
new HealthIndicatorImpact(1, UNSTABLE_MASTER_DEPLOYMENT_MANAGEMENT_IMPACT, List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)),
7878
new HealthIndicatorImpact(3, UNSTABLE_MASTER_BACKUP_IMPACT, List.of(ImpactArea.BACKUP))

server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java

+40-34
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import org.apache.logging.log4j.LogManager;
1212
import org.apache.logging.log4j.Logger;
13+
import org.elasticsearch.cluster.ClusterState;
1314
import org.elasticsearch.cluster.metadata.IndexMetadata;
1415
import org.elasticsearch.cluster.node.DiscoveryNode;
1516
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
@@ -29,7 +30,6 @@
2930
import java.util.List;
3031
import java.util.Locale;
3132
import java.util.Map;
32-
import java.util.Objects;
3333
import java.util.Set;
3434
import java.util.stream.Collectors;
3535
import java.util.stream.Stream;
@@ -62,17 +62,17 @@ public HealthIndicatorResult calculate(boolean explain, HealthInfo healthInfo) {
6262
Collections.emptyList()
6363
);
6464
}
65-
Set<String> indicesWithBlock = clusterService.state()
66-
.blocks()
65+
ClusterState clusterState = clusterService.state();
66+
Set<String> indicesWithBlock = clusterState.blocks()
6767
.indices()
6868
.entrySet()
6969
.stream()
7070
.filter(entry -> entry.getValue().contains(IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK))
7171
.map(Map.Entry::getKey)
7272
.collect(Collectors.toSet());
7373
boolean hasAtLeastOneIndexReadOnlyAllowDeleteBlock = indicesWithBlock.isEmpty() == false;
74-
logMissingHealthInfoData(diskHealthInfoMap);
75-
HealthIndicatorDetails details = getDetails(explain, diskHealthInfoMap);
74+
logMissingHealthInfoData(diskHealthInfoMap, clusterState);
75+
HealthIndicatorDetails details = getDetails(explain, diskHealthInfoMap, clusterState);
7676
final HealthStatus healthStatusFromNodes = HealthStatus.merge(
7777
diskHealthInfoMap.values().stream().map(DiskHealthInfo::healthStatus)
7878
);
@@ -93,26 +93,32 @@ public HealthIndicatorResult calculate(boolean explain, HealthInfo healthInfo) {
9393
.filter(entry -> HealthStatus.RED.equals(entry.getValue().healthStatus()))
9494
.map(Map.Entry::getKey)
9595
.collect(Collectors.toSet());
96-
Set<DiscoveryNodeRole> rolesOnRedNodes = getRolesOnNodes(nodesReportingRed);
96+
Set<DiscoveryNodeRole> rolesOnRedNodes = getRolesOnNodes(nodesReportingRed, clusterState);
9797
if (hasAtLeastOneIndexReadOnlyAllowDeleteBlock || rolesOnRedNodes.stream().anyMatch(DiscoveryNodeRole::canContainData)) {
9898
healthIndicatorResult = getResultForRedIndicesOrDataNodes(
9999
nodesReportingRed,
100-
Stream.concat(indicesWithBlock.stream(), getIndicesForNodes(nodesReportingRed).stream())
100+
Stream.concat(indicesWithBlock.stream(), getIndicesForNodes(nodesReportingRed, clusterState).stream())
101101
.collect(Collectors.toSet()),
102102
true,
103103
details,
104104
healthStatus
105105
);
106106
} else {
107-
healthIndicatorResult = getResultForNonDataNodeProblem(rolesOnRedNodes, nodesReportingRed, details, healthStatus);
107+
healthIndicatorResult = getResultForNonDataNodeProblem(
108+
rolesOnRedNodes,
109+
nodesReportingRed,
110+
details,
111+
healthStatus,
112+
clusterState
113+
);
108114
}
109115
} else {
110116
final Set<String> nodesReportingYellow = diskHealthInfoMap.entrySet()
111117
.stream()
112118
.filter(entry -> HealthStatus.YELLOW.equals(entry.getValue().healthStatus()))
113119
.map(Map.Entry::getKey)
114120
.collect(Collectors.toSet());
115-
Set<DiscoveryNodeRole> rolesOnYellowNodes = getRolesOnNodes(nodesReportingYellow);
121+
Set<DiscoveryNodeRole> rolesOnYellowNodes = getRolesOnNodes(nodesReportingYellow, clusterState);
116122
if (hasAtLeastOneIndexReadOnlyAllowDeleteBlock) {
117123
healthIndicatorResult = getResultForRedIndicesOrDataNodes(
118124
nodesReportingYellow,
@@ -122,9 +128,15 @@ public HealthIndicatorResult calculate(boolean explain, HealthInfo healthInfo) {
122128
healthStatus
123129
);
124130
} else if (rolesOnYellowNodes.stream().anyMatch(DiscoveryNodeRole::canContainData)) {
125-
healthIndicatorResult = getResultForYellowDataNodes(nodesReportingYellow, details, healthStatus);
131+
healthIndicatorResult = getResultForYellowDataNodes(nodesReportingYellow, details, healthStatus, clusterState);
126132
} else {
127-
healthIndicatorResult = getResultForNonDataNodeProblem(rolesOnYellowNodes, nodesReportingYellow, details, healthStatus);
133+
healthIndicatorResult = getResultForNonDataNodeProblem(
134+
rolesOnYellowNodes,
135+
nodesReportingYellow,
136+
details,
137+
healthStatus,
138+
clusterState
139+
);
128140
}
129141
}
130142
}
@@ -135,14 +147,14 @@ private HealthIndicatorResult getResultForNonDataNodeProblem(
135147
Set<DiscoveryNodeRole> roles,
136148
Set<String> problemNodes,
137149
HealthIndicatorDetails details,
138-
HealthStatus status
150+
HealthStatus status,
151+
ClusterState clusterState
139152
) {
140153
String symptom;
141154
final List<HealthIndicatorImpact> impacts;
142155
final List<Diagnosis> diagnosisList;
143156
if (roles.contains(DiscoveryNodeRole.MASTER_ROLE)) {
144-
Set<DiscoveryNode> problemMasterNodes = clusterService.state()
145-
.nodes()
157+
Set<DiscoveryNode> problemMasterNodes = clusterState.nodes()
146158
.getNodes()
147159
.values()
148160
.stream()
@@ -259,9 +271,10 @@ public HealthIndicatorResult getResultForRedIndicesOrDataNodes(
259271
public HealthIndicatorResult getResultForYellowDataNodes(
260272
Set<String> problemNodes,
261273
HealthIndicatorDetails details,
262-
HealthStatus status
274+
HealthStatus status,
275+
ClusterState clusterState
263276
) {
264-
final Set<String> problemIndices = getIndicesForNodes(problemNodes);
277+
final Set<String> problemIndices = getIndicesForNodes(problemNodes, clusterState);
265278
final String symptom = String.format(
266279
Locale.ROOT,
267280
"%d data node%s increased disk usage. As a result %d %s at risk of not being able to process any more " + "updates.",
@@ -298,9 +311,8 @@ public HealthIndicatorResult getResultForYellowDataNodes(
298311
return createIndicator(status, symptom, details, impacts, diagnosisList);
299312
}
300313

301-
private Set<DiscoveryNodeRole> getRolesOnNodes(Set<String> nodeIds) {
302-
return clusterService.state()
303-
.nodes()
314+
private Set<DiscoveryNodeRole> getRolesOnNodes(Set<String> nodeIds, ClusterState clusterState) {
315+
return clusterState.nodes()
304316
.getNodes()
305317
.values()
306318
.stream()
@@ -310,9 +322,8 @@ private Set<DiscoveryNodeRole> getRolesOnNodes(Set<String> nodeIds) {
310322
.collect(Collectors.toSet());
311323
}
312324

313-
private Set<String> getIndicesForNodes(Set<String> nodes) {
314-
return clusterService.state()
315-
.routingTable()
325+
private Set<String> getIndicesForNodes(Set<String> nodes, ClusterState clusterState) {
326+
return clusterState.routingTable()
316327
.allShards()
317328
.stream()
318329
.filter(routing -> nodes.contains(routing.currentNodeId()))
@@ -325,9 +336,9 @@ private Set<String> getIndicesForNodes(Set<String> nodes) {
325336
* not ordinarly important, but could be useful in tracking down problems where nodes have stopped reporting health node information.
326337
* @param diskHealthInfoMap A map of nodeId to DiskHealthInfo
327338
*/
328-
private void logMissingHealthInfoData(Map<String, DiskHealthInfo> diskHealthInfoMap) {
339+
private void logMissingHealthInfoData(Map<String, DiskHealthInfo> diskHealthInfoMap, ClusterState clusterState) {
329340
if (logger.isDebugEnabled()) {
330-
Set<DiscoveryNode> nodesInClusterState = new HashSet<>(clusterService.state().nodes());
341+
Set<DiscoveryNode> nodesInClusterState = new HashSet<>(clusterState.nodes());
331342
Set<String> nodeIdsInClusterState = nodesInClusterState.stream().map(DiscoveryNode::getId).collect(Collectors.toSet());
332343
Set<String> nodeIdsInHealthInfo = diskHealthInfoMap.keySet();
333344
if (nodeIdsInHealthInfo.containsAll(nodeIdsInClusterState) == false) {
@@ -340,7 +351,7 @@ private void logMissingHealthInfoData(Map<String, DiskHealthInfo> diskHealthInfo
340351
}
341352
}
342353

343-
private HealthIndicatorDetails getDetails(boolean explain, Map<String, DiskHealthInfo> diskHealthInfoMap) {
354+
private HealthIndicatorDetails getDetails(boolean explain, Map<String, DiskHealthInfo> diskHealthInfoMap, ClusterState clusterState) {
344355
if (explain == false) {
345356
return HealthIndicatorDetails.EMPTY;
346357
}
@@ -351,7 +362,7 @@ private HealthIndicatorDetails getDetails(boolean explain, Map<String, DiskHealt
351362
builder.startObject();
352363
String nodeId = entry.getKey();
353364
builder.field("node_id", nodeId);
354-
String nodeName = getNameForNodeId(nodeId);
365+
String nodeName = getNameForNodeId(nodeId, clusterState);
355366
if (nodeName != null) {
356367
builder.field("name", nodeName);
357368
}
@@ -374,13 +385,8 @@ private HealthIndicatorDetails getDetails(boolean explain, Map<String, DiskHealt
374385
* @return The current name of the node, or null if the node is not in the cluster state or does not have a name
375386
*/
376387
@Nullable
377-
private String getNameForNodeId(String nodeId) {
378-
DiscoveryNode node = clusterService.state().nodes().get(nodeId);
379-
if (node == null) {
380-
return null;
381-
} else {
382-
String nodeName = node.getName();
383-
return Objects.requireNonNullElse(nodeName, null);
384-
}
388+
private String getNameForNodeId(String nodeId, ClusterState clusterState) {
389+
DiscoveryNode node = clusterState.nodes().get(nodeId);
390+
return node == null ? null : node.getName();
385391
}
386392
}

0 commit comments

Comments
 (0)