Skip to content

Commit 420825c

Browse files
committed
Strengthen validateClusterFormed check (#49248)
Strengthens the validateClusterFormed check that is used by the test infrastructure to make sure that nodes are properly connected and know about each other. Is used in situations where the cluster is scaled up and down, and where there previously was a network disruption that has been healed. Closes #49243
1 parent d9835f7 commit 420825c

File tree

2 files changed

+25
-26
lines changed

2 files changed

+25
-26
lines changed

test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,9 @@
161161
import static org.hamcrest.Matchers.equalTo;
162162
import static org.hamcrest.Matchers.greaterThan;
163163
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
164-
import static org.junit.Assert.assertEquals;
165164
import static org.hamcrest.Matchers.not;
166165
import static org.hamcrest.Matchers.nullValue;
166+
import static org.junit.Assert.assertEquals;
167167
import static org.junit.Assert.assertFalse;
168168
import static org.junit.Assert.assertThat;
169169
import static org.junit.Assert.assertTrue;
@@ -1239,29 +1239,35 @@ private synchronized void reset(boolean wipeData) throws IOException {
12391239

12401240
/** ensure a cluster is formed with all published nodes. */
12411241
public synchronized void validateClusterFormed() {
1242-
String name = randomFrom(random, getNodeNames());
1243-
validateClusterFormed(name);
1244-
}
1245-
1246-
/** ensure a cluster is formed with all published nodes, but do so by using the client of the specified node */
1247-
private synchronized void validateClusterFormed(String viaNode) {
1248-
Set<DiscoveryNode> expectedNodes = new HashSet<>();
1242+
final Set<DiscoveryNode> expectedNodes = new HashSet<>();
12491243
for (NodeAndClient nodeAndClient : nodes.values()) {
12501244
expectedNodes.add(getInstanceFromNode(ClusterService.class, nodeAndClient.node()).localNode());
12511245
}
1252-
logger.trace("validating cluster formed via [{}], expecting {}", viaNode, expectedNodes);
1253-
final Client client = client(viaNode);
1246+
logger.trace("validating cluster formed, expecting {}", expectedNodes);
1247+
12541248
try {
12551249
assertBusy(() -> {
1256-
DiscoveryNodes discoveryNodes = client.admin().cluster().prepareState().get().getState().nodes();
1257-
assertEquals(expectedNodes.size(), discoveryNodes.getSize());
1258-
for (DiscoveryNode expectedNode : expectedNodes) {
1259-
assertTrue("Expected node to exist: " + expectedNode, discoveryNodes.nodeExists(expectedNode));
1260-
}
1250+
final List<ClusterState> states = nodes.values().stream()
1251+
.map(node -> getInstanceFromNode(ClusterService.class, node.node()))
1252+
.map(ClusterService::state)
1253+
.collect(Collectors.toList());
1254+
final String debugString = ", expected nodes: " + expectedNodes + " and actual cluster states " + states;
1255+
// all nodes have a master
1256+
assertTrue("Missing master" + debugString, states.stream().allMatch(cs -> cs.nodes().getMasterNodeId() != null));
1257+
// all nodes have the same master (in same term)
1258+
assertEquals("Not all masters in same term" + debugString, 1,
1259+
states.stream().mapToLong(ClusterState::term).distinct().count());
1260+
// all nodes know about all other nodes
1261+
states.forEach(cs -> {
1262+
DiscoveryNodes discoveryNodes = cs.nodes();
1263+
assertEquals("Node size mismatch" + debugString, expectedNodes.size(), discoveryNodes.getSize());
1264+
for (DiscoveryNode expectedNode : expectedNodes) {
1265+
assertTrue("Expected node to exist: " + expectedNode + debugString, discoveryNodes.nodeExists(expectedNode));
1266+
}
1267+
});
12611268
}, 30, TimeUnit.SECONDS);
12621269
} catch (AssertionError ae) {
1263-
throw new IllegalStateException("cluster failed to form with expected nodes " + expectedNodes + " and actual nodes " +
1264-
client.admin().cluster().prepareState().get().getState().nodes());
1270+
throw new IllegalStateException("cluster failed to form", ae);
12651271
} catch (Exception e) {
12661272
throw new IllegalStateException(e);
12671273
}
@@ -1821,8 +1827,7 @@ private void restartNode(NodeAndClient nodeAndClient, RestartCallback callback)
18211827

18221828
if (callback.validateClusterForming() || excludedNodeIds.isEmpty() == false) {
18231829
// we have to validate cluster size to ensure that the restarted node has rejoined the cluster if it was master-eligible;
1824-
// we have to do this via the node that was just restarted as it may be that the master didn't yet process the fact that it left
1825-
validateClusterFormed(nodeAndClient.name);
1830+
validateClusterFormed();
18261831
}
18271832

18281833
if (excludedNodeIds.isEmpty() == false) {

test/framework/src/main/java/org/elasticsearch/test/disruption/NetworkDisruption.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
package org.elasticsearch.test.disruption;
2121

2222
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
23-
2423
import org.apache.logging.log4j.LogManager;
2524
import org.apache.logging.log4j.Logger;
2625
import org.elasticsearch.cluster.ClusterState;
@@ -41,8 +40,6 @@
4140
import java.util.concurrent.CountDownLatch;
4241
import java.util.function.BiConsumer;
4342

44-
import static org.junit.Assert.assertFalse;
45-
4643
/**
4744
* Network disruptions are modeled using two components:
4845
* 1) the {@link DisruptedLinks} represents the links in the network that are to be disrupted
@@ -119,10 +116,7 @@ public static void ensureFullyConnectedCluster(InternalTestCluster cluster) {
119116
}
120117

121118
protected void ensureNodeCount(InternalTestCluster cluster) {
122-
assertFalse("cluster failed to form after disruption was healed", cluster.client().admin().cluster().prepareHealth()
123-
.setWaitForNodes(String.valueOf(cluster.size()))
124-
.setWaitForNoRelocatingShards(true)
125-
.get().isTimedOut());
119+
cluster.validateClusterFormed();
126120
}
127121

128122
@Override

0 commit comments

Comments
 (0)