Skip to content

Commit 049970a

Browse files
committed
Only connect to new nodes on new cluster state (#39629)
Today, when applying a new cluster state, we attempt to connect to all of its nodes as a blocking part of the application process. This is the right thing to do with new nodes, and is a no-op on any already-connected nodes, but is questionable on known nodes from which we are currently disconnected: there is a risk that we are partitioned from these nodes, so that any attempt to connect to them will hang until it times out. This can dramatically slow down the application of new cluster states, which hinders the recovery of the cluster during certain kinds of partition. If nodes are disconnected from the master then it is likely that they are to be removed as part of a subsequent cluster state update, so there's no need to try to reconnect to them like this. Moreover, there is no need to attempt to reconnect to disconnected nodes as part of the cluster state application process, because we periodically try to reconnect to any disconnected nodes, and handle their disconnectedness reasonably gracefully in the meantime. This commit alters this behaviour to avoid reconnecting to known nodes during cluster state application. Resolves #29025.
1 parent aeb0116 commit 049970a

File tree

8 files changed

+644
-244
lines changed

8 files changed

+644
-244
lines changed

server/src/main/java/org/elasticsearch/cluster/NodeConnectionsService.java

+329-107
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/cluster/service/ClusterApplierService.java

+14-1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import java.util.Queue;
5656
import java.util.concurrent.ConcurrentHashMap;
5757
import java.util.concurrent.CopyOnWriteArrayList;
58+
import java.util.concurrent.CountDownLatch;
5859
import java.util.concurrent.TimeUnit;
5960
import java.util.concurrent.atomic.AtomicReference;
6061
import java.util.function.Consumer;
@@ -450,7 +451,7 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl
450451
}
451452

452453
logger.trace("connecting to nodes of cluster state with version {}", newClusterState.version());
453-
nodeConnectionsService.connectToNodes(newClusterState.nodes());
454+
connectToNodesAndWait(newClusterState);
454455

455456
// nothing to do until we actually recover from the gateway or any other block indicates we need to disable persistency
456457
if (clusterChangedEvent.state().blocks().disableStatePersistence() == false && clusterChangedEvent.metaDataChanged()) {
@@ -470,6 +471,18 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl
470471
callClusterStateListeners(clusterChangedEvent);
471472
}
472473

474+
protected void connectToNodesAndWait(ClusterState newClusterState) {
475+
// can't wait for an ActionFuture on the cluster applier thread, but we do want to block the thread here, so use a CountDownLatch.
476+
final CountDownLatch countDownLatch = new CountDownLatch(1);
477+
nodeConnectionsService.connectToNodes(newClusterState.nodes(), countDownLatch::countDown);
478+
try {
479+
countDownLatch.await();
480+
} catch (InterruptedException e) {
481+
logger.debug("interrupted while connecting to nodes, continuing", e);
482+
Thread.currentThread().interrupt();
483+
}
484+
}
485+
473486
private void callClusterStateAppliers(ClusterChangedEvent clusterChangedEvent) {
474487
clusterStateAppliers.forEach(applier -> {
475488
logger.trace("calling [{}] with change to version [{}]", applier, clusterChangedEvent.state().version());

server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java

+258-84
Large diffs are not rendered by default.

server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java

+5-7
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
import org.elasticsearch.cluster.metadata.MetaData;
4545
import org.elasticsearch.cluster.node.DiscoveryNode;
4646
import org.elasticsearch.cluster.node.DiscoveryNode.Role;
47-
import org.elasticsearch.cluster.node.DiscoveryNodes;
4847
import org.elasticsearch.cluster.service.ClusterApplierService;
4948
import org.elasticsearch.cluster.service.ClusterService;
5049
import org.elasticsearch.common.Nullable;
@@ -1755,12 +1754,7 @@ protected Optional<DisruptableMockTransport> getDisruptableMockTransport(Transpo
17551754
clusterService = new ClusterService(settings, clusterSettings, masterService, clusterApplierService);
17561755
clusterService.setNodeConnectionsService(
17571756
new NodeConnectionsService(clusterService.getSettings(), deterministicTaskQueue.getThreadPool(this::onNode),
1758-
transportService) {
1759-
@Override
1760-
public void connectToNodes(DiscoveryNodes discoveryNodes) {
1761-
// override this method as it does blocking calls
1762-
}
1763-
});
1757+
transportService));
17641758
final Collection<BiConsumer<DiscoveryNode, ClusterState>> onJoinValidators =
17651759
Collections.singletonList((dn, cs) -> extraJoinValidators.forEach(validator -> validator.accept(dn, cs)));
17661760
coordinator = new Coordinator("test_node", settings, clusterSettings, transportService, writableRegistry(),
@@ -2149,6 +2143,10 @@ public void onNewClusterState(String source, Supplier<ClusterState> clusterState
21492143
}
21502144
}
21512145

2146+
@Override
2147+
protected void connectToNodesAndWait(ClusterState newClusterState) {
2148+
// don't do anything, and don't block
2149+
}
21522150
}
21532151

21542152
private static DiscoveryNode createDiscoveryNode(int nodeIndex, boolean masterEligible) {

server/src/test/java/org/elasticsearch/cluster/service/ClusterApplierServiceTests.java

+4-14
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,13 @@
1919
package org.elasticsearch.cluster.service;
2020

2121
import org.apache.logging.log4j.Level;
22-
import org.apache.logging.log4j.Logger;
2322
import org.apache.logging.log4j.LogManager;
23+
import org.apache.logging.log4j.Logger;
2424
import org.elasticsearch.Version;
2525
import org.elasticsearch.cluster.ClusterName;
2626
import org.elasticsearch.cluster.ClusterState;
2727
import org.elasticsearch.cluster.ClusterStateObserver;
2828
import org.elasticsearch.cluster.LocalNodeMasterListener;
29-
import org.elasticsearch.cluster.NodeConnectionsService;
3029
import org.elasticsearch.cluster.block.ClusterBlocks;
3130
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
3231
import org.elasticsearch.cluster.metadata.MetaData;
@@ -54,6 +53,7 @@
5453

5554
import static java.util.Collections.emptyMap;
5655
import static java.util.Collections.emptySet;
56+
import static org.elasticsearch.test.ClusterServiceUtils.createNoOpNodeConnectionsService;
5757
import static org.elasticsearch.test.ClusterServiceUtils.setState;
5858
import static org.hamcrest.Matchers.containsString;
5959
import static org.hamcrest.Matchers.is;
@@ -88,23 +88,13 @@ public void tearDown() throws Exception {
8888
super.tearDown();
8989
}
9090

91-
TimedClusterApplierService createTimedClusterService(boolean makeMaster) {
91+
private TimedClusterApplierService createTimedClusterService(boolean makeMaster) {
9292
DiscoveryNode localNode = new DiscoveryNode("node1", buildNewFakeTransportAddress(), emptyMap(),
9393
emptySet(), Version.CURRENT);
9494
TimedClusterApplierService timedClusterApplierService = new TimedClusterApplierService(Settings.builder().put("cluster.name",
9595
"ClusterApplierServiceTests").build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
9696
threadPool);
97-
timedClusterApplierService.setNodeConnectionsService(new NodeConnectionsService(Settings.EMPTY, null, null) {
98-
@Override
99-
public void connectToNodes(DiscoveryNodes discoveryNodes) {
100-
// skip
101-
}
102-
103-
@Override
104-
public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) {
105-
// skip
106-
}
107-
});
97+
timedClusterApplierService.setNodeConnectionsService(createNoOpNodeConnectionsService());
10898
timedClusterApplierService.setInitialState(ClusterState.builder(new ClusterName("ClusterApplierServiceTests"))
10999
.nodes(DiscoveryNodes.builder()
110100
.add(localNode)

server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java

+6-17
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,11 @@ private final class TestClusterNode {
748748
protected PrioritizedEsThreadPoolExecutor createThreadPoolExecutor() {
749749
return new MockSinglePrioritizingExecutor(node.getName(), deterministicTaskQueue);
750750
}
751+
752+
@Override
753+
protected void connectToNodesAndWait(ClusterState newClusterState) {
754+
// don't do anything, and don't block
755+
}
751756
});
752757
mockTransport = new DisruptableMockTransport(node, logger) {
753758
@Override
@@ -992,23 +997,7 @@ public void start(ClusterState initialState) {
992997
coordinator.start();
993998
masterService.start();
994999
clusterService.getClusterApplierService().setNodeConnectionsService(
995-
new NodeConnectionsService(clusterService.getSettings(), threadPool, transportService) {
996-
@Override
997-
public void connectToNodes(DiscoveryNodes discoveryNodes) {
998-
// override this method as it does blocking calls
999-
boolean callSuper = true;
1000-
for (final DiscoveryNode node : discoveryNodes) {
1001-
try {
1002-
transportService.connectToNode(node);
1003-
} catch (Exception e) {
1004-
callSuper = false;
1005-
}
1006-
}
1007-
if (callSuper) {
1008-
super.connectToNodes(discoveryNodes);
1009-
}
1010-
}
1011-
});
1000+
new NodeConnectionsService(clusterService.getSettings(), threadPool, transportService));
10121001
clusterService.getClusterApplierService().start();
10131002
indicesService.start();
10141003
indicesClusterStateService.start();

test/framework/src/main/java/org/elasticsearch/test/ClusterServiceUtils.java

+16-11
Original file line numberDiff line numberDiff line change
@@ -137,17 +137,7 @@ public static ClusterService createClusterService(ThreadPool threadPool, Discove
137137
.put("cluster.name", "ClusterServiceTests")
138138
.build();
139139
ClusterService clusterService = new ClusterService(settings, clusterSettings, threadPool);
140-
clusterService.setNodeConnectionsService(new NodeConnectionsService(Settings.EMPTY, null, null) {
141-
@Override
142-
public void connectToNodes(DiscoveryNodes discoveryNodes) {
143-
// skip
144-
}
145-
146-
@Override
147-
public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) {
148-
// skip
149-
}
150-
});
140+
clusterService.setNodeConnectionsService(createNoOpNodeConnectionsService());
151141
ClusterState initialClusterState = ClusterState.builder(new ClusterName(ClusterServiceUtils.class.getSimpleName()))
152142
.nodes(DiscoveryNodes.builder()
153143
.add(localNode)
@@ -162,6 +152,21 @@ public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) {
162152
return clusterService;
163153
}
164154

155+
public static NodeConnectionsService createNoOpNodeConnectionsService() {
156+
return new NodeConnectionsService(Settings.EMPTY, null, null) {
157+
@Override
158+
public void connectToNodes(DiscoveryNodes discoveryNodes, Runnable onCompletion) {
159+
// don't do anything
160+
onCompletion.run();
161+
}
162+
163+
@Override
164+
public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) {
165+
// don't do anything
166+
}
167+
};
168+
}
169+
165170
public static ClusterStatePublisher createClusterStatePublisher(ClusterApplier clusterApplier) {
166171
return (event, publishListener, ackListener) ->
167172
clusterApplier.onNewClusterState("mock_publish_to_self[" + event.source() + "]", () -> event.state(),

test/framework/src/main/java/org/elasticsearch/test/disruption/NetworkDisruption.java

+12-3
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.HashSet;
3939
import java.util.Random;
4040
import java.util.Set;
41+
import java.util.concurrent.CountDownLatch;
4142
import java.util.function.BiConsumer;
4243

4344
import static org.junit.Assert.assertFalse;
@@ -49,7 +50,7 @@
4950
*/
5051
public class NetworkDisruption implements ServiceDisruptionScheme {
5152

52-
private final Logger logger = LogManager.getLogger(NetworkDisruption.class);
53+
private static final Logger logger = LogManager.getLogger(NetworkDisruption.class);
5354

5455
private final DisruptedLinks disruptedLinks;
5556
private final NetworkLinkDisruptionType networkLinkDisruptionType;
@@ -103,9 +104,17 @@ public void ensureHealthy(InternalTestCluster cluster) {
103104
* handy to be able to ensure this happens faster
104105
*/
105106
public static void ensureFullyConnectedCluster(InternalTestCluster cluster) {
106-
for (String node: cluster.getNodeNames()) {
107+
final String[] nodeNames = cluster.getNodeNames();
108+
final CountDownLatch countDownLatch = new CountDownLatch(nodeNames.length);
109+
for (String node : nodeNames) {
107110
ClusterState stateOnNode = cluster.getInstance(ClusterService.class, node).state();
108-
cluster.getInstance(NodeConnectionsService.class, node).connectToNodes(stateOnNode.nodes());
111+
cluster.getInstance(NodeConnectionsService.class, node).reconnectToNodes(stateOnNode.nodes(), countDownLatch::countDown);
112+
}
113+
114+
try {
115+
countDownLatch.await();
116+
} catch (InterruptedException e) {
117+
throw new AssertionError(e);
109118
}
110119
}
111120

0 commit comments

Comments
 (0)