From 24c157268c6ee014874dae660401014bb5bca923 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 20 Aug 2018 17:13:57 +0200 Subject: [PATCH 1/3] Use a dedicated ConnectionManger for RemoteClusterConnection This change introduces a dedicated ConnectionManager for every RemoteClusterConnection such that there is not state shared with the TransportService internal ConnectionManager. All connections to a remote cluster are isolated from the TransportService but still uses the TransportService and it's internal properties like the Transport, tracing and internal listener actions on disconnects etc. This allows a remote cluster connection to have a different lifecycle than a local cluster connection, also local discovery code doesn't get notified if there is a disconnect on from a remote cluster and each connection can use it's own dedicated connection profile which allows to have a reduced set of connections per cluster without conflicting with the local cluster. Closes #31835 --- .../transport/ConnectionManager.java | 67 ++++++++------- .../transport/RemoteClusterConnection.java | 85 +++++++++---------- .../transport/RemoteClusterService.java | 8 +- .../transport/TransportService.java | 25 ++++-- .../TransportClientNodesServiceTests.java | 4 +- .../cluster/NodeConnectionsServiceTests.java | 2 +- .../transport/ConnectionManagerTests.java | 6 +- .../transport/RemoteClusterClientTests.java | 18 ++-- .../RemoteClusterConnectionTests.java | 58 +++++++------ .../transport/RemoteClusterServiceTests.java | 19 +++-- 10 files changed, 164 insertions(+), 128 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/transport/ConnectionManager.java b/server/src/main/java/org/elasticsearch/transport/ConnectionManager.java index 84c337399d5b4..0c6be15fd92b7 100644 --- a/server/src/main/java/org/elasticsearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/elasticsearch/transport/ConnectionManager.java @@ -44,6 +44,7 @@ import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -62,6 +63,7 @@ public class ConnectionManager implements Closeable { private final TimeValue pingSchedule; private final ConnectionProfile defaultProfile; private final Lifecycle lifecycle = new Lifecycle(); + private final AtomicBoolean closed = new AtomicBoolean(false); private final ReadWriteLock closeLock = new ReentrantReadWriteLock(); private final DelegatingNodeConnectionListener connectionListener = new DelegatingNodeConnectionListener(); @@ -83,7 +85,9 @@ public ConnectionManager(Settings settings, Transport transport, ThreadPool thre } public void addListener(TransportConnectionListener listener) { - this.connectionListener.listeners.add(listener); + if (connectionListener.listeners.contains(listener) == false) { + this.connectionListener.listeners.add(listener); + } } public void removeListener(TransportConnectionListener listener) { @@ -186,45 +190,50 @@ public void disconnectFromNode(DiscoveryNode node) { } } - public int connectedNodeCount() { + /** + * Returns the number of nodes this manager is connected to. + */ + public int size() { return connectedNodes.size(); } @Override public void close() { - lifecycle.moveToStopped(); - CountDownLatch latch = new CountDownLatch(1); + if (closed.compareAndSet(false, true)) { + lifecycle.moveToStopped(); + CountDownLatch latch = new CountDownLatch(1); - // TODO: Consider moving all read/write lock (in Transport and this class) to the TransportService - threadPool.generic().execute(() -> { - closeLock.writeLock().lock(); - try { - // we are holding a write lock so nobody modifies the connectedNodes / openConnections map - it's safe to first close - // all instances and then clear them maps - Iterator> iterator = connectedNodes.entrySet().iterator(); - while (iterator.hasNext()) { - Map.Entry next = iterator.next(); - try { - IOUtils.closeWhileHandlingException(next.getValue()); - } finally { - iterator.remove(); + // TODO: Consider moving all read/write lock (in Transport and this class) to the TransportService + threadPool.generic().execute(() -> { + closeLock.writeLock().lock(); + try { + // we are holding a write lock so nobody modifies the connectedNodes / openConnections map - it's safe to first close + // all instances and then clear them maps + Iterator> iterator = connectedNodes.entrySet().iterator(); + while (iterator.hasNext()) { + Map.Entry next = iterator.next(); + try { + IOUtils.closeWhileHandlingException(next.getValue()); + } finally { + iterator.remove(); + } } + } finally { + closeLock.writeLock().unlock(); + latch.countDown(); } - } finally { - closeLock.writeLock().unlock(); - latch.countDown(); - } - }); + }); - try { try { - latch.await(30, TimeUnit.SECONDS); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - // ignore + try { + latch.await(30, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + // ignore + } + } finally { + lifecycle.moveToClosed(); } - } finally { - lifecycle.moveToClosed(); } } diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java index 15cf7899dc03f..c38b424e9a704 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java @@ -35,6 +35,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; @@ -80,16 +81,17 @@ final class RemoteClusterConnection extends AbstractComponent implements TransportConnectionListener, Closeable { private final TransportService transportService; + private final ConnectionManager connectionManager; private final ConnectionProfile remoteProfile; private final ConnectedNodes connectedNodes; private final String clusterAlias; private final int maxNumRemoteConnections; private final Predicate nodePredicate; + private final ThreadPool threadPool; private volatile List> seedNodes; private volatile boolean skipUnavailable; private final ConnectHandler connectHandler; private SetOnce remoteClusterName = new SetOnce<>(); - private final ClusterName localClusterName; /** * Creates a new {@link RemoteClusterConnection} @@ -101,9 +103,9 @@ final class RemoteClusterConnection extends AbstractComponent implements Transpo * @param nodePredicate a predicate to filter eligible remote nodes to connect to */ RemoteClusterConnection(Settings settings, String clusterAlias, List> seedNodes, - TransportService transportService, int maxNumRemoteConnections, Predicate nodePredicate) { + TransportService transportService, ConnectionManager connectionManager, int maxNumRemoteConnections, + Predicate nodePredicate) { super(settings); - this.localClusterName = ClusterName.CLUSTER_NAME_SETTING.get(settings); this.transportService = transportService; this.maxNumRemoteConnections = maxNumRemoteConnections; this.nodePredicate = nodePredicate; @@ -122,7 +124,11 @@ final class RemoteClusterConnection extends AbstractComponent implements Transpo this.skipUnavailable = RemoteClusterService.REMOTE_CLUSTER_SKIP_UNAVAILABLE .getConcreteSettingForNamespace(clusterAlias).get(settings); this.connectHandler = new ConnectHandler(); - transportService.addConnectionListener(this); + this.threadPool = transportService.threadPool; + this.connectionManager = connectionManager; + connectionManager.addListener(this); + // we register the transport service here as a listener to make sure we notify handlers on disconnect etc. + connectionManager.addListener(transportService); } /** @@ -183,8 +189,9 @@ public void ensureConnected(ActionListener voidActionListener) { private void fetchShardsInternal(ClusterSearchShardsRequest searchShardsRequest, final ActionListener listener) { - final DiscoveryNode node = connectedNodes.getAny(); - transportService.sendRequest(node, ClusterSearchShardsAction.NAME, searchShardsRequest, + final DiscoveryNode node = getAnyConnectedNode(); + Transport.Connection connection = connectionManager.getConnection(node); + transportService.sendRequest(connection, ClusterSearchShardsAction.NAME, searchShardsRequest, TransportRequestOptions.EMPTY, new TransportResponseHandler() { @Override @@ -219,12 +226,16 @@ void collectNodes(ActionListener> listener) { request.clear(); request.nodes(true); request.local(true); // run this on the node that gets the request it's as good as any other - final DiscoveryNode node = connectedNodes.getAny(); - transportService.sendRequest(node, ClusterStateAction.NAME, request, TransportRequestOptions.EMPTY, + final DiscoveryNode node = getAnyConnectedNode(); + Transport.Connection connection = connectionManager.getConnection(node); + transportService.sendRequest(connection, ClusterStateAction.NAME, request, TransportRequestOptions.EMPTY, new TransportResponseHandler() { + @Override - public ClusterStateResponse newInstance() { - return new ClusterStateResponse(); + public ClusterStateResponse read(StreamInput in) throws IOException { + ClusterStateResponse response = new ClusterStateResponse(); + response.readFrom(in); + return response; } @Override @@ -261,11 +272,11 @@ public String executor() { * If such node is not connected, the returned connection will be a proxy connection that redirects to it. */ Transport.Connection getConnection(DiscoveryNode remoteClusterNode) { - if (transportService.nodeConnected(remoteClusterNode)) { - return transportService.getConnection(remoteClusterNode); + if (connectionManager.nodeConnected(remoteClusterNode)) { + return connectionManager.getConnection(remoteClusterNode); } - DiscoveryNode discoveryNode = connectedNodes.getAny(); - Transport.Connection connection = transportService.getConnection(discoveryNode); + DiscoveryNode discoveryNode = getAnyConnectedNode(); + Transport.Connection connection = connectionManager.getConnection(discoveryNode); return new ProxyConnection(connection, remoteClusterNode); } @@ -317,33 +328,18 @@ public Version getVersion() { } Transport.Connection getConnection() { - return transportService.getConnection(getAnyConnectedNode()); + return connectionManager.getConnection(getAnyConnectedNode()); } @Override public void close() throws IOException { - connectHandler.close(); + IOUtils.close(connectHandler, connectionManager); } public boolean isClosed() { return connectHandler.isClosed(); } - private ConnectionProfile getRemoteProfile(ClusterName name) { - // we can only compare the cluster name to make a decision if we should use a remote profile - // we can't use a cluster UUID here since we could be connecting to that remote cluster before - // the remote node has joined its cluster and have a cluster UUID. The fact that we just lose a - // rather smallish optimization on the connection layer under certain situations where remote clusters - // have the same name as the local one is minor here. - // the alternative here is to complicate the remote infrastructure to also wait until we formed a cluster, - // gained a cluster UUID and then start connecting etc. we rather use this simplification in order to maintain simplicity - if (this.localClusterName.equals(name)) { - return null; - } else { - return remoteProfile; - } - } - /** * The connect handler manages node discovery and the actual connect to the remote cluster. * There is at most one connect job running at any time. If such a connect job is triggered @@ -387,7 +383,7 @@ private void connect(ActionListener connectListener, boolean forceRun) { final boolean runConnect; final Collection> toNotify; final ActionListener listener = connectListener == null ? null : - ContextPreservingActionListener.wrapPreservingContext(connectListener, transportService.getThreadPool().getThreadContext()); + ContextPreservingActionListener.wrapPreservingContext(connectListener, threadPool.getThreadContext()); synchronized (queue) { if (listener != null && queue.offer(listener) == false) { listener.onFailure(new RejectedExecutionException("connect queue is full")); @@ -415,7 +411,6 @@ private void connect(ActionListener connectListener, boolean forceRun) { } private void forkConnect(final Collection> toNotify) { - ThreadPool threadPool = transportService.getThreadPool(); ExecutorService executor = threadPool.executor(ThreadPool.Names.MANAGEMENT); executor.submit(new AbstractRunnable() { @Override @@ -452,13 +447,13 @@ protected void doRun() { maybeConnect(); } }); - collectRemoteNodes(seedNodes.iterator(), transportService, listener); + collectRemoteNodes(seedNodes.iterator(), transportService, connectionManager, listener); } }); } private void collectRemoteNodes(Iterator> seedNodes, - final TransportService transportService, ActionListener listener) { + final TransportService transportService, final ConnectionManager manager, ActionListener listener) { if (Thread.currentThread().isInterrupted()) { listener.onFailure(new InterruptedException("remote connect thread got interrupted")); } @@ -467,7 +462,7 @@ private void collectRemoteNodes(Iterator> seedNodes, cancellableThreads.executeIO(() -> { final DiscoveryNode seedNode = seedNodes.next().get(); final TransportService.HandshakeResponse handshakeResponse; - Transport.Connection connection = transportService.openConnection(seedNode, + Transport.Connection connection = manager.openConnection(seedNode, ConnectionProfile.buildSingleChannelProfile(TransportRequestOptions.Type.REG, null, null)); boolean success = false; try { @@ -482,7 +477,7 @@ private void collectRemoteNodes(Iterator> seedNodes, final DiscoveryNode handshakeNode = handshakeResponse.getDiscoveryNode(); if (nodePredicate.test(handshakeNode) && connectedNodes.size() < maxNumRemoteConnections) { - transportService.connectToNode(handshakeNode, getRemoteProfile(handshakeResponse.getClusterName())); + manager.connectToNode(handshakeNode, remoteProfile, transportService.connectionValidator(handshakeNode)); if (remoteClusterName.get() == null) { assert handshakeResponse.getClusterName().value() != null; remoteClusterName.set(handshakeResponse.getClusterName()); @@ -524,7 +519,7 @@ private void collectRemoteNodes(Iterator> seedNodes, // ISE if we fail the handshake with an version incompatible node if (seedNodes.hasNext()) { logger.debug(() -> new ParameterizedMessage("fetching nodes from external cluster {} failed", clusterAlias), ex); - collectRemoteNodes(seedNodes, transportService, listener); + collectRemoteNodes(seedNodes, transportService, manager, listener); } else { listener.onFailure(ex); } @@ -552,7 +547,6 @@ final boolean isClosed() { /* This class handles the _state response from the remote cluster when sniffing nodes to connect to */ private class SniffClusterStateResponseHandler implements TransportResponseHandler { - private final TransportService transportService; private final Transport.Connection connection; private final ActionListener listener; private final Iterator> seedNodes; @@ -561,7 +555,6 @@ private class SniffClusterStateResponseHandler implements TransportResponseHandl SniffClusterStateResponseHandler(TransportService transportService, Transport.Connection connection, ActionListener listener, Iterator> seedNodes, CancellableThreads cancellableThreads) { - this.transportService = transportService; this.connection = connection; this.listener = listener; this.seedNodes = seedNodes; @@ -592,8 +585,8 @@ public void handleResponse(ClusterStateResponse response) { for (DiscoveryNode node : nodesIter) { if (nodePredicate.test(node) && connectedNodes.size() < maxNumRemoteConnections) { try { - transportService.connectToNode(node, getRemoteProfile(remoteClusterName.get())); // noop if node is - // connected + connectionManager.connectToNode(node, remoteProfile, + transportService.connectionValidator(node)); // noop if node is connected connectedNodes.add(node); } catch (ConnectTransportException | IllegalStateException ex) { // ISE if we fail the handshake with an version incompatible node @@ -609,7 +602,7 @@ public void handleResponse(ClusterStateResponse response) { listener.onFailure(ex); // we got canceled - fail the listener and step out } catch (Exception ex) { logger.warn(() -> new ParameterizedMessage("fetching nodes from external cluster {} failed", clusterAlias), ex); - collectRemoteNodes(seedNodes, transportService, listener); + collectRemoteNodes(seedNodes, transportService, connectionManager, listener); } } @@ -620,7 +613,7 @@ public void handleException(TransportException exp) { IOUtils.closeWhileHandlingException(connection); } finally { // once the connection is closed lets try the next node - collectRemoteNodes(seedNodes, transportService, listener); + collectRemoteNodes(seedNodes, transportService, connectionManager, listener); } } @@ -715,4 +708,8 @@ private synchronized void ensureIteratorAvailable() { } } } + + ConnectionManager getConnectionManager() { + return connectionManager; + } } diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java index 956a0d94179e0..2e24b230cf4fb 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.transport; +import java.util.Collection; import java.util.function.Supplier; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; @@ -139,7 +140,8 @@ private synchronized void updateRemoteClusters(Map getConnections() { + return remoteClusters.values(); + } } diff --git a/server/src/main/java/org/elasticsearch/transport/TransportService.java b/server/src/main/java/org/elasticsearch/transport/TransportService.java index fb14ae96dbf20..e37ea81211ad7 100644 --- a/server/src/main/java/org/elasticsearch/transport/TransportService.java +++ b/server/src/main/java/org/elasticsearch/transport/TransportService.java @@ -28,6 +28,7 @@ import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.component.AbstractLifecycleComponent; @@ -56,6 +57,7 @@ import org.elasticsearch.threadpool.ThreadPool; import java.io.IOException; +import java.io.UncheckedIOException; import java.net.UnknownHostException; import java.util.Arrays; import java.util.Collections; @@ -268,8 +270,9 @@ protected void doStart() { @Override protected void doStop() { try { - connectionManager.close(); - transport.stop(); + IOUtils.close(connectionManager, remoteClusterService, transport::stop); + } catch (IOException e) { + throw new UncheckedIOException(e); } finally { // in case the transport is not connected to our local node (thus cleaned on node disconnect) // make sure to clean any leftover on going handles @@ -306,7 +309,7 @@ public void doRun() { @Override protected void doClose() throws IOException { - IOUtils.close(remoteClusterService, transport); + transport.close(); } /** @@ -364,14 +367,18 @@ public void connectToNode(final DiscoveryNode node, ConnectionProfile connection if (isLocalNode(node)) { return; } + connectionManager.connectToNode(node, connectionProfile, connectionValidator(node)); + } - connectionManager.connectToNode(node, connectionProfile, (newConnection, actualProfile) -> { + public CheckedBiConsumer connectionValidator(DiscoveryNode node) { + return (newConnection, actualProfile) -> { // We don't validate cluster names to allow for CCS connections. final DiscoveryNode remote = handshake(newConnection, actualProfile.getHandshakeTimeout().millis(), cn -> true).discoveryNode; if (validateConnections && node.equals(remote) == false) { throw new ConnectTransportException(node, "handshake failed. unexpected remote node " + remote); } - }); + }; + } /** @@ -562,8 +569,12 @@ public final void sendRequest(final Transport.Conn final TransportRequest request, final TransportRequestOptions options, TransportResponseHandler handler) { - - asyncSender.sendRequest(connection, action, request, options, handler); + try { + asyncSender.sendRequest(connection, action, request, options, handler); + } catch (NodeNotConnectedException ex) { + // the caller might not handle this so we invoke the handler + handler.handleException(ex); + } } /** diff --git a/server/src/test/java/org/elasticsearch/client/transport/TransportClientNodesServiceTests.java b/server/src/test/java/org/elasticsearch/client/transport/TransportClientNodesServiceTests.java index afc6b47483eba..9baf2e1c95644 100644 --- a/server/src/test/java/org/elasticsearch/client/transport/TransportClientNodesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/client/transport/TransportClientNodesServiceTests.java @@ -379,10 +379,10 @@ public void testSniffNodesSamplerClosesConnections() throws Exception { transportClientNodesService.addTransportAddresses(remoteService.getLocalDiscoNode().getAddress()); assertEquals(1, transportClientNodesService.connectedNodes().size()); - assertEquals(1, clientService.connectionManager().connectedNodeCount()); + assertEquals(1, clientService.connectionManager().size()); transportClientNodesService.doSample(); - assertEquals(1, clientService.connectionManager().connectedNodeCount()); + assertEquals(1, clientService.connectionManager().size()); establishedConnections.clear(); handler.blockRequest(); diff --git a/server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java index 473f5152e8fd7..b8c39c48f8870 100644 --- a/server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java @@ -134,7 +134,7 @@ public void testReconnect() { private void assertConnectedExactlyToNodes(ClusterState state) { assertConnected(state.nodes()); - assertThat(transportService.getConnectionManager().connectedNodeCount(), equalTo(state.nodes().getSize())); + assertThat(transportService.getConnectionManager().size(), equalTo(state.nodes().getSize())); } private void assertConnected(Iterable nodes) { diff --git a/server/src/test/java/org/elasticsearch/transport/ConnectionManagerTests.java b/server/src/test/java/org/elasticsearch/transport/ConnectionManagerTests.java index 3c099c32bde2d..bff5a2b122d2f 100644 --- a/server/src/test/java/org/elasticsearch/transport/ConnectionManagerTests.java +++ b/server/src/test/java/org/elasticsearch/transport/ConnectionManagerTests.java @@ -159,7 +159,7 @@ public void onNodeDisconnected(DiscoveryNode node) { assertFalse(connection.isClosed()); assertTrue(connectionManager.nodeConnected(node)); assertSame(connection, connectionManager.getConnection(node)); - assertEquals(1, connectionManager.connectedNodeCount()); + assertEquals(1, connectionManager.size()); assertEquals(1, nodeConnectedCount.get()); assertEquals(0, nodeDisconnectedCount.get()); @@ -169,7 +169,7 @@ public void onNodeDisconnected(DiscoveryNode node) { connection.close(); } assertTrue(connection.isClosed()); - assertEquals(0, connectionManager.connectedNodeCount()); + assertEquals(0, connectionManager.size()); assertEquals(1, nodeConnectedCount.get()); assertEquals(1, nodeDisconnectedCount.get()); } @@ -205,7 +205,7 @@ public void onNodeDisconnected(DiscoveryNode node) { assertTrue(connection.isClosed()); assertFalse(connectionManager.nodeConnected(node)); expectThrows(NodeNotConnectedException.class, () -> connectionManager.getConnection(node)); - assertEquals(0, connectionManager.connectedNodeCount()); + assertEquals(0, connectionManager.size()); assertEquals(0, nodeConnectedCount.get()); assertEquals(0, nodeDisconnectedCount.get()); } diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java index 8cfec0a07f910..34e22fd20de7f 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterClientTests.java @@ -81,13 +81,15 @@ public void testEnsureWeReconnect() throws Exception { try (MockTransportService service = MockTransportService.createNewService(localSettings, Version.CURRENT, threadPool, null)) { Semaphore semaphore = new Semaphore(1); service.start(); - service.addConnectionListener(new TransportConnectionListener() { - @Override - public void onNodeDisconnected(DiscoveryNode node) { - if (remoteNode.equals(node)) { - semaphore.release(); + service.getRemoteClusterService().getConnections().forEach(con -> { + con.getConnectionManager().addListener(new TransportConnectionListener() { + @Override + public void onNodeDisconnected(DiscoveryNode node) { + if (remoteNode.equals(node)) { + semaphore.release(); + } } - } + }); }); // this test is not perfect since we might reconnect concurrently but it will fail most of the time if we don't have // the right calls in place in the RemoteAwareClient @@ -95,7 +97,9 @@ public void onNodeDisconnected(DiscoveryNode node) { for (int i = 0; i < 10; i++) { semaphore.acquire(); try { - service.disconnectFromNode(remoteNode); + service.getRemoteClusterService().getConnections().forEach(con -> { + con.getConnectionManager().disconnectFromNode(remoteNode); + }); semaphore.acquire(); RemoteClusterService remoteClusterService = service.getRemoteClusterService(); Client client = remoteClusterService.getRemoteClusterClient(threadPool, "test"); diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java index 3d0388ccfad96..e40486d63dc40 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterConnectionTests.java @@ -145,7 +145,7 @@ public static MockTransportService startTransport( } } - public void testLocalProfileIsUsedForLocalCluster() throws Exception { + public void testRemoteProfileIsUsedForLocalCluster() throws Exception { List knownNodes = new CopyOnWriteArrayList<>(); try (MockTransportService seedTransport = startTransport("seed_node", knownNodes, Version.CURRENT); MockTransportService discoverableTransport = startTransport("discoverable_node", knownNodes, Version.CURRENT)) { @@ -159,7 +159,7 @@ public void testLocalProfileIsUsedForLocalCluster() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { updateSeedNodes(connection, Arrays.asList(() -> seedNode)); assertTrue(service.nodeConnected(seedNode)); assertTrue(service.nodeConnected(discoverableNode)); @@ -175,9 +175,12 @@ public ClusterSearchShardsResponse read(StreamInput in) throws IOException { }); TransportRequestOptions options = TransportRequestOptions.builder().withType(TransportRequestOptions.Type.BULK) .build(); - service.sendRequest(connection.getConnection(), ClusterSearchShardsAction.NAME, new ClusterSearchShardsRequest(), - options, futureHandler); - futureHandler.txGet(); + IllegalStateException ise = (IllegalStateException) expectThrows(SendRequestTransportException.class, () -> { + service.sendRequest(discoverableNode, + ClusterSearchShardsAction.NAME, new ClusterSearchShardsRequest(), options, futureHandler); + futureHandler.txGet(); + }).getCause(); + assertEquals(ise.getMessage(), "can't select channel size is 0 for types: [RECOVERY, BULK, STATE]"); } } } @@ -199,7 +202,7 @@ public void testRemoteProfileIsUsedForRemoteCluster() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { updateSeedNodes(connection, Arrays.asList(() -> seedNode)); assertTrue(service.nodeConnected(seedNode)); assertTrue(service.nodeConnected(discoverableNode)); @@ -255,7 +258,7 @@ public void testDiscoverSingleNode() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { updateSeedNodes(connection, Arrays.asList(() -> seedNode)); assertTrue(service.nodeConnected(seedNode)); assertTrue(service.nodeConnected(discoverableNode)); @@ -284,7 +287,7 @@ public void testDiscoverSingleNodeWithIncompatibleSeed() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - seedNodes, service, Integer.MAX_VALUE, n -> true)) { + seedNodes, service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { updateSeedNodes(connection, seedNodes); assertTrue(service.nodeConnected(seedNode)); assertTrue(service.nodeConnected(discoverableNode)); @@ -311,7 +314,7 @@ public void testNodeDisconnected() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { updateSeedNodes(connection, Arrays.asList(() -> seedNode)); assertTrue(service.nodeConnected(seedNode)); assertTrue(service.nodeConnected(discoverableNode)); @@ -360,7 +363,8 @@ public void testFilterDiscoveredNodes() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> n.equals(rejectedNode) == false)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, + n -> n.equals(rejectedNode) == false)) { updateSeedNodes(connection, Arrays.asList(() -> seedNode)); if (rejectedNode.equals(seedNode)) { assertFalse(service.nodeConnected(seedNode)); @@ -399,7 +403,7 @@ public void testConnectWithIncompatibleTransports() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { expectThrows(Exception.class, () -> updateSeedNodes(connection, Arrays.asList(() -> seedNode))); assertFalse(service.nodeConnected(seedNode)); assertTrue(connection.assertNoRunningConnections()); @@ -462,7 +466,7 @@ public void close() { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { connection.addConnectedNode(seedNode); for (DiscoveryNode node : knownNodes) { final Transport.Connection transportConnection = connection.getConnection(node); @@ -505,7 +509,7 @@ public void run() { CountDownLatch listenerCalled = new CountDownLatch(1); AtomicReference exceptionReference = new AtomicReference<>(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { ActionListener listener = ActionListener.wrap(x -> { listenerCalled.countDown(); fail("expected exception"); @@ -542,7 +546,7 @@ public void testFetchShards() throws Exception { service.acceptIncomingRequests(); List> nodes = Collections.singletonList(() -> seedNode); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - nodes, service, Integer.MAX_VALUE, n -> true)) { + nodes, service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { if (randomBoolean()) { updateSeedNodes(connection, nodes); } @@ -582,7 +586,7 @@ public void testFetchShardsThreadContextHeader() throws Exception { service.acceptIncomingRequests(); List> nodes = Collections.singletonList(() -> seedNode); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - nodes, service, Integer.MAX_VALUE, n -> true)) { + nodes, service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { SearchRequest request = new SearchRequest("test-index"); Thread[] threads = new Thread[10]; for (int i = 0; i < threads.length; i++) { @@ -636,7 +640,7 @@ public void testFetchShardsSkipUnavailable() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Collections.singletonList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Collections.singletonList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { SearchRequest request = new SearchRequest("test-index"); ClusterSearchShardsRequest searchShardsRequest = new ClusterSearchShardsRequest("test-index") @@ -746,7 +750,7 @@ public void testTriggerUpdatesConcurrently() throws IOException, InterruptedExce service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - seedNodes, service, Integer.MAX_VALUE, n -> true)) { + seedNodes, service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { int numThreads = randomIntBetween(4, 10); Thread[] threads = new Thread[numThreads]; CyclicBarrier barrier = new CyclicBarrier(numThreads); @@ -824,7 +828,7 @@ public void testCloseWhileConcurrentlyConnecting() throws IOException, Interrupt service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - seedNodes, service, Integer.MAX_VALUE, n -> true)) { + seedNodes, service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { int numThreads = randomIntBetween(4, 10); Thread[] threads = new Thread[numThreads]; CyclicBarrier barrier = new CyclicBarrier(numThreads + 1); @@ -913,7 +917,7 @@ public void testGetConnectionInfo() throws Exception { service.acceptIncomingRequests(); int maxNumConnections = randomIntBetween(1, 5); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - seedNodes, service, maxNumConnections, n -> true)) { + seedNodes, service, service.connectionManager(), maxNumConnections, n -> true)) { // test no nodes connected RemoteConnectionInfo remoteConnectionInfo = assertSerialization(connection.getConnectionInfo()); assertNotNull(remoteConnectionInfo); @@ -1060,7 +1064,7 @@ public void testEnsureConnected() throws IOException, InterruptedException { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { assertFalse(service.nodeConnected(seedNode)); assertFalse(service.nodeConnected(discoverableNode)); assertTrue(connection.assertNoRunningConnections()); @@ -1109,7 +1113,7 @@ public void testCollectNodes() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(() -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(() -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { if (randomBoolean()) { updateSeedNodes(connection, Arrays.asList(() -> seedNode)); } @@ -1157,7 +1161,7 @@ public void testConnectedNodesConcurrentAccess() throws IOException, Interrupted service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - seedNodes, service, Integer.MAX_VALUE, n -> true)) { + seedNodes, service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { final int numGetThreads = randomIntBetween(4, 10); final Thread[] getThreads = new Thread[numGetThreads]; final int numModifyingThreads = randomIntBetween(4, 10); @@ -1247,7 +1251,7 @@ public void testClusterNameIsChecked() throws Exception { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList( () -> seedNode), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList( () -> seedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { updateSeedNodes(connection, Arrays.asList(() -> seedNode)); assertTrue(service.nodeConnected(seedNode)); assertTrue(service.nodeConnected(discoverableNode)); @@ -1327,7 +1331,7 @@ public void close() { service.start(); service.acceptIncomingRequests(); try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Collections.singletonList(() -> connectedNode), service, Integer.MAX_VALUE, n -> true)) { + Collections.singletonList(() -> connectedNode), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { connection.addConnectedNode(connectedNode); for (int i = 0; i < 10; i++) { //always a direct connection as the remote node is already connected @@ -1335,9 +1339,9 @@ public void close() { assertSame(seedConnection, remoteConnection); } for (int i = 0; i < 10; i++) { - //always a direct connection as the remote node is already connected + // we don't use the transport service connection manager so we will get a proxy connection for the local node Transport.Connection remoteConnection = connection.getConnection(service.getLocalNode()); - assertThat(remoteConnection, not(instanceOf(RemoteClusterConnection.ProxyConnection.class))); + assertThat(remoteConnection, instanceOf(RemoteClusterConnection.ProxyConnection.class)); assertThat(remoteConnection.getNode(), equalTo(service.getLocalNode())); } for (int i = 0; i < 10; i++) { @@ -1369,7 +1373,7 @@ public void testLazyResolveTransportAddress() throws Exception { return seedNode; }; try (RemoteClusterConnection connection = new RemoteClusterConnection(Settings.EMPTY, "test-cluster", - Arrays.asList(seedSupplier), service, Integer.MAX_VALUE, n -> true)) { + Arrays.asList(seedSupplier), service, service.getConnectionManager(), Integer.MAX_VALUE, n -> true)) { updateSeedNodes(connection, Arrays.asList(seedSupplier)); // Closing connections leads to RemoteClusterConnection.ConnectHandler.collectRemoteNodes // being called again so we try to resolve the same seed node's host twice diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java index c94b1cbdef547..f1929e72d8b33 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java @@ -283,6 +283,7 @@ public void testRemoteNodeAttribute() throws IOException, InterruptedException { assertTrue(service.isRemoteClusterRegistered("cluster_2")); assertFalse(service.isRemoteNodeConnected("cluster_2", c2N1Node)); assertTrue(service.isRemoteNodeConnected("cluster_2", c2N2Node)); + assertEquals(0, transportService.getConnectionManager().size()); } } } @@ -347,6 +348,7 @@ public void testRemoteNodeRoles() throws IOException, InterruptedException { assertTrue(service.isRemoteClusterRegistered("cluster_2")); assertFalse(service.isRemoteNodeConnected("cluster_2", c2N1Node)); assertTrue(service.isRemoteNodeConnected("cluster_2", c2N2Node)); + assertEquals(0, transportService.getConnectionManager().size()); } } } @@ -579,14 +581,16 @@ public void testCollectSearchShards() throws Exception { } CountDownLatch disconnectedLatch = new CountDownLatch(numDisconnectedClusters); - service.addConnectionListener(new TransportConnectionListener() { - @Override - public void onNodeDisconnected(DiscoveryNode node) { - if (disconnectedNodes.remove(node)) { - disconnectedLatch.countDown(); + for (RemoteClusterConnection connection : remoteClusterService.getConnections()) { + connection.getConnectionManager().addListener(new TransportConnectionListener() { + @Override + public void onNodeDisconnected(DiscoveryNode node) { + if (disconnectedNodes.remove(node)) { + disconnectedLatch.countDown(); + } } - } - }); + }); + } for (DiscoveryNode disconnectedNode : disconnectedNodes) { service.addFailToSendNoConnectRule(disconnectedNode.getAddress()); @@ -664,6 +668,7 @@ public void onNodeDisconnected(DiscoveryNode node) { assertTrue(shardsResponse != ClusterSearchShardsResponse.EMPTY); } } + assertEquals(0, service.getConnectionManager().size()); } } } finally { From 324d3a5b543fd264af189ef4843964dc1cd832f0 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 20 Aug 2018 22:29:50 +0200 Subject: [PATCH 2/3] add missing file --- .../test/transport/StubbableConnectionManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/elasticsearch/test/transport/StubbableConnectionManager.java index 486ccc805d055..012369feb839f 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/elasticsearch/test/transport/StubbableConnectionManager.java @@ -120,8 +120,8 @@ public void disconnectFromNode(DiscoveryNode node) { } @Override - public int connectedNodeCount() { - return delegate.connectedNodeCount(); + public int size() { + return delegate.size(); } @Override From 496bcd7c2dcbec94638e1ad6cecfacede705a30e Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 21 Aug 2018 09:20:20 +0200 Subject: [PATCH 3/3] Fix whitespace issue and add connection manager javaodoc --- .../org/elasticsearch/transport/RemoteClusterConnection.java | 1 + .../java/org/elasticsearch/transport/RemoteClusterService.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java index c38b424e9a704..5621b38557814 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java @@ -99,6 +99,7 @@ final class RemoteClusterConnection extends AbstractComponent implements Transpo * @param clusterAlias the configured alias of the cluster to connect to * @param seedNodes a list of seed nodes to discover eligible nodes from * @param transportService the local nodes transport service + * @param connectionManager the connection manager to use for this remote connection * @param maxNumRemoteConnections the maximum number of connections to the remote cluster * @param nodePredicate a predicate to filter eligible remote nodes to connect to */ diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java index 2e24b230cf4fb..34f13b672874f 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java @@ -141,7 +141,7 @@ private synchronized void updateRemoteClusters(Map