Skip to content

Commit 4952735

Browse files
committed
Wait for blackholed connection before discovery
Since elastic#42636 we no longer treat connections specially when simulating a blackholed connection. This means that at the end of the safety phase we may have just started a connection attempt which will time out, but the default timeout is 30 seconds, much longer than the 2 seconds we normally allow for post-safety-phase discovery. This commit adds time for such a connection attempt to time out. It also fixes some spurious logging of `this` that now refers to an object with an unhelpful `toString()` implementation introduced in elastic#42636. Fixes elastic#44073
1 parent 18d4e9d commit 4952735

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

server/src/main/java/org/elasticsearch/discovery/HandshakingTransportAddressConnector.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,23 +70,24 @@ public HandshakingTransportAddressConnector(Settings settings, TransportService
7070
@Override
7171
public void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener<DiscoveryNode> listener) {
7272
transportService.getThreadPool().generic().execute(new AbstractRunnable() {
73+
private final AbstractRunnable thisConnectionAttempt = this;
74+
7375
@Override
7476
protected void doRun() {
75-
7677
// TODO if transportService is already connected to this address then skip the handshaking
7778

7879
final DiscoveryNode targetNode = new DiscoveryNode("", transportAddress.toString(),
7980
UUIDs.randomBase64UUID(Randomness.get()), // generated deterministically for reproducible tests
8081
transportAddress.address().getHostString(), transportAddress.getAddress(), transportAddress, emptyMap(),
8182
emptySet(), Version.CURRENT.minimumCompatibilityVersion());
8283

83-
logger.trace("[{}] opening probe connection", this);
84+
logger.trace("[{}] opening probe connection", thisConnectionAttempt);
8485
transportService.openConnection(targetNode,
8586
ConnectionProfile.buildSingleChannelProfile(Type.REG, probeConnectTimeout, probeHandshakeTimeout,
8687
TimeValue.MINUS_ONE, null), new ActionListener<>() {
8788
@Override
8889
public void onResponse(Connection connection) {
89-
logger.trace("[{}] opened probe connection", this);
90+
logger.trace("[{}] opened probe connection", thisConnectionAttempt);
9091

9192
// use NotifyOnceListener to make sure the following line does not result in onFailure being called when
9293
// the connection is closed in the onResponse handler
@@ -96,7 +97,7 @@ public void onResponse(Connection connection) {
9697
protected void innerOnResponse(DiscoveryNode remoteNode) {
9798
try {
9899
// success means (amongst other things) that the cluster names match
99-
logger.trace("[{}] handshake successful: {}", this, remoteNode);
100+
logger.trace("[{}] handshake successful: {}", thisConnectionAttempt, remoteNode);
100101
IOUtils.closeWhileHandlingException(connection);
101102

102103
if (remoteNode.equals(transportService.getLocalNode())) {
@@ -109,7 +110,7 @@ protected void innerOnResponse(DiscoveryNode remoteNode) {
109110
transportService.connectToNode(remoteNode, new ActionListener<Void>() {
110111
@Override
111112
public void onResponse(Void ignored) {
112-
logger.trace("[{}] full connection successful: {}", this, remoteNode);
113+
logger.trace("[{}] full connection successful: {}", thisConnectionAttempt, remoteNode);
113114
listener.onResponse(remoteNode);
114115
}
115116

@@ -129,7 +130,7 @@ protected void innerOnFailure(Exception e) {
129130
// we opened a connection and successfully performed a low-level handshake, so we were definitely
130131
// talking to an Elasticsearch node, but the high-level handshake failed indicating some kind of
131132
// mismatched configurations (e.g. cluster name) that the user should address
132-
logger.warn(new ParameterizedMessage("handshake failed for [{}]", this), e);
133+
logger.warn(new ParameterizedMessage("handshake failed for [{}]", thisConnectionAttempt), e);
133134
IOUtils.closeWhileHandlingException(connection);
134135
listener.onFailure(e);
135136
}

test/framework/src/main/java/org/elasticsearch/cluster/coordination/AbstractCoordinatorTestCase.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
122122
import static org.elasticsearch.node.Node.NODE_NAME_SETTING;
123123
import static org.elasticsearch.transport.TransportService.NOOP_TRANSPORT_INTERCEPTOR;
124+
import static org.elasticsearch.transport.TransportSettings.CONNECT_TIMEOUT;
124125
import static org.hamcrest.Matchers.empty;
125126
import static org.hamcrest.Matchers.equalTo;
126127
import static org.hamcrest.Matchers.greaterThan;
@@ -556,7 +557,8 @@ void bootstrapIfNecessary() {
556557
if (clusterNodes.stream().allMatch(ClusterNode::isNotUsefullyBootstrapped)) {
557558
assertThat("setting initial configuration may fail with disconnected nodes", disconnectedNodes, empty());
558559
assertThat("setting initial configuration may fail with blackholed nodes", blackholedNodes, empty());
559-
runFor(defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_SETTING) * 2, "discovery prior to setting initial configuration");
560+
runFor(defaultMillis(CONNECT_TIMEOUT) + // may be in a prior connection attempt which has been blackholed
561+
defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_SETTING) * 2, "discovery prior to setting initial configuration");
560562
final ClusterNode bootstrapNode = getAnyBootstrappableNode();
561563
bootstrapNode.applyInitialConfiguration();
562564
} else {

0 commit comments

Comments
 (0)