|
16 | 16 | import org.elasticsearch.action.get.GetResponse;
|
17 | 17 | import org.elasticsearch.action.index.IndexRequestBuilder;
|
18 | 18 | import org.elasticsearch.action.index.IndexResponse;
|
| 19 | +import org.elasticsearch.action.support.PlainActionFuture; |
19 | 20 | import org.elasticsearch.client.Client;
|
20 | 21 | import org.elasticsearch.cluster.ClusterState;
|
21 | 22 | import org.elasticsearch.cluster.action.shard.ShardStateAction;
|
22 | 23 | import org.elasticsearch.cluster.coordination.ClusterBootstrapService;
|
| 24 | +import org.elasticsearch.cluster.coordination.FollowersChecker; |
23 | 25 | import org.elasticsearch.cluster.coordination.LagDetector;
|
24 | 26 | import org.elasticsearch.cluster.metadata.IndexMetadata;
|
25 | 27 | import org.elasticsearch.cluster.routing.Murmur3HashFunction;
|
26 | 28 | import org.elasticsearch.cluster.routing.ShardRouting;
|
27 | 29 | import org.elasticsearch.cluster.routing.ShardRoutingState;
|
| 30 | +import org.elasticsearch.cluster.service.ClusterService; |
28 | 31 | import org.elasticsearch.common.settings.Settings;
|
29 |
| -import org.elasticsearch.core.TimeValue; |
30 | 32 | import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
31 | 33 | import org.elasticsearch.common.xcontent.XContentType;
|
| 34 | +import org.elasticsearch.core.TimeValue; |
32 | 35 | import org.elasticsearch.index.VersionType;
|
33 | 36 | import org.elasticsearch.index.shard.IndexShard;
|
34 | 37 | import org.elasticsearch.index.shard.IndexShardTestCase;
|
|
40 | 43 | import org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions;
|
41 | 44 | import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
|
42 | 45 | import org.elasticsearch.test.junit.annotations.TestIssueLogging;
|
| 46 | +import org.elasticsearch.test.transport.MockTransportService; |
| 47 | +import org.elasticsearch.transport.TransportService; |
43 | 48 |
|
44 | 49 | import java.util.ArrayList;
|
45 | 50 | import java.util.Collections;
|
|
59 | 64 |
|
60 | 65 | import static org.elasticsearch.action.DocWriteResponse.Result.CREATED;
|
61 | 66 | import static org.elasticsearch.action.DocWriteResponse.Result.UPDATED;
|
| 67 | +import static org.elasticsearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING; |
| 68 | +import static org.elasticsearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING; |
| 69 | +import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_INTERVAL_SETTING; |
| 70 | +import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING; |
| 71 | +import static org.elasticsearch.discovery.PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING; |
62 | 72 | import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
63 | 73 | import static org.hamcrest.Matchers.equalTo;
|
64 | 74 | import static org.hamcrest.Matchers.everyItem;
|
@@ -494,4 +504,63 @@ public void testRestartNodeWhileIndexing() throws Exception {
|
494 | 504 | }
|
495 | 505 | }
|
496 | 506 |
|
| 507 | + public void testRejoinWhileBeingRemoved() { |
| 508 | + final String masterNode = internalCluster().startMasterOnlyNode(Settings.builder() |
| 509 | + .put(FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") |
| 510 | + .put(FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), "1") |
| 511 | + .build()); |
| 512 | + final String dataNode = internalCluster().startDataOnlyNode(Settings.builder() |
| 513 | + .put(DISCOVERY_FIND_PEERS_INTERVAL_SETTING.getKey(), "100ms") |
| 514 | + .put(LEADER_CHECK_INTERVAL_SETTING.getKey(), "100ms") |
| 515 | + .put(LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), "1") |
| 516 | + .build()); |
| 517 | + |
| 518 | + final ClusterService masterClusterService = internalCluster().getInstance(ClusterService.class, masterNode); |
| 519 | + final PlainActionFuture<Void> removedNode = new PlainActionFuture<>(); |
| 520 | + masterClusterService.addListener(clusterChangedEvent -> { |
| 521 | + if (removedNode.isDone() == false && clusterChangedEvent.state().nodes().getDataNodes().isEmpty()) { |
| 522 | + removedNode.onResponse(null); |
| 523 | + } |
| 524 | + }); |
| 525 | + |
| 526 | + final ClusterService dataClusterService = internalCluster().getInstance(ClusterService.class, dataNode); |
| 527 | + final PlainActionFuture<Void> failedLeader = new PlainActionFuture<>() { |
| 528 | + @Override |
| 529 | + protected boolean blockingAllowed() { |
| 530 | + // we're deliberately blocking the cluster applier on the master until the data node starts to rejoin |
| 531 | + return true; |
| 532 | + } |
| 533 | + }; |
| 534 | + final AtomicBoolean dataNodeHasMaster = new AtomicBoolean(true); |
| 535 | + dataClusterService.addListener(clusterChangedEvent -> { |
| 536 | + dataNodeHasMaster.set(clusterChangedEvent.state().nodes().getMasterNode() != null); |
| 537 | + if (failedLeader.isDone() == false && dataNodeHasMaster.get() == false) { |
| 538 | + failedLeader.onResponse(null); |
| 539 | + } |
| 540 | + }); |
| 541 | + |
| 542 | + masterClusterService.addHighPriorityApplier(event -> { |
| 543 | + failedLeader.actionGet(); |
| 544 | + if (dataNodeHasMaster.get() == false) { |
| 545 | + try { |
| 546 | + Thread.sleep(100); |
| 547 | + } catch (InterruptedException e) { |
| 548 | + throw new AssertionError("unexpected", e); |
| 549 | + } |
| 550 | + } |
| 551 | + }); |
| 552 | + |
| 553 | + final MockTransportService dataTransportService |
| 554 | + = (MockTransportService) internalCluster().getInstance(TransportService.class, dataNode); |
| 555 | + dataTransportService.addRequestHandlingBehavior(FollowersChecker.FOLLOWER_CHECK_ACTION_NAME, (handler, request, channel, task) -> { |
| 556 | + if (removedNode.isDone() == false) { |
| 557 | + channel.sendResponse(new ElasticsearchException("simulated check failure")); |
| 558 | + } else { |
| 559 | + handler.messageReceived(request, channel, task); |
| 560 | + } |
| 561 | + }); |
| 562 | + |
| 563 | + removedNode.actionGet(10, TimeUnit.SECONDS); |
| 564 | + ensureStableCluster(2); |
| 565 | + } |
497 | 566 | }
|
0 commit comments