|
38 | 38 | import org.elasticsearch.cluster.metadata.IndexMetaData;
|
39 | 39 | import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
|
40 | 40 | import org.elasticsearch.cluster.metadata.MetaData;
|
| 41 | +import org.elasticsearch.cluster.node.DiscoveryNodes; |
41 | 42 | import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
|
42 | 43 | import org.elasticsearch.cluster.routing.ShardIterator;
|
43 | 44 | import org.elasticsearch.cluster.routing.ShardRouting;
|
|
56 | 57 | import org.elasticsearch.index.shard.ShardId;
|
57 | 58 | import org.elasticsearch.index.shard.ShardNotFoundException;
|
58 | 59 | import org.elasticsearch.rest.RestStatus;
|
| 60 | +import org.elasticsearch.test.ESAllocationTestCase; |
59 | 61 | import org.elasticsearch.test.ESTestCase;
|
60 | 62 | import org.elasticsearch.test.cluster.TestClusterService;
|
61 | 63 | import org.elasticsearch.test.transport.CapturingTransport;
|
|
72 | 74 |
|
73 | 75 | import java.io.IOException;
|
74 | 76 | import java.util.ArrayList;
|
| 77 | +import java.util.Arrays; |
75 | 78 | import java.util.HashMap;
|
76 | 79 | import java.util.HashSet;
|
77 | 80 | import java.util.List;
|
@@ -217,6 +220,59 @@ public void testNotStartedPrimary() throws InterruptedException, ExecutionExcept
|
217 | 220 | assertIndexShardCounter(1);
|
218 | 221 | }
|
219 | 222 |
|
| 223 | + /** |
| 224 | + * When relocating a primary shard, there is a cluster state update at the end of relocation in which the active primary is switched |
| 225 | + * from the relocation source to the relocation target. If the relocation source receives and processes this cluster state before |
| 226 | + * the relocation target does, there is a time span in which the source believes the active primary to be on the target while the |
| 227 | + * target still believes it to be on the source. This results in replication requests being sent back and forth. |
| 228 | + * |
| 229 | + * This test checks that a replication request is not routed back from the relocation target to the relocation source when the |
| 230 | + * index routing table on the relocation target is stale. With the relocation target as local node, a request routed on a newer |
| 231 | + * cluster state version must time out or stay pending; one "testAction[p]" request is captured once relocation has finished. |
| 232 | + */ |
| 233 | + @Test |
| 234 | + public void testNoRerouteOnStaleClusterState() throws InterruptedException, ExecutionException { |
| 235 | + final String index = "test"; |
| 236 | + final ShardId shardId = new ShardId(index, 0); |
| 237 | + ClusterState state = state(index, true, ShardRoutingState.RELOCATING); |
| 238 | + IndexShardRoutingTable shardRoutingTable = state.getRoutingTable().shardRoutingTable(shardId.getIndex(), shardId.id()); |
| 239 | + String relocationTargetNode = shardRoutingTable.primaryShard().relocatingNodeId(); |
| 240 | + state = ClusterState.builder(state).nodes(DiscoveryNodes.builder(state.nodes()).localNodeId(relocationTargetNode)).build(); |
| 241 | + clusterService.setState(state); |
| 242 | + logger.debug("--> relocation ongoing state:\n{}", clusterService.state().prettyPrint()); |
| 243 | + |
| 244 | + Request request = new Request(shardId).timeout("1ms").routedBasedOnClusterVersion(clusterService.state().version() + 1); |
| 245 | + PlainActionFuture<Response> listener = new PlainActionFuture<>(); |
| 246 | + TransportReplicationAction.ReroutePhase reroutePhase = action.new ReroutePhase(null, request, listener); |
| 247 | + reroutePhase.run(); |
| 248 | + assertListenerThrows("cluster state too old didn't cause a timeout", listener, UnavailableShardsException.class); |
| 249 | + |
| 250 | + request = new Request(shardId).routedBasedOnClusterVersion(clusterService.state().version() + 1); |
| 251 | + listener = new PlainActionFuture<>(); |
| 252 | + reroutePhase = action.new ReroutePhase(null, request, listener); |
| 253 | + reroutePhase.run(); |
| 254 | + assertFalse("cluster state too old didn't cause a retry", listener.isDone()); |
| 255 | + |
| 256 | + // finish relocation: mark the INITIALIZING relocation target shard as started and publish the resulting routing table |
| 257 | + shardRoutingTable = clusterService.state().getRoutingTable().shardRoutingTable(shardId.getIndex(), shardId.id()); |
| 258 | + ShardRouting relocationTarget = shardRoutingTable.shardsWithState(ShardRoutingState.INITIALIZING).get(0); |
| 259 | + AllocationService allocationService = ESAllocationTestCase.createAllocationService(); |
| 260 | + RoutingAllocation.Result result = allocationService.applyStartedShards(state, Arrays.asList(relocationTarget)); |
| 261 | + ClusterState updatedState = ClusterState.builder(clusterService.state()).routingResult(result).build(); |
| 262 | + |
| 263 | + clusterService.setState(updatedState); |
| 264 | + logger.debug("--> relocation complete state:\n{}", clusterService.state().prettyPrint()); |
| 265 | + |
| 266 | + shardRoutingTable = clusterService.state().routingTable().index(index).shard(shardId.id()); |
| 267 | + final String primaryNodeId = shardRoutingTable.primaryShard().currentNodeId(); |
| 268 | + final List<CapturingTransport.CapturedRequest> capturedRequests = |
| 269 | + transport.capturedRequestsByTargetNode().get(primaryNodeId); |
| 270 | + assertThat(capturedRequests, notNullValue()); |
| 271 | + assertThat(capturedRequests.size(), equalTo(1)); |
| 272 | + assertThat(capturedRequests.get(0).action, equalTo("testAction[p]")); |
| 273 | + assertIndexShardCounter(1); |
| 274 | + } |
| 275 | + |
220 | 276 | @Test
|
221 | 277 | public void testUnknownIndexOrShardOnReroute() throws InterruptedException {
|
222 | 278 | final String index = "test";
|
|
0 commit comments