Skip to content

Commit ded6f3d

Browse files
authored
Fix flaky testRebalanceOnlyAfterAllShardsAreActive (#94102)
1 parent cdb4a3b commit ded6f3d

File tree

1 file changed

+16
-44
lines changed

1 file changed

+16
-44
lines changed

server/src/test/java/org/elasticsearch/cluster/routing/allocation/RebalanceAfterActiveTests.java

Lines changed: 16 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@
1313
import org.elasticsearch.Version;
1414
import org.elasticsearch.action.ActionListener;
1515
import org.elasticsearch.cluster.ClusterInfo;
16+
import org.elasticsearch.cluster.ClusterName;
1617
import org.elasticsearch.cluster.ClusterState;
1718
import org.elasticsearch.cluster.ESAllocationTestCase;
1819
import org.elasticsearch.cluster.TestShardRoutingRoleStrategies;
1920
import org.elasticsearch.cluster.metadata.IndexMetadata;
2021
import org.elasticsearch.cluster.metadata.Metadata;
2122
import org.elasticsearch.cluster.node.DiscoveryNodes;
22-
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
2323
import org.elasticsearch.cluster.routing.RoutingNode;
2424
import org.elasticsearch.cluster.routing.RoutingNodes;
2525
import org.elasticsearch.cluster.routing.RoutingTable;
@@ -34,11 +34,11 @@
3434
import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED;
3535
import static org.hamcrest.Matchers.equalTo;
3636
import static org.hamcrest.Matchers.nullValue;
37+
import static org.hamcrest.Matchers.oneOf;
3738

3839
public class RebalanceAfterActiveTests extends ESAllocationTestCase {
3940
private final Logger logger = LogManager.getLogger(RebalanceAfterActiveTests.class);
4041

41-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/94086")
4242
public void testRebalanceOnlyAfterAllShardsAreActive() {
4343
final long[] sizes = new long[5];
4444
for (int i = 0; i < sizes.length; i++) {
@@ -63,18 +63,13 @@ public Long getShardSize(ShardRouting shardRouting) {
6363
);
6464
logger.info("Building initial routing table");
6565

66-
Metadata metadata = Metadata.builder()
67-
.put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(5).numberOfReplicas(1))
68-
.build();
66+
var indexMetadata = IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(5).numberOfReplicas(1).build();
6967

70-
RoutingTable initialRoutingTable = RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY)
71-
.addAsNew(metadata.index("test"))
68+
ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
69+
.metadata(Metadata.builder().put(indexMetadata, false))
70+
.routingTable(RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY).addAsNew(indexMetadata))
7271
.build();
7372

74-
ClusterState clusterState = ClusterState.builder(
75-
org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)
76-
).metadata(metadata).routingTable(initialRoutingTable).build();
77-
7873
assertThat(clusterState.routingTable().index("test").size(), equalTo(5));
7974
for (int i = 0; i < clusterState.routingTable().index("test").size(); i++) {
8075
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
@@ -93,6 +88,7 @@ public Long getShardSize(ShardRouting shardRouting) {
9388
for (int i = 0; i < clusterState.routingTable().index("test").size(); i++) {
9489
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
9590
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(INITIALIZING));
91+
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().getExpectedShardSize(), equalTo(sizes[i]));
9692
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(UNASSIGNED));
9793
}
9894

@@ -103,7 +99,7 @@ public Long getShardSize(ShardRouting shardRouting) {
10399
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
104100
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(STARTED));
105101
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(INITIALIZING));
106-
assertEquals(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).getExpectedShardSize(), sizes[i]);
102+
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).getExpectedShardSize(), equalTo(sizes[i]));
107103
}
108104

109105
logger.info("now, start 8 more nodes, and check that no rebalancing/relocation have happened");
@@ -126,44 +122,20 @@ public Long getShardSize(ShardRouting shardRouting) {
126122
assertThat(clusterState.routingTable().index("test").shard(i).size(), equalTo(2));
127123
assertThat(clusterState.routingTable().index("test").shard(i).primaryShard().state(), equalTo(STARTED));
128124
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).state(), equalTo(INITIALIZING));
129-
assertEquals(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).getExpectedShardSize(), sizes[i]);
130-
125+
assertThat(clusterState.routingTable().index("test").shard(i).replicaShards().get(0).getExpectedShardSize(), equalTo(sizes[i]));
131126
}
132127

133128
logger.info("start the replica shards, rebalancing should start");
134129
clusterState = startInitializingShardsAndReroute(strategy, clusterState);
135130

136-
// we only allow one relocation at a time
137-
assertThat(shardsWithState(clusterState.getRoutingNodes(), STARTED).size(), equalTo(5));
138-
assertThat(shardsWithState(clusterState.getRoutingNodes(), RELOCATING).size(), equalTo(5));
139-
for (int shardId = 0; shardId < clusterState.routingTable().index("test").size(); shardId++) {
140-
int num = 0;
141-
final IndexShardRoutingTable shardRoutingTable = clusterState.routingTable().index("test").shard(shardId);
142-
for (int copy = 0; copy < shardRoutingTable.size(); copy++) {
143-
ShardRouting routing = shardRoutingTable.shard(copy);
144-
if (routing.state() == RELOCATING || routing.state() == INITIALIZING) {
145-
assertEquals(routing.getExpectedShardSize(), sizes[shardId]);
146-
num++;
147-
}
148-
}
149-
assertTrue(num > 0);
150-
}
151-
152-
logger.info("complete relocation, other half of relocation should happen");
153-
clusterState = startInitializingShardsAndReroute(strategy, clusterState);
131+
// both primary and replica should not be rebalanced at once so 5 replicas should start moving
132+
// unless we computed the balance where one of the indices already have both primary and replica on desired nodes
133+
// in such case only 4 shards are immediately relocating
134+
assertThat(shardsWithState(clusterState.getRoutingNodes(), STARTED).size(), oneOf(5, 6));
135+
assertThat(shardsWithState(clusterState.getRoutingNodes(), RELOCATING).size(), oneOf(4, 5));
154136

155-
// we now only relocate 3, since 2 remain where they are!
156-
assertThat(shardsWithState(clusterState.getRoutingNodes(), STARTED).size(), equalTo(7));
157-
assertThat(shardsWithState(clusterState.getRoutingNodes(), RELOCATING).size(), equalTo(3));
158-
for (int i = 0; i < clusterState.routingTable().index("test").size(); i++) {
159-
final IndexShardRoutingTable shardRoutingTable = clusterState.routingTable().index("test").shard(i);
160-
for (int j = 0; j < shardRoutingTable.size(); j++) {
161-
ShardRouting routing = shardRoutingTable.shard(j);
162-
if (routing.state() == RELOCATING || routing.state() == INITIALIZING) {
163-
assertEquals(routing.getExpectedShardSize(), sizes[i]);
164-
}
165-
}
166-
}
137+
logger.info("complete all relocations");
138+
clusterState = applyStartedShardsUntilNoChange(clusterState, strategy);
167139

168140
logger.info("complete relocation, that's it!");
169141
clusterState = startInitializingShardsAndReroute(strategy, clusterState);

0 commit comments

Comments
 (0)