Skip to content

Commit c2a47d8

Browse files
committed
Simulate moves using cluster_concurrent_rebalance=1
BalancedShardAllocator is prone to perform unnecessary moves when cluster_concurrent_rebalance is set to high values (>2). (See elastic#87279) Above allocator is used in DesiredBalanceComputer. Since we do not move actual shard data during calculation it is possible to artificially set above setting to 1 to avoid unnecessary moves in desired balance.
1 parent 6c56c38 commit c2a47d8

File tree

3 files changed

+94
-6
lines changed

3 files changed

+94
-6
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,17 @@ public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation alloca
6262

6363
@Override
6464
public Decision canRebalance(RoutingAllocation allocation) {
65+
int relocatingShards = allocation.routingNodes().getRelocatingShardCount();
66+
if (allocation.isSimulating() && relocatingShards >= 1) {
67+
// BalancedShardAllocator is prone to perform unnecessary moves when cluster_concurrent_rebalance is set to high values (>2).
68+
// (See https://github.com/elastic/elasticsearch/issues/87279)
69+
// Above allocator is used in DesiredBalanceComputer. Since we do not move actual shard data during calculation
70+
// it is possible to artificially set above setting to 1 to avoid unnecessary moves in desired balance.
71+
return allocation.decision(Decision.THROTTLE, NAME, "allocation should move one shard at the time when simulating");
72+
}
6573
if (clusterConcurrentRebalance == -1) {
6674
return allocation.decision(Decision.YES, NAME, "unlimited concurrent rebalances are allowed");
6775
}
68-
int relocatingShards = allocation.routingNodes().getRelocatingShardCount();
6976
if (relocatingShards >= clusterConcurrentRebalance) {
7077
return allocation.decision(
7178
Decision.THROTTLE,

server/src/test/java/org/elasticsearch/cluster/routing/allocation/IndexBalanceTests.java

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,27 @@
1212
import org.apache.logging.log4j.Logger;
1313
import org.elasticsearch.Version;
1414
import org.elasticsearch.action.ActionListener;
15+
import org.elasticsearch.cluster.ClusterName;
1516
import org.elasticsearch.cluster.ClusterState;
1617
import org.elasticsearch.cluster.ESAllocationTestCase;
1718
import org.elasticsearch.cluster.TestShardRoutingRoleStrategies;
1819
import org.elasticsearch.cluster.metadata.IndexMetadata;
1920
import org.elasticsearch.cluster.metadata.Metadata;
2021
import org.elasticsearch.cluster.node.DiscoveryNodes;
22+
import org.elasticsearch.cluster.routing.AllocationId;
23+
import org.elasticsearch.cluster.routing.IndexRoutingTable;
2124
import org.elasticsearch.cluster.routing.RoutingNodes;
2225
import org.elasticsearch.cluster.routing.RoutingTable;
26+
import org.elasticsearch.cluster.routing.ShardRoutingState;
27+
import org.elasticsearch.cluster.routing.TestShardRouting;
28+
import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceShardsAllocator;
2329
import org.elasticsearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider;
30+
import org.elasticsearch.common.UUIDs;
2431
import org.elasticsearch.common.settings.Settings;
32+
import org.elasticsearch.index.shard.ShardId;
33+
34+
import java.util.Set;
35+
import java.util.function.IntFunction;
2536

2637
import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING;
2738
import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED;
@@ -440,4 +451,76 @@ public void testBalanceAllNodesStartedAddIndex() {
440451
assertThat(routingNodes.node("node2").shardsWithState("test1", STARTED).count(), equalTo(2L));
441452
assertThat(routingNodes.node("node3").shardsWithState("test1", STARTED).count(), equalTo(2L));
442453
}
454+
455+
/**
456+
* {@see https://github.com/elastic/elasticsearch/issues/87279}
457+
*/
458+
public void testRebalance() {
459+
final var settings = Settings.builder()
460+
.put("cluster.routing.allocation.cluster_concurrent_rebalance", randomIntBetween(3, 9))
461+
.build();
462+
final var allocationService = createAllocationService(settings);
463+
assumeTrue("", allocationService.shardsAllocator instanceof DesiredBalanceShardsAllocator);
464+
465+
final var discoveryNodesBuilder = DiscoveryNodes.builder();
466+
for (int nodeIndex = 0; nodeIndex < 3; nodeIndex++) {
467+
discoveryNodesBuilder.add(newNode("node-" + nodeIndex));
468+
}
469+
470+
final var metadataBuilder = Metadata.builder();
471+
final var routingTableBuilder = RoutingTable.builder();
472+
473+
addIndex(metadataBuilder, routingTableBuilder, "index-0", 4, shardId -> "node-" + (shardId / 2));
474+
addIndex(metadataBuilder, routingTableBuilder, "index-1", 4, shardId -> "node-" + (shardId / 2));
475+
addIndex(metadataBuilder, routingTableBuilder, "index-2", 1, shardId -> "node-2");
476+
477+
final var clusterState = ClusterState.builder(ClusterName.DEFAULT)
478+
.nodes(discoveryNodesBuilder)
479+
.metadata(metadataBuilder)
480+
.routingTable(routingTableBuilder)
481+
.build();
482+
483+
final var reroutedState = applyStartedShardsUntilNoChange(clusterState, allocationService);
484+
// node-0 and node-1 has 4 shards each. node-2 has only [index-2][0]. It should not be moved to achieve even balance.
485+
assertThat(reroutedState.getRoutingTable().index("index-2").shard(0).primaryShard().currentNodeId(), equalTo("node-2"));
486+
}
487+
488+
private static void addIndex(
489+
Metadata.Builder metadataBuilder,
490+
RoutingTable.Builder routingTableBuilder,
491+
String indexName,
492+
int numberOfShards,
493+
IntFunction<String> assignmentFunction
494+
) {
495+
final var inSyncIds = randomList(numberOfShards, numberOfShards, () -> UUIDs.randomBase64UUID(random()));
496+
final var indexMetadataBuilder = IndexMetadata.builder(indexName)
497+
.settings(
498+
Settings.builder()
499+
.put("index.number_of_shards", numberOfShards)
500+
.put("index.number_of_replicas", 0)
501+
.put("index.version.created", Version.CURRENT)
502+
.build()
503+
);
504+
for (int shardId = 0; shardId < numberOfShards; shardId++) {
505+
indexMetadataBuilder.putInSyncAllocationIds(shardId, Set.of(inSyncIds.get(shardId)));
506+
}
507+
metadataBuilder.put(indexMetadataBuilder);
508+
final var indexId = metadataBuilder.get(indexName).getIndex();
509+
final var indexRoutingTableBuilder = IndexRoutingTable.builder(indexId);
510+
511+
for (int shardId = 0; shardId < numberOfShards; shardId++) {
512+
indexRoutingTableBuilder.addShard(
513+
TestShardRouting.newShardRouting(
514+
new ShardId(indexId, shardId),
515+
assignmentFunction.apply(shardId),
516+
null,
517+
true,
518+
ShardRoutingState.STARTED,
519+
AllocationId.newInitializing(inSyncIds.get(shardId))
520+
)
521+
);
522+
}
523+
524+
routingTableBuilder.add(indexRoutingTableBuilder);
525+
}
443526
}

test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,8 @@ protected static class MockAllocationService extends AllocationService {
337337

338338
private volatile long nanoTimeOverride = -1L;
339339

340-
private final GatewayAllocator gatewayAllocator;
340+
public final GatewayAllocator gatewayAllocator;
341+
public final ShardsAllocator shardsAllocator;
341342

342343
public MockAllocationService(
343344
AllocationDeciders allocationDeciders,
@@ -355,6 +356,7 @@ public MockAllocationService(
355356
TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY
356357
);
357358
this.gatewayAllocator = gatewayAllocator;
359+
this.shardsAllocator = shardsAllocator;
358360
}
359361

360362
public void setNanoTimeOverride(long nanoTime) {
@@ -365,10 +367,6 @@ public void setNanoTimeOverride(long nanoTime) {
365367
protected long currentNanoTime() {
366368
return nanoTimeOverride == -1L ? super.currentNanoTime() : nanoTimeOverride;
367369
}
368-
369-
public GatewayAllocator getGatewayAllocator() {
370-
return gatewayAllocator;
371-
}
372370
}
373371

374372
/**

0 commit comments

Comments
 (0)