Skip to content

Commit 251f923

Browse files
authored
Take into account expectedShardSize when initializing shard in simulation (#95734)
1 parent e80ccde commit 251f923

File tree

4 files changed

+71
-31
lines changed

4 files changed

+71
-31
lines changed

docs/changelog/95734.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 95734
2+
summary: Take into account `expectedShardSize` when initializing shard in simulation
3+
area: Allocation
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/ClusterInfoSimulator.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,16 @@ public void simulateShardStarted(ShardRouting shard) {
5757
}
5858
}
5959

60-
private Long getEstimatedShardSize(ShardRouting routing) {
61-
if (routing.relocatingNodeId() != null) {
60+
private Long getEstimatedShardSize(ShardRouting shard) {
61+
if (shard.relocatingNodeId() != null) {
6262
// relocation existing shard, get size of the source shard
63-
return shardSizes.get(ClusterInfo.shardIdentifierFromRouting(routing));
64-
} else if (routing.primary() == false) {
63+
return shardSizes.get(ClusterInfo.shardIdentifierFromRouting(shard));
64+
} else if (shard.primary() == false) {
6565
// initializing new replica, get size of the source primary shard
66-
return shardSizes.get(ClusterInfo.shardIdentifierFromRouting(routing.shardId(), true));
66+
return shardSizes.get(ClusterInfo.shardIdentifierFromRouting(shard.shardId(), true));
6767
} else {
68-
// initializing new (empty) primary
69-
return 0L;
68+
// initializing new (empty?) primary
69+
return shard.getExpectedShardSize();
7070
}
7171
}
7272

server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ClusterInfoSimulatorTests.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING;
4141
import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED;
42+
import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED;
4243
import static org.elasticsearch.cluster.routing.TestShardRouting.newShardRouting;
4344
import static org.hamcrest.Matchers.equalTo;
4445

@@ -69,6 +70,30 @@ public void testInitializeNewPrimary() {
6970
);
7071
}
7172

73+
public void testInitializeNewPrimaryWithKnownExpectedSize() {
74+
75+
var newPrimary = newShardRouting("index-1", 0, null, true, UNASSIGNED).initialize("node-0", null, 100);
76+
77+
var simulator = new ClusterInfoSimulator(
78+
new ClusterInfoTestBuilder() //
79+
.withNode("node-0", new DiskUsageBuilder(1000, 1000))
80+
.withNode("node-1", new DiskUsageBuilder(1000, 1000))
81+
.build()
82+
);
83+
simulator.simulateShardStarted(newPrimary);
84+
85+
assertThat(
86+
simulator.getClusterInfo(),
87+
equalTo(
88+
new ClusterInfoTestBuilder() //
89+
.withNode("node-0", new DiskUsageBuilder(1000, 900))
90+
.withNode("node-1", new DiskUsageBuilder(1000, 1000))
91+
.withShard(newPrimary, 100)
92+
.build()
93+
)
94+
);
95+
}
96+
7297
public void testInitializeNewReplica() {
7398

7499
var existingPrimary = newShardRouting("index-1", 0, "node-0", true, STARTED);

server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.elasticsearch.common.logging.Loggers;
4545
import org.elasticsearch.common.settings.ClusterSettings;
4646
import org.elasticsearch.common.settings.Settings;
47+
import org.elasticsearch.common.util.Maps;
4748
import org.elasticsearch.index.shard.ShardId;
4849
import org.elasticsearch.snapshots.SnapshotShardSizeInfo;
4950
import org.elasticsearch.test.ESTestCase;
@@ -72,7 +73,9 @@
7273
import static org.hamcrest.Matchers.aMapWithSize;
7374
import static org.hamcrest.Matchers.allOf;
7475
import static org.hamcrest.Matchers.equalTo;
76+
import static org.hamcrest.Matchers.everyItem;
7577
import static org.hamcrest.Matchers.hasEntry;
78+
import static org.hamcrest.Matchers.lessThanOrEqualTo;
7679
import static org.mockito.Mockito.mock;
7780
import static org.mockito.Mockito.when;
7881

@@ -578,7 +581,7 @@ public void testDesiredBalanceShouldConvergeInABigCluster() {
578581
var nodes = randomIntBetween(3, 7);
579582
var nodeIds = new ArrayList<String>(nodes);
580583
var discoveryNodesBuilder = DiscoveryNodes.builder();
581-
var usedDiskSpace = new HashMap<String, Long>();
584+
var usedDiskSpace = Maps.<String, Long>newMapWithExpectedSize(nodes);
582585
for (int node = 0; node < nodes; node++) {
583586
var nodeId = "node-" + node;
584587
nodeIds.add(nodeId);
@@ -588,6 +591,7 @@ public void testDesiredBalanceShouldConvergeInABigCluster() {
588591

589592
var indices = scaledRandomIntBetween(1, 500);
590593
var totalShards = 0;
594+
var totalShardsSize = 0L;
591595

592596
var shardSizes = new HashMap<String, Long>();
593597
var dataPath = new HashMap<NodeAndShard, String>();
@@ -626,6 +630,7 @@ public void testDesiredBalanceShouldConvergeInABigCluster() {
626630

627631
var primaryNodeId = pickAndRemoveRandomValueFrom(remainingNodeIds);
628632
shardSizes.put(ClusterInfo.shardIdentifierFromRouting(shardId, true), thisShardSize);
633+
totalShardsSize += thisShardSize;
629634
if (primaryNodeId != null) {
630635
dataPath.put(new NodeAndShard(primaryNodeId, shardId), "/data");
631636
usedDiskSpace.compute(primaryNodeId, (k, v) -> v + thisShardSize);
@@ -644,9 +649,10 @@ public void testDesiredBalanceShouldConvergeInABigCluster() {
644649
for (int replica = 0; replica < replicas; replica++) {
645650
var replicaNodeId = primaryNodeId == null ? null : pickAndRemoveRandomValueFrom(remainingNodeIds);
646651
shardSizes.put(ClusterInfo.shardIdentifierFromRouting(shardId, false), thisShardSize);
652+
totalShardsSize += thisShardSize;
647653
if (replicaNodeId != null) {
648654
dataPath.put(new NodeAndShard(replicaNodeId, shardId), "/data");
649-
usedDiskSpace.compute(primaryNodeId, (k, v) -> v + thisShardSize);
655+
usedDiskSpace.compute(replicaNodeId, (k, v) -> v + thisShardSize);
650656
}
651657

652658
indexRoutingTableBuilder.addShard(
@@ -675,7 +681,9 @@ public void testDesiredBalanceShouldConvergeInABigCluster() {
675681

676682
var iteration = new AtomicInteger(0);
677683

678-
long diskSize = usedDiskSpace.values().stream().max(Long::compare).get() * 125 / 100;
684+
long diskSize = Math.max(totalShardsSize / nodes, usedDiskSpace.values().stream().max(Long::compare).get()) * 120 / 100;
685+
assertTrue("Should have enough space for all shards", diskSize * nodes > totalShardsSize);
686+
679687
var diskUsage = usedDiskSpace.entrySet()
680688
.stream()
681689
.collect(toMap(Map.Entry::getKey, it -> new DiskUsage(it.getKey(), it.getKey(), "/data", diskSize, diskSize - it.getValue())));
@@ -691,32 +699,34 @@ public void testDesiredBalanceShouldConvergeInABigCluster() {
691699
new BalancedShardsAllocator(settings)
692700
).compute(DesiredBalance.INITIAL, input, queue(), ignored -> iteration.incrementAndGet() < 1000);
693701

694-
try {
695-
assertThat(
696-
"Balance should converge, but exited by the iteration limit",
697-
desiredBalance.lastConvergedIndex(),
698-
equalTo(input.index())
699-
);
700-
logger.info(
701-
"Balance converged after [{}] iterations for [{}] nodes and [{}] total shards",
702-
iteration.get(),
703-
nodes,
704-
totalShards
705-
);
706-
} catch (AssertionError e) {
707-
logger.error(
708-
"Failed to converge desired balance for [{}] nodes and [{}] total shards:\n {}",
709-
nodes,
710-
totalShards,
711-
clusterState.getRoutingNodes().toString()
702+
var desiredDiskUsage = Maps.<String, Long>newMapWithExpectedSize(nodes);
703+
for (var assignment : desiredBalance.assignments().entrySet()) {
704+
var shardSize = Math.min(
705+
clusterInfo.getShardSize(assignment.getKey(), true),
706+
clusterInfo.getShardSize(assignment.getKey(), false)
712707
);
713-
throw e;
708+
for (String nodeId : assignment.getValue().nodeIds()) {
709+
desiredDiskUsage.compute(nodeId, (key, value) -> (value != null ? value : 0) + shardSize);
710+
}
714711
}
712+
713+
assertThat(
714+
"Balance should converge, but exited by the iteration limit",
715+
desiredBalance.lastConvergedIndex(),
716+
equalTo(input.index())
717+
);
718+
logger.info("Balance converged after [{}] iterations", iteration.get());
719+
720+
assertThat(
721+
"All desired disk usages " + desiredDiskUsage + " should be smaller then actual disk sizes: " + diskSize,
722+
desiredDiskUsage.values(),
723+
everyItem(lessThanOrEqualTo(diskSize))
724+
);
715725
}
716726

717727
private static long smallShardSizeDeviation(long originalSize) {
718-
var deviation = randomIntBetween(0, 50) - 100L;
719-
return originalSize * (1000 + deviation) / 1000;
728+
var deviation = randomIntBetween(-5, 5);
729+
return originalSize * (100 + deviation) / 100;
720730
}
721731

722732
private String pickAndRemoveRandomValueFrom(List<String> values) {

0 commit comments

Comments
 (0)