Skip to content

Commit 6045990

Browse files
committed
Don't break allocation if resize source index is missing (#29311)
DiskThresholdDecider currently assumes that the source index of a resize operation (e.g. shrink) is available, and throws an IndexNotFoundException otherwise, thereby breaking all shard allocation. This can be quite harmful if the source index is deleted during a shrink, or if the source index is unavailable during state recovery. While this behavior has been partly fixed in 6.1 and above (by #26931), that fix relies on the order in which AllocationDeciders are executed: ResizeAllocationDecider must return NO first so that DiskThresholdDecider never runs, which does not hold, for example, for the allocation explain API. This change adds a more complete fix that does not depend on decider ordering, and also fixes the problem on 5.6.
1 parent ef3cdec commit 6045990

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java

+8-5
Original file line numberDiff line numberDiff line change
@@ -409,11 +409,14 @@ public static long getExpectedShardSize(ShardRouting shard, RoutingAllocation al
409409
// the worst case
410410
long targetShardSize = 0;
411411
final Index mergeSourceIndex = metaData.getResizeSourceIndex();
412-
final IndexMetaData sourceIndexMeta = allocation.metaData().getIndexSafe(mergeSourceIndex);
413-
final Set<ShardId> shardIds = IndexMetaData.selectRecoverFromShards(shard.id(), sourceIndexMeta, metaData.getNumberOfShards());
414-
for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
415-
if (shardIds.contains(shardRoutingTable.shardId())) {
416-
targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
412+
final IndexMetaData sourceIndexMeta = allocation.metaData().index(mergeSourceIndex);
413+
if (sourceIndexMeta != null) {
414+
final Set<ShardId> shardIds = IndexMetaData.selectRecoverFromShards(shard.id(),
415+
sourceIndexMeta, metaData.getNumberOfShards());
416+
for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
417+
if (shardIds.contains(shardRoutingTable.shardId())) {
418+
targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
419+
}
417420
}
418421
}
419422
return targetShardSize == 0 ? defaultValue : targetShardSize;

server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java

+14
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,20 @@ public void testSizeShrinkIndex() {
342342
target2 = ShardRouting.newUnassigned(new ShardId(new Index("target2", "9101112"), 1),
343343
true, LocalShardsRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
344344
assertEquals(1000L, DiskThresholdDecider.getExpectedShardSize(target2, allocation, 0));
345+
346+
// check that the DiskThresholdDecider still works even if the source index has been deleted
347+
ClusterState clusterStateWithMissingSourceIndex = ClusterState.builder(clusterState)
348+
.metaData(MetaData.builder(metaData).remove("test"))
349+
.routingTable(RoutingTable.builder(clusterState.routingTable()).remove("test").build())
350+
.build();
351+
352+
allocationService.reroute(clusterState, "foo");
353+
354+
RoutingAllocation allocationWithMissingSourceIndex = new RoutingAllocation(null,
355+
clusterStateWithMissingSourceIndex.getRoutingNodes(), clusterStateWithMissingSourceIndex, info, 0);
356+
357+
assertEquals(42L, DiskThresholdDecider.getExpectedShardSize(target, allocationWithMissingSourceIndex, 42L));
358+
assertEquals(42L, DiskThresholdDecider.getExpectedShardSize(target2, allocationWithMissingSourceIndex, 42L));
345359
}
346360

347361
}

0 commit comments

Comments
 (0)