Skip to content

Only apply initial recovery filter to shrunk shard #44054

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;

import java.util.EnumSet;
import java.util.Map;

import static org.elasticsearch.cluster.node.DiscoveryNodeFilters.IP_VALIDATOR;
Expand Down Expand Up @@ -81,17 +80,6 @@ public class FilterAllocationDecider extends AllocationDecider {
Setting.prefixKeySetting(CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX + ".", key ->
Setting.simpleString(key, value -> IP_VALIDATOR.accept(key, value), Property.Dynamic, Property.NodeScope));

/**
* The set of {@link RecoverySource.Type} values for which the
* {@link IndexMetaData#INDEX_ROUTING_INITIAL_RECOVERY_GROUP_SETTING} should apply.
* Note that we do not include the {@link RecoverySource.Type#SNAPSHOT} type here
* because if the snapshot is restored to a different cluster that does not contain
* the initial recovery node id, or to the same cluster where the initial recovery node
* id has been decommissioned, then the primary shards will never be allocated.
*/
static EnumSet<RecoverySource.Type> INITIAL_RECOVERY_TYPES =
EnumSet.of(RecoverySource.Type.EMPTY_STORE, RecoverySource.Type.LOCAL_SHARDS);

private volatile DiscoveryNodeFilters clusterRequireFilters;
private volatile DiscoveryNodeFilters clusterIncludeFilters;
private volatile DiscoveryNodeFilters clusterExcludeFilters;
Expand All @@ -108,17 +96,16 @@ public FilterAllocationDecider(Settings settings, ClusterSettings clusterSetting
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
if (shardRouting.unassigned()) {
// only for unassigned - we filter allocation right after the index creation ie. for shard shrinking etc. to ensure
// only for unassigned - we filter allocation right after the index creation (for shard shrinking) to ensure
// that once it has been allocated post API the replicas can be allocated elsewhere without user interaction
// this is a setting that can only be set within the system!
IndexMetaData indexMd = allocation.metaData().getIndexSafe(shardRouting.index());
DiscoveryNodeFilters initialRecoveryFilters = indexMd.getInitialRecoveryFilters();
if (initialRecoveryFilters != null &&
INITIAL_RECOVERY_TYPES.contains(shardRouting.recoverySource().getType()) &&
shardRouting.recoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS &&
initialRecoveryFilters.match(node.node()) == false) {
String explanation = (shardRouting.recoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) ?
"initial allocation of the shrunken index is only allowed on nodes [%s] that hold a copy of every shard in the index" :
"initial allocation of the index is only allowed on nodes [%s]";
String explanation =
"initial allocation of the shrunken index is only allowed on nodes [%s] that hold a copy of every shard in the index";
return allocation.decision(Decision.NO, NAME, explanation, initialRecoveryFilters);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,39 +139,25 @@ public void testFilterInitialRecovery() {
}

private ClusterState createInitialClusterState(AllocationService service, Settings settings) {
RecoverySource.Type recoveryType = randomFrom(FilterAllocationDecider.INITIAL_RECOVERY_TYPES);
MetaData.Builder metaData = MetaData.builder();
final Settings.Builder indexSettings = settings(Version.CURRENT).put(settings);
final IndexMetaData sourceIndex;
if (recoveryType == RecoverySource.Type.LOCAL_SHARDS) {
//put a fake closed source index
sourceIndex = IndexMetaData.builder("sourceIndex")
.settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(0)
.putInSyncAllocationIds(0, Collections.singleton("aid0"))
.putInSyncAllocationIds(1, Collections.singleton("aid1"))
.build();
metaData.put(sourceIndex, false);
indexSettings.put(INDEX_RESIZE_SOURCE_UUID.getKey(), sourceIndex.getIndexUUID());
indexSettings.put(INDEX_RESIZE_SOURCE_NAME.getKey(), sourceIndex.getIndex().getName());
} else {
sourceIndex = null;
}
//put a fake closed source index
sourceIndex = IndexMetaData.builder("sourceIndex")
.settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(0)
.putInSyncAllocationIds(0, Collections.singleton("aid0"))
.putInSyncAllocationIds(1, Collections.singleton("aid1"))
.build();
metaData.put(sourceIndex, false);
indexSettings.put(INDEX_RESIZE_SOURCE_UUID.getKey(), sourceIndex.getIndexUUID());
indexSettings.put(INDEX_RESIZE_SOURCE_NAME.getKey(), sourceIndex.getIndex().getName());
final IndexMetaData.Builder indexMetaDataBuilder = IndexMetaData.builder("idx").settings(indexSettings)
.numberOfShards(1).numberOfReplicas(1);
final IndexMetaData indexMetaData = indexMetaDataBuilder.build();
metaData.put(indexMetaData, false);
RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
switch (recoveryType) {
case EMPTY_STORE:
routingTableBuilder.addAsNew(indexMetaData);
break;
case LOCAL_SHARDS:
routingTableBuilder.addAsFromCloseToOpen(sourceIndex);
routingTableBuilder.addAsNew(indexMetaData);
break;
default:
throw new UnsupportedOperationException(recoveryType + " is not supported");
}
routingTableBuilder.addAsFromCloseToOpen(sourceIndex);
routingTableBuilder.addAsNew(indexMetaData);

RoutingTable routingTable = routingTableBuilder.build();
ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING
Expand Down