Automatically prepare indices for splitting #27451
Changes from all commits
@@ -379,15 +379,24 @@ public ClusterState execute(ClusterState currentState) throws Exception {
indexSettingsBuilder.put(IndexMetaData.SETTING_INDEX_PROVIDED_NAME, request.getProvidedName());
indexSettingsBuilder.put(SETTING_INDEX_UUID, UUIDs.randomBase64UUID());
final IndexMetaData.Builder tmpImdBuilder = IndexMetaData.builder(request.index());

final Settings idxSettings = indexSettingsBuilder.build();
int numTargetShards = IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.get(idxSettings);
final int routingNumShards;
if (recoverFromIndex == null) {
Settings idxSettings = indexSettingsBuilder.build();
routingNumShards = IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.get(idxSettings);
final Version indexVersionCreated = idxSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null);
final IndexMetaData sourceMetaData = recoverFromIndex == null ? null :
currentState.metaData().getIndexSafe(recoverFromIndex);
if (sourceMetaData == null || sourceMetaData.getNumberOfShards() == 1) {
// in this case we either have no index to recover from,
// or we have a source index with 1 shard and without an explicit split factor
// (or one that is valid); in that case we can split into whatever and auto-generate a new factor.
if (IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.exists(idxSettings)) {
routingNumShards = IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.get(idxSettings);
} else {
routingNumShards = calculateNumRoutingShards(numTargetShards, indexVersionCreated);
}
} else {
assert IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.exists(indexSettingsBuilder.build()) == false
: "index.number_of_routing_shards should be present on the target index on resize";
final IndexMetaData sourceMetaData = currentState.metaData().getIndexSafe(recoverFromIndex);
: "index.number_of_routing_shards should not be present on the target index on resize";
routingNumShards = sourceMetaData.getRoutingNumShards();
}
// remove the setting; it's temporary and is only relevant once we create the index
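Because the hunk above shows removed and added lines interleaved, the following is a compact restatement of the routing-shard selection the new code performs. It is an illustration only, not code from the PR: the helper name and signature are invented for readability, while the setting keys and metadata calls are the ones visible in the diff (assuming the Elasticsearch classes referenced above are on the classpath).

```java
// Illustrative restatement of the new selection logic (not part of the PR).
// sourceMetaData is null when the index is not the target of a resize (shrink/split) operation.
static int selectRoutingNumShards(IndexMetaData sourceMetaData, Settings idxSettings,
                                  int numTargetShards, Version indexVersionCreated) {
    if (sourceMetaData == null || sourceMetaData.getNumberOfShards() == 1) {
        // Plain index creation, or a resize from a single-shard source: an explicit
        // index.number_of_routing_shards wins, otherwise a default split factor is auto-generated.
        if (IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.exists(idxSettings)) {
            return IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.get(idxSettings);
        }
        return calculateNumRoutingShards(numTargetShards, indexVersionCreated);
    }
    // Resize from a multi-shard source: the target must not set the value itself
    // and inherits the source's routing factor instead.
    return sourceMetaData.getRoutingNumShards();
}
```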
@@ -408,7 +417,6 @@ public ClusterState execute(ClusterState currentState) throws Exception {
* the maximum primary term on all the shards in the source index. This ensures that we have correct
* document-level semantics regarding sequence numbers in the shrunken index.
*/
final IndexMetaData sourceMetaData = currentState.metaData().getIndexSafe(recoverFromIndex);
final long primaryTerm =
IntStream
.range(0, sourceMetaData.getNumberOfShards())
@@ -717,4 +725,27 @@ static void prepareResizeIndexSettings(ClusterState currentState, Set<String> ma
.put(IndexMetaData.INDEX_RESIZE_SOURCE_NAME.getKey(), resizeSourceIndex.getName())
.put(IndexMetaData.INDEX_RESIZE_SOURCE_UUID.getKey(), resizeSourceIndex.getUUID());
}

/**
* Returns a default number of routing shards based on the number of shards of the index. The default number of routing shards will
* allow any index to be split at least once and at most 10 times by a factor of two. The closer the number of shards gets to 1024
* the fewer default split operations are supported.
*/
public static int calculateNumRoutingShards(int numShards, Version indexVersionCreated) {
if (indexVersionCreated.onOrAfter(Version.V_7_0_0_alpha1)) {
// only select this automatically for indices that are created on or after 7.0; this will prevent this new behaviour
// until we have a fully upgraded cluster. Additionally it will make integration testing easier since mixed clusters
// will always have the behavior of the min node in the cluster.
//
// We use as a default number of routing shards the highest number that can be expressed
// as {@code numShards * 2^x} that is less than or equal to the maximum number of shards: 1024.
int log2MaxNumShards = 10; // logBase2(1024)
int log2NumShards = 32 - Integer.numberOfLeadingZeros(numShards - 1); // ceil(logBase2(numShards))
int numSplits = log2MaxNumShards - log2NumShards;
numSplits = Math.max(1, numSplits); // Ensure the index can be split at least once
return numShards * 1 << numSplits;
} else {
return numShards;
}
}
}

Review discussion on the Version.V_7_0_0_alpha1 check above:
- can you clarify why this needs to be version dependent?
- there is a comment in the line below?!
- yeah, I get this means we only do the new behavior until the cluster is fully upgraded, but I don't see why we care? I mean, if the master is a 7.0.0 master, we can start creating indices with a different hashing logic and not worry about it?
- it's mainly for testing purposes and BWC behavior being more predictable, otherwise some rest tests will randomly fail
- oh well :)
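To make the javadoc above concrete, here is a small, self-contained sketch (not part of the PR) that re-derives the same arithmetic for a few shard counts; the printed values follow directly from the formula as written.

```java
public class RoutingShardsMath {
    // Mirrors the post-7.0 branch of calculateNumRoutingShards shown above.
    static int defaultRoutingShards(int numShards) {
        int log2MaxNumShards = 10;                                            // logBase2(1024)
        int log2NumShards = 32 - Integer.numberOfLeadingZeros(numShards - 1); // ceil(logBase2(numShards))
        int numSplits = Math.max(1, log2MaxNumShards - log2NumShards);        // at least one split is always possible
        return numShards << numSplits;                                        // numShards * 2^numSplits
    }

    public static void main(String[] args) {
        // 1 -> 1024, 3 -> 768, 5 -> 640, 1024 -> 2048
        for (int shards : new int[] {1, 3, 5, 1024}) {
            System.out.println(shards + " shards -> " + defaultRoutingShards(shards) + " routing shards");
        }
    }
}
```

Every default is of the form numShards * 2^x and stays at or below 1024 for source indices of up to 512 shards; above that, the max(1, ...) clause still reserves room for exactly one split, so the default exceeds 1024.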
@@ -39,6 +39,7 @@
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaDataCreateIndexService;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.Murmur3HashFunction;
import org.elasticsearch.cluster.routing.ShardRouting;

@@ -66,7 +67,6 @@
import java.util.List;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.IntFunction;
import java.util.stream.IntStream;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@@ -88,21 +88,40 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
}

public void testCreateSplitIndexToN() throws IOException {
int[][] possibleShardSplits = new int[][] {{2,4,8}, {3, 6, 12}, {1, 2, 4}};
int[][] possibleShardSplits = new int[][]{{2, 4, 8}, {3, 6, 12}, {1, 2, 4}};
int[] shardSplits = randomFrom(possibleShardSplits);
assertEquals(shardSplits[0], (shardSplits[0] * shardSplits[1]) / shardSplits[1]);
assertEquals(shardSplits[1], (shardSplits[1] * shardSplits[2]) / shardSplits[2]);
splitToN(shardSplits[0], shardSplits[1], shardSplits[2]);
}

public void testSplitFromOneToN() {
splitToN(1, 5, 10);
client().admin().indices().prepareDelete("*").get();
int randomSplit = randomIntBetween(2, 6);
splitToN(1, randomSplit, randomSplit * 2);
}

private void splitToN(int sourceShards, int firstSplitShards, int secondSplitShards) {
Review reactions on this refactoring: ❤️ 😍

assertEquals(sourceShards, (sourceShards * firstSplitShards) / firstSplitShards);
assertEquals(firstSplitShards, (firstSplitShards * secondSplitShards) / secondSplitShards);
internalCluster().ensureAtLeastNumDataNodes(2);
final boolean useRouting = randomBoolean();
final boolean useNested = randomBoolean();
final boolean useMixedRouting = useRouting ? randomBoolean() : false;
CreateIndexRequestBuilder createInitialIndex = prepareCreate("source");
final int routingShards = shardSplits[2] * randomIntBetween(1, 10);
Settings.Builder settings = Settings.builder().put(indexSettings())
.put("number_of_shards", shardSplits[0])
.put("index.number_of_routing_shards", routingShards);
Review thread on no longer hard-coding index.number_of_routing_shards in the test:
- Shouldn't we randomly still do this?
- we can.. I will do it
- ++
if (useRouting && useMixedRouting == false && randomBoolean()) {
settings.put("index.routing_partition_size", randomIntBetween(1, routingShards - 1));
Settings.Builder settings = Settings.builder().put(indexSettings()).put("number_of_shards", sourceShards);
final boolean useRoutingPartition;
if (randomBoolean()) {
// randomly set the value manually
int routingShards = secondSplitShards * randomIntBetween(1, 10);
settings.put("index.number_of_routing_shards", routingShards);
useRoutingPartition = false;
} else {
useRoutingPartition = randomBoolean();
}
if (useRouting && useMixedRouting == false && useRoutingPartition) {
settings.put("index.routing_partition_size",
randomIntBetween(1, MetaDataCreateIndexService.calculateNumRoutingShards(sourceShards, Version.CURRENT) - 1));
if (useNested) {
createInitialIndex.addMapping("t1", "_routing", "required=true", "nested1", "type=nested");
} else {
@@ -172,11 +191,15 @@ public void testCreateSplitIndexToN() throws IOException {
.setSettings(Settings.builder()
.put("index.blocks.write", true)).get();
ensureGreen();
Settings.Builder firstSplitSettingsBuilder = Settings.builder()
.put("index.number_of_replicas", 0)
.put("index.number_of_shards", firstSplitShards);
if (sourceShards == 1 && useRoutingPartition == false && randomBoolean()) { // try to set it if we have a source index with 1 shard
firstSplitSettingsBuilder.put("index.number_of_routing_shards", secondSplitShards);
}
assertAcked(client().admin().indices().prepareResizeIndex("source", "first_split")
.setResizeType(ResizeType.SPLIT)
.setSettings(Settings.builder()
.put("index.number_of_replicas", 0)
.put("index.number_of_shards", shardSplits[1]).build()).get());
.setSettings(firstSplitSettingsBuilder.build()).get());
ensureGreen();
assertHitCount(client().prepareSearch("first_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs);

Review comment on the new sourceShards == 1 conditional: +1
@@ -204,7 +227,7 @@ public void testCreateSplitIndexToN() throws IOException {
.setResizeType(ResizeType.SPLIT)
.setSettings(Settings.builder()
.put("index.number_of_replicas", 0)
.put("index.number_of_shards", shardSplits[2]).build()).get());
.put("index.number_of_shards", secondSplitShards).build()).get());
ensureGreen();
assertHitCount(client().prepareSearch("second_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs);
// let it be allocated anywhere and bump replicas
@@ -340,7 +363,6 @@ public void testCreateSplitIndex() {
prepareCreate("source").setSettings(Settings.builder().put(indexSettings())
.put("number_of_shards", 1)
.put("index.version.created", version)
.put("index.number_of_routing_shards", 2)
).get();
final int docs = randomIntBetween(0, 128);
for (int i = 0; i < docs; i++) {
@@ -443,7 +465,6 @@ public void testCreateSplitWithIndexSort() throws Exception {
Settings.builder()
.put(indexSettings())
.put("sort.field", "id")
.put("index.number_of_routing_shards", 16)
.put("sort.order", "desc")
.put("number_of_shards", 2)
.put("number_of_replicas", 0)
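For orientation, here is a hedged sketch of the flow the reworked splitToN test exercises, assembled from builder calls that appear in the diff (prepareCreate, prepareResizeIndex with ResizeType.SPLIT, and a write-block settings update). The index names follow the test; the shard counts are illustrative, and the assumption that the write block is applied via prepareUpdateSettings is mine, since that call sits outside the hunks shown.

```java
// Sketch of the split flow (assumes an ESIntegTestCase-style client(); not the literal test code).
client().admin().indices().prepareCreate("source")
    .setSettings(Settings.builder().put("number_of_shards", 1))          // no routing-shards setting needed any more
    .get();
// ... index some documents into "source" ...
client().admin().indices().prepareUpdateSettings("source")
    .setSettings(Settings.builder().put("index.blocks.write", true))     // source must be read-only before resizing
    .get();
client().admin().indices().prepareResizeIndex("source", "first_split")
    .setResizeType(ResizeType.SPLIT)
    .setSettings(Settings.builder()
        .put("index.number_of_replicas", 0)
        .put("index.number_of_shards", 2).build())
    .get();
client().admin().indices().prepareUpdateSettings("first_split")
    .setSettings(Settings.builder().put("index.blocks.write", true))     // block writes again before the second split
    .get();
// Because the routing factor was auto-generated at creation time, the first target can be split again.
client().admin().indices().prepareResizeIndex("first_split", "second_split")
    .setResizeType(ResizeType.SPLIT)
    .setSettings(Settings.builder()
        .put("index.number_of_replicas", 0)
        .put("index.number_of_shards", 4).build())
    .get();
```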
Review comments:
- nit: maybe do something like: ... will be easier to read, I think.
- also s/form/from/