-
Notifications
You must be signed in to change notification settings - Fork 25.2k
Automatically prepare indices for splitting #27451
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
005215e
4072379
6ab582e
0e3fbbb
6d90ad8
e4da795
033c6ad
fb13969
27a6d34
71f5c35
9262c9e
8482a60
6fe2bff
b02b9f3
8176378
1f46ce3
a6616cd
b2a9a08
dccad0b
36aca55
1cd9c78
e820289
791c2f1
4db1b97
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1344,13 +1344,17 @@ public static ShardId selectSplitShard(int shardId, IndexMetaData sourceIndexMet | |
} | ||
int routingFactor = getRoutingFactor(numSourceShards, numTargetShards); | ||
// now we verify that the numRoutingShards is valid in the source index | ||
int routingNumShards = sourceIndexMetadata.getRoutingNumShards(); | ||
// note: if the number of shards is 1 in the source index we can just assume it's correct since from 1 we can split into anything | ||
// this is important to special case here since we use this to validate this in various places in the code but allow to split form | ||
// 1 to N but we never modify the sourceIndexMetadata to accommodate for that | ||
int routingNumShards = numSourceShards == 1 ? numTargetShards : sourceIndexMetadata.getRoutingNumShards(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: maybe do something like:
will be easier to read , I think. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also s/form/from/ |
||
if (routingNumShards % numTargetShards != 0) { | ||
throw new IllegalStateException("the number of routing shards [" | ||
+ routingNumShards + "] must be a multiple of the target shards [" + numTargetShards + "]"); | ||
} | ||
// this is just an additional assertion that ensures we are a factor of the routing num shards. | ||
assert getRoutingFactor(numTargetShards, sourceIndexMetadata.getRoutingNumShards()) >= 0; | ||
assert sourceIndexMetadata.getNumberOfShards() == 1 // special case - we can split into anything from 1 shard | ||
|| getRoutingFactor(numTargetShards, routingNumShards) >= 0; | ||
return new ShardId(sourceIndexMetadata.getIndex(), shardId/routingFactor); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -379,16 +379,28 @@ public ClusterState execute(ClusterState currentState) throws Exception { | |
indexSettingsBuilder.put(IndexMetaData.SETTING_INDEX_PROVIDED_NAME, request.getProvidedName()); | ||
indexSettingsBuilder.put(SETTING_INDEX_UUID, UUIDs.randomBase64UUID()); | ||
final IndexMetaData.Builder tmpImdBuilder = IndexMetaData.builder(request.index()); | ||
|
||
final Settings idxSettings = indexSettingsBuilder.build(); | ||
int numTargetShards = IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.get(idxSettings); | ||
final int routingNumShards; | ||
final Version indexVersionCreated = idxSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null); | ||
if (recoverFromIndex == null) { | ||
Settings idxSettings = indexSettingsBuilder.build(); | ||
routingNumShards = IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.get(idxSettings); | ||
if (IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.exists(idxSettings)) { | ||
routingNumShards = IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.get(idxSettings); | ||
} else { | ||
routingNumShards = calculateNumRoutingShards(numTargetShards, indexVersionCreated); | ||
} | ||
} else { | ||
assert IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.exists(indexSettingsBuilder.build()) == false | ||
: "index.number_of_routing_shards should be present on the target index on resize"; | ||
: "index.number_of_routing_shards should not be present on the target index on resize"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. :) |
||
final IndexMetaData sourceMetaData = currentState.metaData().getIndexSafe(recoverFromIndex); | ||
routingNumShards = sourceMetaData.getRoutingNumShards(); | ||
if (shouldAutoCalculateNumRoutingShards(numTargetShards, sourceMetaData.getNumberOfShards(), | ||
sourceMetaData.getRoutingNumShards())) { | ||
// in this case we have a source index with 1 shard and without an explicit split factor | ||
// or one that is valid in that case we can split into whatever and auto-generate a new factor. | ||
routingNumShards = calculateNumRoutingShards(numTargetShards, indexVersionCreated); | ||
} else { | ||
routingNumShards = sourceMetaData.getRoutingNumShards(); | ||
} | ||
} | ||
// remove the setting it's temporary and is only relevant once we create the index | ||
indexSettingsBuilder.remove(IndexMetaData.INDEX_NUMBER_OF_ROUTING_SHARDS_SETTING.getKey()); | ||
|
@@ -717,4 +729,37 @@ static void prepareResizeIndexSettings(ClusterState currentState, Set<String> ma | |
.put(IndexMetaData.INDEX_RESIZE_SOURCE_NAME.getKey(), resizeSourceIndex.getName()) | ||
.put(IndexMetaData.INDEX_RESIZE_SOURCE_UUID.getKey(), resizeSourceIndex.getUUID()); | ||
} | ||
|
||
/** | ||
* Returns a default number of routing shards based on the number of shards of the index. The default number of routing shards will | ||
* allow any index to be split at least once and at most 10 times by a factor of two. The closer the number or shards gets to 1024 | ||
* the less default split operations are supported | ||
*/ | ||
public static int calculateNumRoutingShards(int numShards, Version indexVersionCreated) { | ||
if (indexVersionCreated.onOrAfter(Version.V_7_0_0_alpha1)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you clarify why this needs to be version dependent? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is a comment in the line below?! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I get this means we only do the new behavior until the cluster is fully upgraded, but I don't see why we care? I mean, if the master is a 7.0.0 master, we can start creating indices with a different hashing logic and not worry about it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's mainly for testing purposes and BWC behavior being more predictable otherwise some rest tests will randomly fail There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh well :) |
||
// only select this automatically for indices that are created on or after 7.0 | ||
int base = 10; // logBase2(1024) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's worth sharing the 1024 constant with the max value in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure how you envisioned this to work? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was just thinking of having a |
||
return numShards * 1 << Math.max(1, (base - (int) (Math.log(numShards) / Math.log(2)))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It feels a bit weird to me that this will generate numbers that are greater than the maximum number of shards. Should we change the formula a bit so that the result is always in 513...1024 when the number of shards is in 1..512? This probably deserves some comments as well, eg. I presume that the fact we do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah I will do that. |
||
} else { | ||
return numShards; | ||
} | ||
} | ||
|
||
|
||
/** | ||
* Returns <code>true</code> iff the we should calculate the number of routing shards for the split index based on | ||
* the source index. This only applies to source indices that have only 1 shards. We don't want to recalculate the num routing | ||
* shards on indices that already have a valid number of routing shards. | ||
*/ | ||
static boolean shouldAutoCalculateNumRoutingShards(int numTargetShards, int numSourceShards, int numSourceRoutingShards) { | ||
if (numSourceShards == 1) { | ||
if (numSourceRoutingShards < numTargetShards) { | ||
// we have a source index that has less routing shards than our target shards -- we should reset | ||
return true; | ||
} | ||
int factor = numSourceRoutingShards / numTargetShards; | ||
return factor * numTargetShards != numSourceRoutingShards || factor <= 1; | ||
} | ||
return false; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,7 @@ | |
import org.elasticsearch.client.Client; | ||
import org.elasticsearch.cluster.ClusterState; | ||
import org.elasticsearch.cluster.metadata.IndexMetaData; | ||
import org.elasticsearch.cluster.metadata.MetaDataCreateIndexService; | ||
import org.elasticsearch.cluster.node.DiscoveryNode; | ||
import org.elasticsearch.cluster.routing.Murmur3HashFunction; | ||
import org.elasticsearch.cluster.routing.ShardRouting; | ||
|
@@ -66,7 +67,6 @@ | |
import java.util.List; | ||
import java.util.Set; | ||
import java.util.function.BiFunction; | ||
import java.util.function.IntFunction; | ||
import java.util.stream.IntStream; | ||
|
||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; | ||
|
@@ -88,21 +88,31 @@ protected Collection<Class<? extends Plugin>> nodePlugins() { | |
} | ||
|
||
public void testCreateSplitIndexToN() throws IOException { | ||
int[][] possibleShardSplits = new int[][] {{2,4,8}, {3, 6, 12}, {1, 2, 4}}; | ||
int[][] possibleShardSplits = new int[][]{{2, 4, 8}, {3, 6, 12}, {1, 2, 4}}; | ||
int[] shardSplits = randomFrom(possibleShardSplits); | ||
splitToN(shardSplits); | ||
} | ||
|
||
public void testSplitFromOneToN() { | ||
splitToN(1, 5, 10); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess you had an explicit reason not to have a shrink to one shard then split again test (where we can take values in the split that doesn't compute with the source index)? alternatively we can explicitly set the routing shards on the source index to something that doesn't make sense when we start. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure I understand what you mean There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The test now start with 1 shard source and the split twice. both of these always have a valid number of routing shards in the source index. I think the interesting part of the test is see how we reset the number of routing shard. For example, start with a 3 shards index. Shrink to 1 (number of routing shards stays 3) then split to say, 2. Does that help? |
||
client().admin().indices().prepareDelete("*").get(); | ||
int randomSplit = randomIntBetween(2, 6); | ||
splitToN(1, randomSplit, randomSplit * 2); | ||
} | ||
|
||
private void splitToN(int... shardSplits) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see why you did it this way as it was easier to refactor, but I think we should bite the bullet and give these proper variable names. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fair enough, I will do that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what woud you name it |
||
assertEquals(3, shardSplits.length); | ||
assertEquals(shardSplits[0], (shardSplits[0] * shardSplits[1]) / shardSplits[1]); | ||
assertEquals(shardSplits[1], (shardSplits[1] * shardSplits[2]) / shardSplits[2]); | ||
internalCluster().ensureAtLeastNumDataNodes(2); | ||
final boolean useRouting = randomBoolean(); | ||
final boolean useNested = randomBoolean(); | ||
final boolean useMixedRouting = useRouting ? randomBoolean() : false; | ||
CreateIndexRequestBuilder createInitialIndex = prepareCreate("source"); | ||
final int routingShards = shardSplits[2] * randomIntBetween(1, 10); | ||
Settings.Builder settings = Settings.builder().put(indexSettings()) | ||
.put("number_of_shards", shardSplits[0]) | ||
.put("index.number_of_routing_shards", routingShards); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we randomly still do this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we can.. I will do it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ++ |
||
Settings.Builder settings = Settings.builder().put(indexSettings()).put("number_of_shards", shardSplits[0]); | ||
if (useRouting && useMixedRouting == false && randomBoolean()) { | ||
settings.put("index.routing_partition_size", randomIntBetween(1, routingShards - 1)); | ||
settings.put("index.routing_partition_size", randomIntBetween(1, MetaDataCreateIndexService.calculateNumRoutingShards | ||
(shardSplits[0], Version.CURRENT) -1)); | ||
if (useNested) { | ||
createInitialIndex.addMapping("t1", "_routing", "required=true", "nested1", "type=nested"); | ||
} else { | ||
|
@@ -340,7 +350,6 @@ public void testCreateSplitIndex() { | |
prepareCreate("source").setSettings(Settings.builder().put(indexSettings()) | ||
.put("number_of_shards", 1) | ||
.put("index.version.created", version) | ||
.put("index.number_of_routing_shards", 2) | ||
).get(); | ||
final int docs = randomIntBetween(0, 128); | ||
for (int i = 0; i < docs; i++) { | ||
|
@@ -443,7 +452,6 @@ public void testCreateSplitWithIndexSort() throws Exception { | |
Settings.builder() | ||
.put(indexSettings()) | ||
.put("sort.field", "id") | ||
.put("index.number_of_routing_shards", 16) | ||
.put("sort.order", "desc") | ||
.put("number_of_shards", 2) | ||
.put("number_of_replicas", 0) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,7 +34,6 @@ | |
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; | ||
import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; | ||
import org.elasticsearch.common.Strings; | ||
import org.elasticsearch.common.settings.Setting; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.index.IndexNotFoundException; | ||
import org.elasticsearch.ResourceAlreadyExistsException; | ||
|
@@ -299,4 +298,39 @@ private void validateIndexName(String indexName, String errorMessage) { | |
.getDefault(Settings.EMPTY)).build())); | ||
assertThat(e.getMessage(), endsWith(errorMessage)); | ||
} | ||
|
||
public void testShouldAutoCalculateNumRoutingShards() { | ||
assertTrue(MetaDataCreateIndexService.shouldAutoCalculateNumRoutingShards(2, 1, 1)); | ||
assertFalse(MetaDataCreateIndexService.shouldAutoCalculateNumRoutingShards(10, 1, 20)); | ||
assertTrue(MetaDataCreateIndexService.shouldAutoCalculateNumRoutingShards(10, 1, 5)); | ||
assertTrue(MetaDataCreateIndexService.shouldAutoCalculateNumRoutingShards(10, 1, 16)); | ||
assertFalse(MetaDataCreateIndexService.shouldAutoCalculateNumRoutingShards(2, 1, 8)); | ||
} | ||
|
||
public void testCalculateNumRoutingShards() { | ||
assertEquals(1024, MetaDataCreateIndexService.calculateNumRoutingShards(1, Version.CURRENT)); | ||
assertEquals(1024, MetaDataCreateIndexService.calculateNumRoutingShards(2, Version.CURRENT)); | ||
assertEquals(1536, MetaDataCreateIndexService.calculateNumRoutingShards(3, Version.CURRENT)); | ||
assertEquals(1152, MetaDataCreateIndexService.calculateNumRoutingShards(9, Version.CURRENT)); | ||
assertEquals(1024, MetaDataCreateIndexService.calculateNumRoutingShards(512, Version.CURRENT)); | ||
assertEquals(2048, MetaDataCreateIndexService.calculateNumRoutingShards(1024, Version.CURRENT)); | ||
assertEquals(4096, MetaDataCreateIndexService.calculateNumRoutingShards(2048, Version.CURRENT)); | ||
|
||
Version latestV6 = VersionUtils.getPreviousVersion(Version.V_7_0_0_alpha1); | ||
int numShards = randomIntBetween(1, 1000); | ||
assertEquals(numShards, MetaDataCreateIndexService.calculateNumRoutingShards(numShards, latestV6)); | ||
assertEquals(numShards, MetaDataCreateIndexService.calculateNumRoutingShards(numShards, | ||
VersionUtils.randomVersionBetween(random(), VersionUtils.getFirstVersion(), latestV6))); | ||
|
||
for (int i = 0; i < 1000; i++) { | ||
int randomNumShards = randomIntBetween(1, 10000); | ||
int numRoutingShards = MetaDataCreateIndexService.calculateNumRoutingShards(randomNumShards, Version.CURRENT); | ||
double ratio = numRoutingShards / randomNumShards; | ||
int intRatio = (int) ratio; | ||
assertEquals(ratio, (double)(intRatio), 0.0d); | ||
assertTrue(1 < ratio); | ||
assertTrue(ratio <= 1024); | ||
assertEquals(0, intRatio % 2); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we assert that intRatio is a power of two by checking that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I understand ot's important here to bypass the assertions as we don't have any relationship between the source routing shards and the target one in the case where the source has only one physical shards. I think the "validate this in various places in the code" part is maybe a leftover from a previous iteration?