
Commit cc67d02

Initialize sequence numbers on a shrunken index
Bringing together shards in a shrunken index means that we need to address the start of history for the shrunken index. The problem is that sequence numbers before the maximum of the maximum sequence numbers on the source shards can collide in the target shards of the shrunken index. To address this, we set the maximum sequence number and the local checkpoint on the target shards to this maximum of the maximum sequence numbers. This enables correct document-level semantics for documents indexed before the shrink, and history on the shrunken index effectively starts from here.

Relates #25321
1 parent 4bbb7e8 commit cc67d02
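In essence, the change derives a single value from the source shards and stamps it as both the maximum sequence number and the local checkpoint of every target shard. A minimal standalone sketch of that derivation (using plain longs in place of the actual LocalShardSnapshot plumbing; the per-shard values are hypothetical):

import java.util.stream.LongStream;

public class ShrinkSeqNoInit {
    public static void main(String[] args) {
        // hypothetical maximum sequence numbers reported by three source shards
        final long[] sourceShardMaxSeqNos = {17, 42, 23};

        // sequence numbers at or below the maximum of these maxima may collide
        // across source shards, so history on the shrunken index starts after it
        final long maxSeqNo = LongStream.of(sourceShardMaxSeqNos).max().getAsLong();

        // both markers receive the same value on every target shard
        System.out.println("max_seq_no = " + maxSeqNo);       // 42
        System.out.println("local_checkpoint = " + maxSeqNo); // 42
    }
}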

5 files changed (+62, −22 lines)

core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java

Lines changed: 1 addition & 1 deletion
@@ -1325,7 +1325,7 @@ public static Set<ShardId> selectShrinkShards(int shardId, IndexMetaData sourceI
      * @param sourceIndexMetadata the metadata of the source index
      * @param targetNumberOfShards the total number of shards in the target index
      * @return the routing factor for and shrunk index with the given number of target shards.
-     * @throws IllegalArgumentException if the number of source shards is greater than the number of target shards or if the source shards
+     * @throws IllegalArgumentException if the number of source shards is less than the number of target shards or if the source shards
      *         are not divisible by the number of target shards.
      */
     public static int getRoutingFactor(IndexMetaData sourceIndexMetadata, int targetNumberOfShards) {
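The corrected javadoc states the shrink contract: shrinking is only legal when the source shard count is at least the target shard count and evenly divisible by it. A sketch of that contract for illustration (not the actual getRoutingFactor implementation):

// illustrative only: mirrors the documented preconditions of getRoutingFactor
static int routingFactor(final int sourceNumberOfShards, final int targetNumberOfShards) {
    if (sourceNumberOfShards < targetNumberOfShards) {
        throw new IllegalArgumentException("the number of source shards must be at least the number of target shards");
    }
    if (sourceNumberOfShards % targetNumberOfShards != 0) {
        throw new IllegalArgumentException("the number of source shards must be divisible by the number of target shards");
    }
    // e.g., shrinking 8 shards into 2 yields a routing factor of 4:
    // each target shard absorbs the documents of 4 source shards
    return sourceNumberOfShards / targetNumberOfShards;
}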

core/src/main/java/org/elasticsearch/index/shard/LocalShardSnapshot.java

Lines changed: 4 additions & 0 deletions
@@ -60,6 +60,10 @@ Index getIndex() {
         return shard.indexSettings().getIndex();
     }
 
+    long maxSeqNo() {
+        return shard.getEngine().seqNoService().getMaxSeqNo();
+    }
+
     Directory getSnapshotDirectory() {
         /* this directory will not be used for anything else but reading / copying files to another directory
          * we prevent all write operations on this directory with UOE - nobody should close it either. */

core/src/main/java/org/elasticsearch/index/shard/StoreRecovery.java

Lines changed: 25 additions & 6 deletions
@@ -41,6 +41,7 @@
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.engine.EngineException;
 import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.seqno.SequenceNumbers;
 import org.elasticsearch.index.snapshots.IndexShardRestoreFailedException;
 import org.elasticsearch.index.store.Store;
 import org.elasticsearch.indices.recovery.RecoveryState;
@@ -49,6 +50,8 @@
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -115,9 +118,9 @@ boolean recoverFromLocalShards(BiConsumer<String, MappingMetaData> mappingUpdate
             logger.debug("starting recovery from local shards {}", shards);
             try {
                 final Directory directory = indexShard.store().directory(); // don't close this directory!!
-                addIndices(indexShard.recoveryState().getIndex(), directory, indexSort,
-                    shards.stream().map(s -> s.getSnapshotDirectory())
-                        .collect(Collectors.toList()).toArray(new Directory[shards.size()]));
+                final Directory[] sources = shards.stream().map(LocalShardSnapshot::getSnapshotDirectory).toArray(Directory[]::new);
+                final long maxSeqNo = shards.stream().mapToLong(LocalShardSnapshot::maxSeqNo).max().getAsLong();
+                addIndices(indexShard.recoveryState().getIndex(), directory, indexSort, sources, maxSeqNo);
                 internalRecoverFromStore(indexShard);
                 // just trigger a merge to do housekeeping on the
                 // copied segments - we will also see them in stats etc.
@@ -131,8 +134,13 @@ boolean recoverFromLocalShards(BiConsumer<String, MappingMetaData> mappingUpdate
         return false;
     }
 
-    void addIndices(RecoveryState.Index indexRecoveryStats, Directory target, Sort indexSort, Directory... sources) throws IOException {
-        target = new org.apache.lucene.store.HardlinkCopyDirectoryWrapper(target);
+    void addIndices(
+            final RecoveryState.Index indexRecoveryStats,
+            final Directory target,
+            final Sort indexSort,
+            final Directory[] sources,
+            final long maxSeqNo) throws IOException {
+        final Directory hardLinkOrCopyTarget = new org.apache.lucene.store.HardlinkCopyDirectoryWrapper(target);
         IndexWriterConfig iwc = new IndexWriterConfig(null)
             .setCommitOnClose(false)
             // we don't want merges to happen here - we call maybe merge on the engine
@@ -143,8 +151,19 @@ void addIndices(RecoveryState.Index indexRecoveryStats, Directory target, Sort i
         if (indexSort != null) {
             iwc.setIndexSort(indexSort);
         }
-        try (IndexWriter writer = new IndexWriter(new StatsDirectoryWrapper(target, indexRecoveryStats), iwc)) {
+        try (IndexWriter writer = new IndexWriter(new StatsDirectoryWrapper(hardLinkOrCopyTarget, indexRecoveryStats), iwc)) {
             writer.addIndexes(sources);
+            /*
+             * We set the maximum sequence number and the local checkpoint on the target to the maximum of the maximum sequence numbers on
+             * the source shards. This ensures that history after this maximum sequence number can advance and we have correct
+             * document-level semantics.
+             */
+            writer.setLiveCommitData(() -> {
+                final HashMap<String, String> liveCommitData = new HashMap<>(2);
+                liveCommitData.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(maxSeqNo));
+                liveCommitData.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(maxSeqNo));
+                return liveCommitData.entrySet().iterator();
+            });
             writer.commit();
         }
     }
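The Lucene mechanism doing the work here is commit user data: setLiveCommitData attaches key/value pairs to the next commit point, and the target shard's engine later reads them back from the latest commit. A self-contained sketch of that round trip, assuming a Lucene version contemporary with this commit (RAMDirectory still available) and using string literals where the real code uses the SequenceNumbers constants:

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class CommitUserDataRoundTrip {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            final long maxSeqNo = 42; // stands in for the max of the source shards' maximum sequence numbers
            try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(null).setCommitOnClose(false))) {
                // stamp both markers with the same value so the local checkpoint and
                // the maximum sequence number agree at the start of the target's history
                final Map<String, String> liveCommitData = new HashMap<>(2);
                liveCommitData.put("max_seq_no", Long.toString(maxSeqNo));        // SequenceNumbers.MAX_SEQ_NO
                liveCommitData.put("local_checkpoint", Long.toString(maxSeqNo));  // SequenceNumbers.LOCAL_CHECKPOINT_KEY
                writer.setLiveCommitData(liveCommitData.entrySet());
                writer.commit();
            }
            // read the markers back from the latest commit point, as the recovery path does
            final Map<String, String> userData = SegmentInfos.readLatestCommit(dir).getUserData();
            System.out.println("max_seq_no = " + userData.get("max_seq_no"));
            System.out.println("local_checkpoint = " + userData.get("local_checkpoint"));
        }
    }
}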

core/src/test/java/org/elasticsearch/action/admin/indices/create/ShrinkIndexIT.java

Lines changed: 23 additions & 14 deletions
@@ -19,7 +19,6 @@
 
 package org.elasticsearch.action.admin.indices.create;
 
-import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedSetSelector;
@@ -29,17 +28,17 @@
 import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
 import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
+import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
+import org.elasticsearch.action.admin.indices.stats.ShardStats;
 import org.elasticsearch.action.index.IndexRequest;
 import org.elasticsearch.action.support.ActiveShardCount;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.cluster.ClusterInfoService;
 import org.elasticsearch.cluster.InternalClusterInfoService;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.node.DiscoveryNode;
-import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
 import org.elasticsearch.cluster.routing.Murmur3HashFunction;
 import org.elasticsearch.cluster.routing.RoutingTable;
-import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.UnassignedInfo;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.collect.ImmutableOpenMap;
@@ -48,8 +47,8 @@
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.query.TermsQueryBuilder;
+import org.elasticsearch.index.seqno.SeqNoStats;
 import org.elasticsearch.index.shard.IndexShard;
-import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESIntegTestCase;
@@ -58,15 +57,11 @@
 
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
-import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
-import static org.hamcrest.CoreMatchers.not;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
@@ -233,7 +228,8 @@ public void testCreateShrinkIndex() {
             .put("number_of_shards", randomIntBetween(2, 7))
             .put("index.version.created", version)
         ).get();
-        for (int i = 0; i < 20; i++) {
+        final int docs = randomIntBetween(0, 128);
+        for (int i = 0; i < docs; i++) {
             client().prepareIndex("source", "type")
                 .setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", XContentType.JSON).get();
         }
@@ -252,30 +248,43 @@ public void testCreateShrinkIndex() {
             .put("index.routing.allocation.require._name", mergeNode)
             .put("index.blocks.write", true)).get();
         ensureGreen();
+
+        final IndicesStatsResponse sourceStats = client().admin().indices().prepareStats("source").get();
+        final long maxSeqNo =
+            Arrays.stream(sourceStats.getShards()).map(ShardStats::getSeqNoStats).mapToLong(SeqNoStats::getMaxSeqNo).max().getAsLong();
         // now merge source into a single shard index
 
         final boolean createWithReplicas = randomBoolean();
         assertAcked(client().admin().indices().prepareShrinkIndex("source", "target")
             .setSettings(Settings.builder().put("index.number_of_replicas", createWithReplicas ? 1 : 0).build()).get());
         ensureGreen();
-        assertHitCount(client().prepareSearch("target").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20);
+
+        final IndicesStatsResponse targetStats = client().admin().indices().prepareStats("target").get();
+        for (final ShardStats shardStats : targetStats.getShards()) {
+            final SeqNoStats seqNoStats = shardStats.getSeqNoStats();
+            assertThat(seqNoStats.getMaxSeqNo(), equalTo(maxSeqNo));
+            assertThat(seqNoStats.getLocalCheckpoint(), equalTo(maxSeqNo));
+        }
+
+        final int size = docs > 0 ? 2 * docs : 1;
+        assertHitCount(client().prepareSearch("target").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs);
 
         if (createWithReplicas == false) {
             // bump replicas
             client().admin().indices().prepareUpdateSettings("target")
                 .setSettings(Settings.builder()
                     .put("index.number_of_replicas", 1)).get();
             ensureGreen();
-            assertHitCount(client().prepareSearch("target").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20);
+            assertHitCount(client().prepareSearch("target").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs);
         }
 
-        for (int i = 20; i < 40; i++) {
+        for (int i = docs; i < 2 * docs; i++) {
             client().prepareIndex("target", "type")
                 .setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", XContentType.JSON).get();
         }
         flushAndRefresh();
-        assertHitCount(client().prepareSearch("target").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 40);
-        assertHitCount(client().prepareSearch("source").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20);
+        assertHitCount(client().prepareSearch("target").setSize(2 * size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 2 * docs);
+        assertHitCount(client().prepareSearch("source").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs);
         GetSettingsResponse target = client().admin().indices().prepareGetSettings("target").get();
         assertEquals(version, target.getIndexToSettings().get("target").getAsVersion("index.version.created", null));
     }

core/src/test/java/org/elasticsearch/index/shard/StoreRecoveryTests.java

Lines changed: 9 additions & 1 deletion
@@ -37,6 +37,7 @@
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.IOUtils;
+import org.elasticsearch.index.seqno.SequenceNumbers;
 import org.elasticsearch.indices.recovery.RecoveryState;
 import org.elasticsearch.test.ESTestCase;
 
@@ -46,8 +47,11 @@
 import java.nio.file.attribute.BasicFileAttributes;
 import java.security.AccessControlException;
 import java.util.Arrays;
+import java.util.Map;
 import java.util.function.Predicate;
 
+import static org.hamcrest.CoreMatchers.equalTo;
+
 public class StoreRecoveryTests extends ESTestCase {
 
     public void testAddIndices() throws IOException {
@@ -82,7 +86,8 @@ public void testAddIndices() throws IOException {
         StoreRecovery storeRecovery = new StoreRecovery(new ShardId("foo", "bar", 1), logger);
         RecoveryState.Index indexStats = new RecoveryState.Index();
         Directory target = newFSDirectory(createTempDir());
-        storeRecovery.addIndices(indexStats, target, indexSort, dirs);
+        final long maxSeqNo = randomNonNegativeLong();
+        storeRecovery.addIndices(indexStats, target, indexSort, dirs, maxSeqNo);
         int numFiles = 0;
         Predicate<String> filesFilter = (f) -> f.startsWith("segments") == false && f.equals("write.lock") == false
             && f.startsWith("extra") == false;
@@ -99,6 +104,9 @@ public void testAddIndices() throws IOException {
         }
         DirectoryReader reader = DirectoryReader.open(target);
         SegmentInfos segmentCommitInfos = SegmentInfos.readLatestCommit(target);
+        final Map<String, String> userData = segmentCommitInfos.getUserData();
+        assertThat(userData.get(SequenceNumbers.MAX_SEQ_NO), equalTo(Long.toString(maxSeqNo)));
+        assertThat(userData.get(SequenceNumbers.LOCAL_CHECKPOINT_KEY), equalTo(Long.toString(maxSeqNo)));
         for (SegmentCommitInfo info : segmentCommitInfos) { // check that we didn't merge
             assertEquals("all sources must be flush",
                 info.info.getDiagnostics().get("source"), "flush");
