Skip to content

Commit d02afcc

Browse files
authored
Ensure relocating shards establish peer recovery retention leases (#50486)
We forgot to establish peer recovery retention leases for relocating primaries without soft-deletes. Relates #50351
1 parent 50bd584 commit d02afcc

File tree

2 files changed

+55
-5
lines changed

2 files changed

+55
-5
lines changed

server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java

+1-5
Original file line numberDiff line numberDiff line change
@@ -1334,11 +1334,7 @@ public synchronized void activateWithPrimaryContext(PrimaryContext primaryContex
13341334
// note that if there was no cluster state update between start of the engine of this shard and the call to
13351335
// initializeWithPrimaryContext, we might still have missed a cluster state update. This is best effort.
13361336
runAfter.run();
1337-
1338-
if (indexSettings.isSoftDeleteEnabled()) {
1339-
addPeerRecoveryRetentionLeaseForSolePrimary();
1340-
}
1341-
1337+
addPeerRecoveryRetentionLeaseForSolePrimary();
13421338
assert invariant();
13431339
}
13441340

server/src/test/java/org/elasticsearch/recovery/RelocationIT.java

+54
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@
2020
package org.elasticsearch.recovery;
2121

2222
import com.carrotsearch.hppc.IntHashSet;
23+
import com.carrotsearch.hppc.cursors.ObjectCursor;
2324
import com.carrotsearch.hppc.procedures.IntProcedure;
2425
import org.apache.lucene.index.IndexFileNames;
2526
import org.apache.lucene.util.English;
2627
import org.elasticsearch.action.ActionFuture;
2728
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
2829
import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteResponse;
30+
import org.elasticsearch.action.admin.indices.stats.ShardStats;
2931
import org.elasticsearch.action.index.IndexRequestBuilder;
3032
import org.elasticsearch.action.index.IndexResponse;
3133
import org.elasticsearch.action.search.SearchResponse;
@@ -45,6 +47,9 @@
4547
import org.elasticsearch.common.xcontent.XContentType;
4648
import org.elasticsearch.env.NodeEnvironment;
4749
import org.elasticsearch.index.IndexService;
50+
import org.elasticsearch.index.IndexSettings;
51+
import org.elasticsearch.index.seqno.ReplicationTracker;
52+
import org.elasticsearch.index.seqno.RetentionLease;
4853
import org.elasticsearch.index.shard.IndexEventListener;
4954
import org.elasticsearch.index.shard.IndexShard;
5055
import org.elasticsearch.index.shard.IndexShardState;
@@ -77,9 +82,12 @@
7782
import java.util.Arrays;
7883
import java.util.Collection;
7984
import java.util.List;
85+
import java.util.Map;
86+
import java.util.Set;
8087
import java.util.concurrent.CountDownLatch;
8188
import java.util.concurrent.Semaphore;
8289
import java.util.concurrent.TimeUnit;
90+
import java.util.stream.Collectors;
8391
import java.util.stream.Stream;
8492

8593
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
@@ -88,6 +96,8 @@
8896
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
8997
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
9098
import static org.hamcrest.Matchers.equalTo;
99+
import static org.hamcrest.Matchers.everyItem;
100+
import static org.hamcrest.Matchers.in;
91101
import static org.hamcrest.Matchers.not;
92102
import static org.hamcrest.Matchers.startsWith;
93103

@@ -103,6 +113,7 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
103113
@Override
104114
protected void beforeIndexDeletion() throws Exception {
105115
super.beforeIndexDeletion();
116+
assertActiveCopiesEstablishedPeerRecoveryRetentionLeases();
106117
internalCluster().assertSeqNos();
107118
internalCluster().assertSameDocIdsOnShards();
108119
}
@@ -603,6 +614,49 @@ public void testRelocateWhileContinuouslyIndexingAndWaitingForRefresh() throws E
603614
assertThat(client().prepareSearch("test").setSize(0).execute().actionGet().getHits().getTotalHits().value, equalTo(120L));
604615
}
605616

617+
public void testRelocationEstablishedPeerRecoveryRetentionLeases() throws Exception {
618+
int halfNodes = randomIntBetween(1, 3);
619+
String indexName = "test";
620+
Settings[] nodeSettings = Stream.concat(
621+
Stream.generate(() -> Settings.builder().put("node.attr.color", "blue").build()).limit(halfNodes),
622+
Stream.generate(() -> Settings.builder().put("node.attr.color", "red").build()).limit(halfNodes)).toArray(Settings[]::new);
623+
List<String> nodes = internalCluster().startNodes(nodeSettings);
624+
String[] blueNodes = nodes.subList(0, halfNodes).toArray(String[]::new);
625+
String[] redNodes = nodes.subList(0, halfNodes).toArray(String[]::new);
626+
ensureStableCluster(halfNodes * 2);
627+
assertAcked(
628+
client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder()
629+
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), randomBoolean())
630+
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomIntBetween(0, halfNodes - 1))
631+
.put("index.routing.allocation.include.color", "blue")));
632+
ensureGreen("test");
633+
assertBusy(() -> assertAllShardsOnNodes(indexName, blueNodes));
634+
assertActiveCopiesEstablishedPeerRecoveryRetentionLeases();
635+
client().admin().indices().prepareUpdateSettings(indexName)
636+
.setSettings(Settings.builder().put("index.routing.allocation.include.color", "red")).get();
637+
assertBusy(() -> assertAllShardsOnNodes(indexName, redNodes));
638+
ensureGreen("test");
639+
assertActiveCopiesEstablishedPeerRecoveryRetentionLeases();
640+
}
641+
642+
private void assertActiveCopiesEstablishedPeerRecoveryRetentionLeases() throws Exception {
643+
assertBusy(() -> {
644+
for (ObjectCursor<String> it : client().admin().cluster().prepareState().get().getState().metaData().indices().keys()) {
645+
Map<ShardId, List<ShardStats>> byShardId = Stream.of(client().admin().indices().prepareStats(it.value).get().getShards())
646+
.collect(Collectors.groupingBy(l -> l.getShardRouting().shardId()));
647+
for (List<ShardStats> shardStats : byShardId.values()) {
648+
Set<String> expectedLeaseIds = shardStats.stream()
649+
.map(s -> ReplicationTracker.getPeerRecoveryRetentionLeaseId(s.getShardRouting())).collect(Collectors.toSet());
650+
for (ShardStats shardStat : shardStats) {
651+
Set<String> actualLeaseIds = shardStat.getRetentionLeaseStats().retentionLeases().leases().stream()
652+
.map(RetentionLease::id).collect(Collectors.toSet());
653+
assertThat(expectedLeaseIds, everyItem(in(actualLeaseIds)));
654+
}
655+
}
656+
}
657+
});
658+
}
659+
606660
class RecoveryCorruption implements StubbableTransport.SendRequestBehavior {
607661

608662
private final CountDownLatch corruptionCount;

0 commit comments

Comments
 (0)