Skip to content

Commit 44fc57f

Browse files
committed
Fix retention leases sync on recovery test
This test had a bug. We attempt to allow only the primary to be allocated, to force all replicas to recover from the primary after we have set the state of the retention leases on the primary. However, in building the index settings, we were overwriting the settings that exclude the replicas from being allocated. This means that some of the replicas would end up assigned and, rather than receiving retention leases during recovery, they would be part of the replication group receiving retention leases as they are manipulated. Since retention lease renewals are only synced periodically, the replica could lag a little behind in some cases, leading to an assertion tripping in the test. This commit addresses this by ensuring that the replicas are indeed not allocated until after the retention leases are done being manipulated on the primary. We do this by not overwriting the exclude settings. Closes #39105
1 parent fbabd81 commit 44fc57f

File tree

1 file changed

+10
-14
lines changed

1 file changed

+10
-14
lines changed

server/src/test/java/org/elasticsearch/index/seqno/RetentionLeaseIT.java

Lines changed: 10 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
import java.util.ArrayList;
4040
import java.util.Collection;
4141
import java.util.Collections;
42-
import java.util.HashMap;
42+
import java.util.LinkedHashMap;
4343
import java.util.List;
4444
import java.util.Map;
4545
import java.util.concurrent.CountDownLatch;
@@ -89,7 +89,7 @@ public void testRetentionLeasesSyncedOnAdd() throws Exception {
8989
.getShardOrNull(new ShardId(resolveIndex("index"), 0));
9090
// we will add multiple retention leases and expect to see them synced to all replicas
9191
final int length = randomIntBetween(1, 8);
92-
final Map<String, RetentionLease> currentRetentionLeases = new HashMap<>();
92+
final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
9393
for (int i = 0; i < length; i++) {
9494
final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
9595
final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
@@ -136,7 +136,7 @@ public void testRetentionLeaseSyncedOnRemove() throws Exception {
136136
.getInstance(IndicesService.class, primaryShardNodeName)
137137
.getShardOrNull(new ShardId(resolveIndex("index"), 0));
138138
final int length = randomIntBetween(1, 8);
139-
final Map<String, RetentionLease> currentRetentionLeases = new HashMap<>();
139+
final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
140140
for (int i = 0; i < length; i++) {
141141
final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
142142
final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
@@ -277,7 +277,7 @@ public void testBackgroundRetentionLeaseSync() throws Exception {
277277
.getShardOrNull(new ShardId(resolveIndex("index"), 0));
278278
// we will add multiple retention leases and expect to see them synced to all replicas
279279
final int length = randomIntBetween(1, 8);
280-
final Map<String, RetentionLease> currentRetentionLeases = new HashMap<>(length);
280+
final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>(length);
281281
final List<String> ids = new ArrayList<>(length);
282282
for (int i = 0; i < length; i++) {
283283
final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
@@ -318,15 +318,15 @@ public void testRetentionLeasesSyncOnRecovery() throws Exception {
318318
internalCluster().ensureAtLeastNumDataNodes(1 + numberOfReplicas);
319319
/*
320320
* We effectively disable the background sync to ensure that the retention leases are not synced in the background so that the only
321-
* source of retention leases on the replicas would be from the commit point and recovery.
321+
* source of retention leases on the replicas would be from recovery.
322322
*/
323-
final Settings settings = Settings.builder()
323+
final Settings.Builder settings = Settings.builder()
324324
.put("index.number_of_shards", 1)
325325
.put("index.number_of_replicas", 0)
326-
.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), TimeValue.timeValueHours(24))
327-
.build();
326+
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
327+
.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), TimeValue.timeValueHours(24));
328328
// when we increase the number of replicas below we want to exclude the replicas from being allocated so that they do not recover
329-
assertAcked(prepareCreate("index", 1).setSettings(settings));
329+
assertAcked(prepareCreate("index", 1, settings));
330330
ensureYellow("index");
331331
final AcknowledgedResponse response = client().admin()
332332
.indices()
@@ -339,7 +339,7 @@ public void testRetentionLeasesSyncOnRecovery() throws Exception {
339339
.getInstance(IndicesService.class, primaryShardNodeName)
340340
.getShardOrNull(new ShardId(resolveIndex("index"), 0));
341341
final int length = randomIntBetween(1, 8);
342-
final Map<String, RetentionLease> currentRetentionLeases = new HashMap<>();
342+
final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
343343
for (int i = 0; i < length; i++) {
344344
final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
345345
final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
@@ -348,10 +348,6 @@ public void testRetentionLeasesSyncOnRecovery() throws Exception {
348348
final ActionListener<ReplicationResponse> listener = ActionListener.wrap(r -> latch.countDown(), e -> fail(e.toString()));
349349
currentRetentionLeases.put(id, primary.addRetentionLease(id, retainingSequenceNumber, source, listener));
350350
latch.await();
351-
/*
352-
* Now renew the leases; since we do not flush immediately on renewal, this means that the latest retention leases will not be
353-
* in the latest commit point and therefore not transferred during the file-copy phase of recovery.
354-
*/
355351
currentRetentionLeases.put(id, primary.renewRetentionLease(id, retainingSequenceNumber, source));
356352
}
357353

0 commit comments

Comments
 (0)