Skip to content

Commit 8e933f5

Browse files
committed
Avoid unnecessary persistence of retention leases (#42299)
Today we are persisting the retention leases at least every thirty seconds by a scheduled background sync. This sync causes an fsync to disk and when there are a large number of shards allocated to slow disks, these fsyncs can pile up and can severely impact the system. This commit addresses this by only persisting and fsyncing the retention leases if they have changed since the last time that we persisted and fsynced the retention leases.
1 parent ae5e171 commit 8e933f5

File tree

5 files changed

+107
-11
lines changed

5 files changed

+107
-11
lines changed

server/src/main/java/org/elasticsearch/gateway/MetaDataStateFormat.java

+22-7
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.lucene.store.OutputStreamIndexOutput;
3232
import org.apache.lucene.store.SimpleFSDirectory;
3333
import org.elasticsearch.ExceptionsHelper;
34+
import org.elasticsearch.common.collect.Tuple;
3435
import org.elasticsearch.common.lucene.store.IndexOutputOutputStream;
3536
import org.elasticsearch.common.lucene.store.InputStreamIndexInput;
3637
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
@@ -246,15 +247,16 @@ long findMaxStateId(final String prefix, Path... locations) throws IOException {
246247
}
247248

248249
/**
249-
* Tries to load the latest state from the given data-locations. It tries to load the latest state determined by
250-
* the states version from one or more data directories and if none of the latest states can be loaded an exception
251-
* is thrown to prevent accidentally loading a previous state and silently omitting the latest state.
250+
* Tries to load the latest state from the given data-locations.
252251
*
253-
* @param logger a logger instance
252+
* @param logger a logger instance.
254253
* @param dataLocations the data-locations to try.
255-
* @return the latest state or <code>null</code> if no state was found.
254+
* @return tuple of the latest state and its generation, or <code>null</code> if no state is found.
256255
*/
257-
public T loadLatestState(Logger logger, NamedXContentRegistry namedXContentRegistry, Path... dataLocations) throws IOException {
256+
public Tuple<T, Long> loadLatestStateWithGeneration(
257+
final Logger logger,
258+
final NamedXContentRegistry namedXContentRegistry,
259+
final Path... dataLocations) throws IOException {
258260
List<PathAndStateId> files = new ArrayList<>();
259261
long maxStateId = -1;
260262
if (dataLocations != null) { // select all eligible files first
@@ -293,7 +295,7 @@ public T loadLatestState(Logger logger, NamedXContentRegistry namedXContentRegis
293295
try {
294296
T state = read(namedXContentRegistry, pathAndStateId.file);
295297
logger.trace("state id [{}] read from [{}]", pathAndStateId.id, pathAndStateId.file.getFileName());
296-
return state;
298+
return Tuple.tuple(state, pathAndStateId.id);
297299
} catch (Exception e) {
298300
exceptions.add(new IOException("failed to read " + pathAndStateId.toString(), e));
299301
logger.debug(() -> new ParameterizedMessage(
@@ -309,6 +311,19 @@ public T loadLatestState(Logger logger, NamedXContentRegistry namedXContentRegis
309311
return null;
310312
}
311313

314+
/**
315+
* Tries to load the latest state from the given data-locations. It tries to load the latest state determined by
316+
* the states version from one or more data directories and if none of the latest states can be loaded an exception
317+
* is thrown to prevent accidentally loading a previous state and silently omitting the latest state.
318+
*
319+
* @param logger a logger instance
320+
* @param dataLocations the data-locations to try.
321+
* @return the latest state or <code>null</code> if no state was found.
322+
*/
323+
public T loadLatestState(Logger logger, NamedXContentRegistry namedXContentRegistry, Path... dataLocations) throws IOException {
324+
return loadLatestStateWithGeneration(logger, namedXContentRegistry, dataLocations).v1();
325+
}
326+
312327
/**
313328
* Internal struct-like class that holds the parsed state id and the file
314329
*/

server/src/main/java/org/elasticsearch/index/seqno/ReplicationTracker.java

+20-1
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,18 @@ public class ReplicationTracker extends AbstractIndexShardComponent implements L
179179
*/
180180
private RetentionLeases retentionLeases = RetentionLeases.EMPTY;
181181

182+
/**
183+
* The primary term of the most-recently persisted retention leases. This is used to check if we need to persist the current retention
184+
* leases.
185+
*/
186+
private long persistedRetentionLeasesPrimaryTerm;
187+
188+
/**
189+
* The version of the most-recently persisted retention leases. This is used to check if we need to persist the current retention
190+
* leases.
191+
*/
192+
private long persistedRetentionLeasesVersion;
193+
182194
/**
183195
* Get all retention leases tracked on this shard.
184196
*
@@ -341,7 +353,8 @@ public RetentionLeases loadRetentionLeases(final Path path) throws IOException {
341353
private final Object retentionLeasePersistenceLock = new Object();
342354

343355
/**
344-
* Persists the current retention leases to their dedicated state file.
356+
* Persists the current retention leases to their dedicated state file. If this version of the retention leases is already persisted
357+
* then persistence is skipped.
345358
*
346359
* @param path the path to the directory containing the state file
347360
* @throws IOException if an exception occurs writing the state file
@@ -350,10 +363,16 @@ public void persistRetentionLeases(final Path path) throws IOException {
350363
synchronized (retentionLeasePersistenceLock) {
351364
final RetentionLeases currentRetentionLeases;
352365
synchronized (this) {
366+
if (retentionLeases.supersedes(persistedRetentionLeasesPrimaryTerm, persistedRetentionLeasesVersion) == false) {
367+
logger.trace("skipping persisting retention leases [{}], already persisted", retentionLeases);
368+
return;
369+
}
353370
currentRetentionLeases = retentionLeases;
354371
}
355372
logger.trace("persisting retention leases [{}]", currentRetentionLeases);
356373
RetentionLeases.FORMAT.write(currentRetentionLeases, path);
374+
persistedRetentionLeasesPrimaryTerm = currentRetentionLeases.primaryTerm();
375+
persistedRetentionLeasesVersion = currentRetentionLeases.version();
357376
}
358377
}
359378

server/src/main/java/org/elasticsearch/index/seqno/RetentionLeases.java

+17-3
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,27 @@ public long version() {
6969

7070
/**
7171
* Checks if this retention leases collection supersedes the specified retention leases collection. A retention leases collection
72-
* supersedes another retention leases collection if its primary term is higher, or if for equal primary terms its version is higher
72+
* supersedes another retention leases collection if its primary term is higher, or if for equal primary terms its version is higher.
7373
*
7474
* @param that the retention leases collection to test against
7575
* @return true if this retention leases collection supersedes the specified retention leases collection, otherwise false
7676
*/
77-
public boolean supersedes(final RetentionLeases that) {
78-
return primaryTerm > that.primaryTerm || primaryTerm == that.primaryTerm && version > that.version;
77+
boolean supersedes(final RetentionLeases that) {
78+
return supersedes(that.primaryTerm, that.version);
79+
}
80+
81+
/**
82+
* Checks if this retention leases collection would supersede a retention leases collection with the specified primary term and version.
83+
* A retention leases collection supersedes another retention leases collection if its primary term is higher, or if for equal primary
84+
* terms its version is higher.
85+
*
86+
* @param primaryTerm the primary term
87+
* @param version the version
88+
* @return true if this retention leases collection would supercedes a retention lease collection with the specified primary term and
89+
* version
90+
*/
91+
boolean supersedes(final long primaryTerm, final long version) {
92+
return this.primaryTerm > primaryTerm || this.primaryTerm == primaryTerm && this.version > version;
7993
}
8094

8195
private final Map<String, RetentionLease> leases;

server/src/test/java/org/elasticsearch/index/seqno/ReplicationTrackerRetentionLeaseTests.java

+44
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.common.collect.Tuple;
2525
import org.elasticsearch.common.settings.Settings;
2626
import org.elasticsearch.common.unit.TimeValue;
27+
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
2728
import org.elasticsearch.index.IndexSettings;
2829
import org.elasticsearch.index.shard.ShardId;
2930
import org.elasticsearch.test.IndexSettingsModule;
@@ -498,6 +499,49 @@ public void testLoadAndPersistRetentionLeases() throws IOException {
498499
assertThat(replicationTracker.loadRetentionLeases(path), equalTo(replicationTracker.getRetentionLeases()));
499500
}
500501

502+
public void testUnnecessaryPersistenceOfRetentionLeases() throws IOException {
503+
final AllocationId allocationId = AllocationId.newInitializing();
504+
long primaryTerm = randomLongBetween(1, Long.MAX_VALUE);
505+
final ReplicationTracker replicationTracker = new ReplicationTracker(
506+
new ShardId("test", "_na", 0),
507+
allocationId.getId(),
508+
IndexSettingsModule.newIndexSettings("test", Settings.EMPTY),
509+
primaryTerm,
510+
UNASSIGNED_SEQ_NO,
511+
value -> {},
512+
() -> 0L,
513+
(leases, listener) -> {});
514+
replicationTracker.updateFromMaster(
515+
randomNonNegativeLong(),
516+
Collections.singleton(allocationId.getId()),
517+
routingTable(Collections.emptySet(), allocationId),
518+
Collections.emptySet());
519+
replicationTracker.activatePrimaryMode(SequenceNumbers.NO_OPS_PERFORMED);
520+
final int length = randomIntBetween(0, 8);
521+
for (int i = 0; i < length; i++) {
522+
if (rarely() && primaryTerm < Long.MAX_VALUE) {
523+
primaryTerm = randomLongBetween(primaryTerm + 1, Long.MAX_VALUE);
524+
replicationTracker.setOperationPrimaryTerm(primaryTerm);
525+
}
526+
final long retainingSequenceNumber = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, Long.MAX_VALUE);
527+
replicationTracker.addRetentionLease(
528+
Integer.toString(i), retainingSequenceNumber, "test-" + i, ActionListener.wrap(() -> {}));
529+
}
530+
531+
final Path path = createTempDir();
532+
replicationTracker.persistRetentionLeases(path);
533+
534+
final Tuple<RetentionLeases, Long> retentionLeasesWithGeneration =
535+
RetentionLeases.FORMAT.loadLatestStateWithGeneration(logger, NamedXContentRegistry.EMPTY, path);
536+
537+
replicationTracker.persistRetentionLeases(path);
538+
final Tuple<RetentionLeases, Long> retentionLeasesWithGenerationAfterUnnecessaryPersistence =
539+
RetentionLeases.FORMAT.loadLatestStateWithGeneration(logger, NamedXContentRegistry.EMPTY, path);
540+
541+
assertThat(retentionLeasesWithGenerationAfterUnnecessaryPersistence.v1(), equalTo(retentionLeasesWithGeneration.v1()));
542+
assertThat(retentionLeasesWithGenerationAfterUnnecessaryPersistence.v2(), equalTo(retentionLeasesWithGeneration.v2()));
543+
}
544+
501545
/**
502546
* Test that we correctly synchronize writing the retention lease state file in {@link ReplicationTracker#persistRetentionLeases(Path)}.
503547
* This test can fail without the synchronization block in that method.

server/src/test/java/org/elasticsearch/index/seqno/RetentionLeasesTests.java

+4
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ public void testSupersedesByPrimaryTerm() {
6060
final long higherPrimaryTerm = randomLongBetween(lowerPrimaryTerm + 1, Long.MAX_VALUE);
6161
final RetentionLeases right = new RetentionLeases(higherPrimaryTerm, randomLongBetween(1, Long.MAX_VALUE), Collections.emptyList());
6262
assertTrue(right.supersedes(left));
63+
assertTrue(right.supersedes(left.primaryTerm(), left.version()));
6364
assertFalse(left.supersedes(right));
65+
assertFalse(left.supersedes(right.primaryTerm(), right.version()));
6466
}
6567

6668
public void testSupersedesByVersion() {
@@ -70,7 +72,9 @@ public void testSupersedesByVersion() {
7072
final RetentionLeases left = new RetentionLeases(primaryTerm, lowerVersion, Collections.emptyList());
7173
final RetentionLeases right = new RetentionLeases(primaryTerm, higherVersion, Collections.emptyList());
7274
assertTrue(right.supersedes(left));
75+
assertTrue(right.supersedes(left.primaryTerm(), left.version()));
7376
assertFalse(left.supersedes(right));
77+
assertFalse(left.supersedes(right.primaryTerm(), right.version()));
7478
}
7579

7680
public void testRetentionLeasesRejectsDuplicates() {

0 commit comments

Comments
 (0)