Skip to content

Commit b08ad02

Browse files
committed
Ignore translog retention policy if soft-deletes enabled (elastic#45473)
Since elastic#45136, we use soft-deletes instead of translog in peer recovery. There's no need to retain extra translog to increase the chance of operation-based recoveries. This commit ignores the translog retention policy if soft-deletes is enabled so we can discard translog more quickly. Co-authored-by: David Turner <[email protected]> Relates elastic#45136
1 parent e50a78c commit b08ad02

File tree

17 files changed

+342
-68
lines changed

17 files changed

+342
-68
lines changed

docs/reference/index-modules/translog.asciidoc

+16-5
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,23 @@ commit point. Defaults to `512mb`.
7676

7777
`index.translog.retention.size`::
7878

79-
The total size of translog files to keep. Keeping more translog files increases
80-
the chance of performing an operation based sync when recovering replicas. If
81-
the translog files are not sufficient, replica recovery will fall back to a
82-
file based sync. Defaults to `512mb`
79+
When soft deletes is disabled (it is enabled by default in 7.0 or later),
80+
`index.translog.retention.size` controls the total size of translog files to keep.
81+
Keeping more translog files increases the chance of performing an operation based
82+
sync when recovering replicas. If the translog files are not sufficient,
83+
replica recovery will fall back to a file based sync. Defaults to `512mb`.
84+
85+
Both `index.translog.retention.size` and `index.translog.retention.age` should not
86+
be specified unless soft deletes is disabled, because they are ignored when soft deletes is enabled.
8387

8488

8589
`index.translog.retention.age`::
8690

87-
The maximum duration for which translog files will be kept. Defaults to `12h`.
91+
When soft deletes is disabled (it is enabled by default in 7.0 or later),
92+
`index.translog.retention.age` controls the maximum duration for which translog
93+
files are kept. Keeping more translog files increases the chance of performing an
94+
operation based sync when recovering replicas. If the translog files are not sufficient,
95+
replica recovery will fall back to a file based sync. Defaults to `12h`.
96+
97+
Both `index.translog.retention.size` and `index.translog.retention.age` should not
98+
be specified unless soft deletes is disabled, because they are ignored when soft deletes is enabled.

rest-api-spec/src/main/resources/rest-api-spec/test/indices.stats/20_translog.yml

+99-6
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
---
2-
setup:
2+
"Translog retention without soft_deletes":
33
- do:
44
indices.create:
5-
index: test
5+
index: test
6+
body:
7+
settings:
8+
soft_deletes.enabled: false
69
- do:
710
cluster.health:
811
wait_for_no_initializing_shards: true
9-
10-
---
11-
"Translog retention":
1212
- do:
1313
indices.stats:
1414
metric: [ translog ]
@@ -64,6 +64,53 @@ setup:
6464
- lte: { indices.test.primaries.translog.uncommitted_size_in_bytes: $creation_size }
6565
- match: { indices.test.primaries.translog.uncommitted_operations: 0 }
6666

67+
---
68+
"Translog retention with soft_deletes":
69+
- skip:
70+
version: " - 7.3.99"
71+
reason: "start ignoring translog retention policy with soft-deletes enabled in 7.4"
72+
- do:
73+
indices.create:
74+
index: test
75+
body:
76+
settings:
77+
soft_deletes.enabled: true
78+
- do:
79+
cluster.health:
80+
wait_for_no_initializing_shards: true
81+
- do:
82+
indices.stats:
83+
metric: [ translog ]
84+
- set: { indices.test.primaries.translog.size_in_bytes: creation_size }
85+
86+
- do:
87+
index:
88+
index: test
89+
id: 1
90+
body: { "foo": "bar" }
91+
92+
- do:
93+
indices.stats:
94+
metric: [ translog ]
95+
- gt: { indices.test.primaries.translog.size_in_bytes: $creation_size }
96+
- match: { indices.test.primaries.translog.operations: 1 }
97+
- match: { indices.test.primaries.translog.uncommitted_operations: 1 }
98+
# call flush twice to sync the global checkpoint after the last operation so that we can have the safe commit
99+
- do:
100+
indices.flush:
101+
index: test
102+
- do:
103+
indices.flush:
104+
index: test
105+
- do:
106+
indices.stats:
107+
metric: [ translog ]
108+
# after flushing we have one empty translog file while an empty index before flushing has two empty translog files.
109+
- lt: { indices.test.primaries.translog.size_in_bytes: $creation_size }
110+
- match: { indices.test.primaries.translog.operations: 0 }
111+
- lt: { indices.test.primaries.translog.uncommitted_size_in_bytes: $creation_size }
112+
- match: { indices.test.primaries.translog.uncommitted_operations: 0 }
113+
67114
---
68115
"Translog last modified age stats":
69116
- skip:
@@ -81,11 +128,20 @@ setup:
81128
- gte: { indices.test.primaries.translog.earliest_last_modified_age: 0 }
82129

83130
---
84-
"Translog stats on closed indices":
131+
"Translog stats on closed indices without soft-deletes":
85132
- skip:
86133
version: " - 7.2.99"
87134
reason: "closed indices have translog stats starting version 7.3.0"
88135

136+
- do:
137+
indices.create:
138+
index: test
139+
body:
140+
settings:
141+
soft_deletes.enabled: false
142+
- do:
143+
cluster.health:
144+
wait_for_no_initializing_shards: true
89145
- do:
90146
index:
91147
index: test
@@ -123,3 +179,40 @@ setup:
123179
forbid_closed_indices: false
124180
- match: { indices.test.primaries.translog.operations: 3 }
125181
- match: { indices.test.primaries.translog.uncommitted_operations: 0 }
182+
183+
---
184+
"Translog stats on closed indices with soft-deletes":
185+
- skip:
186+
version: " - 7.3.99"
187+
reason: "start ignoring translog retention policy with soft-deletes enabled in 7.4"
188+
- do:
189+
indices.create:
190+
index: test
191+
body:
192+
settings:
193+
soft_deletes.enabled: true
194+
- do:
195+
cluster.health:
196+
wait_for_no_initializing_shards: true
197+
- do:
198+
index:
199+
index: test
200+
id: 1
201+
body: { "foo": "bar" }
202+
- do:
203+
indices.stats:
204+
metric: [ translog ]
205+
- match: { indices.test.primaries.translog.operations: 1 }
206+
- match: { indices.test.primaries.translog.uncommitted_operations: 1 }
207+
- do:
208+
indices.close:
209+
index: test
210+
wait_for_active_shards: 1
211+
- is_true: acknowledged
212+
- do:
213+
indices.stats:
214+
metric: [ translog ]
215+
expand_wildcards: all
216+
forbid_closed_indices: false
217+
- match: { indices.test.primaries.translog.operations: 0 }
218+
- match: { indices.test.primaries.translog.uncommitted_operations: 0 }

server/src/main/java/org/elasticsearch/index/IndexSettings.java

+43-24
Original file line numberDiff line numberDiff line change
@@ -195,24 +195,6 @@ public final class IndexSettings {
195195
new ByteSizeValue(Long.MAX_VALUE, ByteSizeUnit.BYTES),
196196
Property.Dynamic, Property.IndexScope);
197197

198-
/**
199-
* Controls how long translog files that are no longer needed for persistence reasons
200-
* will be kept around before being deleted. A longer retention policy is useful to increase
201-
* the chance of ops based recoveries.
202-
**/
203-
public static final Setting<TimeValue> INDEX_TRANSLOG_RETENTION_AGE_SETTING =
204-
Setting.timeSetting("index.translog.retention.age", TimeValue.timeValueHours(12), TimeValue.timeValueMillis(-1),
205-
Property.Dynamic, Property.IndexScope);
206-
207-
/**
208-
* Controls how many translog files that are no longer needed for persistence reasons
209-
* will be kept around before being deleted. Keeping more files is useful to increase
210-
* the chance of ops based recoveries.
211-
**/
212-
public static final Setting<ByteSizeValue> INDEX_TRANSLOG_RETENTION_SIZE_SETTING =
213-
Setting.byteSizeSetting("index.translog.retention.size", new ByteSizeValue(512, ByteSizeUnit.MB), Property.Dynamic,
214-
Property.IndexScope);
215-
216198
/**
217199
* The maximum size of a translog generation. This is independent of the maximum size of
218200
* translog operations that have not been flushed.
@@ -258,6 +240,27 @@ public final class IndexSettings {
258240
Setting.longSetting("index.soft_deletes.retention.operations", 0, 0,
259241
Property.IndexScope, Property.Dynamic);
260242

243+
/**
244+
* Controls how long translog files that are no longer needed for persistence reasons
245+
* will be kept around before being deleted. Keeping more files is useful to increase
246+
* the chance of ops based recoveries for indices with soft-deletes disabled.
247+
* This setting will be ignored if soft-deletes is enabled.
248+
**/
249+
public static final Setting<TimeValue> INDEX_TRANSLOG_RETENTION_AGE_SETTING =
250+
Setting.timeSetting("index.translog.retention.age",
251+
settings -> INDEX_SOFT_DELETES_SETTING.get(settings) ? TimeValue.MINUS_ONE : TimeValue.timeValueHours(12), TimeValue.MINUS_ONE,
252+
Property.Dynamic, Property.IndexScope);
253+
254+
/**
255+
* Controls the total size of translog files that are no longer needed for persistence reasons
256+
* will be kept around before being deleted. Keeping more files is useful to increase
257+
* the chance of ops based recoveries for indices with soft-deletes disabled.
258+
* This setting will be ignored if soft-deletes is enabled.
259+
**/
260+
public static final Setting<ByteSizeValue> INDEX_TRANSLOG_RETENTION_SIZE_SETTING =
261+
Setting.byteSizeSetting("index.translog.retention.size", settings -> INDEX_SOFT_DELETES_SETTING.get(settings) ? "-1" : "512MB",
262+
Property.Dynamic, Property.IndexScope);
263+
261264
/**
262265
* Controls the maximum length of time since a retention lease is created or renewed before it is considered expired.
263266
*/
@@ -466,8 +469,6 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
466469
syncInterval = INDEX_TRANSLOG_SYNC_INTERVAL_SETTING.get(settings);
467470
refreshInterval = scopedSettings.get(INDEX_REFRESH_INTERVAL_SETTING);
468471
flushThresholdSize = scopedSettings.get(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING);
469-
translogRetentionAge = scopedSettings.get(INDEX_TRANSLOG_RETENTION_AGE_SETTING);
470-
translogRetentionSize = scopedSettings.get(INDEX_TRANSLOG_RETENTION_SIZE_SETTING);
471472
generationThresholdSize = scopedSettings.get(INDEX_TRANSLOG_GENERATION_THRESHOLD_SIZE_SETTING);
472473
mergeSchedulerConfig = new MergeSchedulerConfig(this);
473474
gcDeletesInMillis = scopedSettings.get(INDEX_GC_DELETES_SETTING).getMillis();
@@ -493,6 +494,8 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
493494
this.indexSortConfig = new IndexSortConfig(this);
494495
searchIdleAfter = scopedSettings.get(INDEX_SEARCH_IDLE_AFTER);
495496
defaultPipeline = scopedSettings.get(DEFAULT_PIPELINE);
497+
setTranslogRetentionAge(scopedSettings.get(INDEX_TRANSLOG_RETENTION_AGE_SETTING));
498+
setTranslogRetentionSize(scopedSettings.get(INDEX_TRANSLOG_RETENTION_SIZE_SETTING));
496499

497500
scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setNoCFSRatio);
498501
scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING,
@@ -553,11 +556,21 @@ private void setTranslogFlushThresholdSize(ByteSizeValue byteSizeValue) {
553556
}
554557

555558
private void setTranslogRetentionSize(ByteSizeValue byteSizeValue) {
556-
this.translogRetentionSize = byteSizeValue;
559+
if (softDeleteEnabled && byteSizeValue.getBytes() >= 0) {
560+
// ignore the translog retention settings if soft-deletes enabled
561+
this.translogRetentionSize = new ByteSizeValue(-1);
562+
} else {
563+
this.translogRetentionSize = byteSizeValue;
564+
}
557565
}
558566

559567
private void setTranslogRetentionAge(TimeValue age) {
560-
this.translogRetentionAge = age;
568+
if (softDeleteEnabled && age.millis() >= 0) {
569+
// ignore the translog retention settings if soft-deletes enabled
570+
this.translogRetentionAge = TimeValue.MINUS_ONE;
571+
} else {
572+
this.translogRetentionAge = age;
573+
}
561574
}
562575

563576
private void setGenerationThresholdSize(final ByteSizeValue generationThresholdSize) {
@@ -734,13 +747,19 @@ public TimeValue getRefreshInterval() {
734747
/**
735748
* Returns the transaction log retention size which controls how much of the translog is kept around to allow for ops based recoveries
736749
*/
737-
public ByteSizeValue getTranslogRetentionSize() { return translogRetentionSize; }
750+
public ByteSizeValue getTranslogRetentionSize() {
751+
assert softDeleteEnabled == false || translogRetentionSize.getBytes() == -1L : translogRetentionSize;
752+
return translogRetentionSize;
753+
}
738754

739755
/**
740756
* Returns the transaction log retention age which controls the maximum age (time from creation) that translog files will be kept
741757
* around
742758
*/
743-
public TimeValue getTranslogRetentionAge() { return translogRetentionAge; }
759+
public TimeValue getTranslogRetentionAge() {
760+
assert softDeleteEnabled == false || translogRetentionAge.millis() == -1L : translogRetentionAge;
761+
return translogRetentionAge;
762+
}
744763

745764
/**
746765
* Returns the generation threshold size. As sequence numbers can cause multiple generations to

server/src/main/java/org/elasticsearch/index/translog/TruncateTranslogAction.java

+12-4
Original file line numberDiff line numberDiff line change
@@ -177,11 +177,19 @@ private boolean isTranslogClean(ShardPath shardPath, String translogUUID) throws
177177
final TranslogConfig translogConfig = new TranslogConfig(shardPath.getShardId(), translogPath,
178178
indexSettings, BigArrays.NON_RECYCLING_INSTANCE);
179179
long primaryTerm = indexSettings.getIndexMetaData().primaryTerm(shardPath.getShardId().id());
180-
final TranslogDeletionPolicy translogDeletionPolicy =
181-
new TranslogDeletionPolicy(indexSettings.getTranslogRetentionSize().getBytes(),
182-
indexSettings.getTranslogRetentionAge().getMillis());
180+
// We open translog to check for corruption, do not clean anything.
181+
final TranslogDeletionPolicy retainAllTranslogPolicy = new TranslogDeletionPolicy(Long.MAX_VALUE, Long.MAX_VALUE) {
182+
@Override
183+
long minTranslogGenRequired(List<TranslogReader> readers, TranslogWriter writer) {
184+
long minGen = writer.generation;
185+
for (TranslogReader reader : readers) {
186+
minGen = Math.min(reader.generation, minGen);
187+
}
188+
return minGen;
189+
}
190+
};
183191
try (Translog translog = new Translog(translogConfig, translogUUID,
184-
translogDeletionPolicy, () -> translogGlobalCheckpoint, () -> primaryTerm, seqNo -> {});
192+
retainAllTranslogPolicy, () -> translogGlobalCheckpoint, () -> primaryTerm, seqNo -> {});
185193
Translog.Snapshot snapshot = translog.newSnapshot()) {
186194
//noinspection StatementWithEmptyBody we are just checking that we can iterate through the whole snapshot
187195
while (snapshot.next() != null) {

server/src/test/java/org/elasticsearch/index/IndexServiceTests.java

+7-2
Original file line numberDiff line numberDiff line change
@@ -396,15 +396,20 @@ public void testAsyncTranslogTrimTaskOnClosedIndex() throws Exception {
396396
final Path translogPath = translog.getConfig().getTranslogPath();
397397
final String translogUuid = translog.getTranslogUUID();
398398

399+
int translogOps = 0;
399400
final int numDocs = scaledRandomIntBetween(10, 100);
400401
for (int i = 0; i < numDocs; i++) {
401402
client().prepareIndex().setIndex(indexName).setId(String.valueOf(i)).setSource("{\"foo\": \"bar\"}", XContentType.JSON).get();
403+
translogOps++;
402404
if (randomBoolean()) {
403405
client().admin().indices().prepareFlush(indexName).get();
406+
if (indexService.getIndexSettings().isSoftDeleteEnabled()) {
407+
translogOps = 0;
408+
}
404409
}
405410
}
406-
assertThat(translog.totalOperations(), equalTo(numDocs));
407-
assertThat(translog.stats().estimatedNumberOfOperations(), equalTo(numDocs));
411+
assertThat(translog.totalOperations(), equalTo(translogOps));
412+
assertThat(translog.stats().estimatedNumberOfOperations(), equalTo(translogOps));
408413
assertAcked(client().admin().indices().prepareClose("test").setWaitForActiveShards(ActiveShardCount.DEFAULT));
409414

410415
indexService = getInstanceFromNode(IndicesService.class).indexServiceSafe(indexService.index());

0 commit comments

Comments
 (0)