Skip to content

Commit a86c0f8

Browse files
Allow to trim all ops above a certain seq# with a term lower than X (#30176)
Allow to trim all ops above a certain seq# with a term lower than X Relates to #10708
1 parent 01140a3 commit a86c0f8

20 files changed

+579
-69
lines changed

server/src/main/java/org/elasticsearch/action/resync/ResyncReplicationRequest.java

+21-3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.action.support.replication.ReplicatedWriteRequest;
2323
import org.elasticsearch.common.io.stream.StreamInput;
2424
import org.elasticsearch.common.io.stream.StreamOutput;
25+
import org.elasticsearch.index.seqno.SequenceNumbers;
2526
import org.elasticsearch.index.shard.ShardId;
2627
import org.elasticsearch.index.translog.Translog;
2728

@@ -33,17 +34,24 @@
3334
*/
3435
public final class ResyncReplicationRequest extends ReplicatedWriteRequest<ResyncReplicationRequest> {
3536

37+
private long trimAboveSeqNo;
3638
private Translog.Operation[] operations;
3739

3840
ResyncReplicationRequest() {
3941
super();
4042
}
4143

42-
public ResyncReplicationRequest(final ShardId shardId, final Translog.Operation[] operations) {
44+
public ResyncReplicationRequest(final ShardId shardId, final long trimAboveSeqNo,
45+
final Translog.Operation[] operations) {
4346
super(shardId);
47+
this.trimAboveSeqNo = trimAboveSeqNo;
4448
this.operations = operations;
4549
}
4650

51+
public long getTrimAboveSeqNo() {
52+
return trimAboveSeqNo;
53+
}
54+
4755
public Translog.Operation[] getOperations() {
4856
return operations;
4957
}
@@ -60,12 +68,20 @@ public void readFrom(final StreamInput in) throws IOException {
6068
throw new IllegalStateException("resync replication request serialization is broken in 6.0.0");
6169
}
6270
super.readFrom(in);
71+
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
72+
trimAboveSeqNo = in.readZLong();
73+
} else {
74+
trimAboveSeqNo = SequenceNumbers.UNASSIGNED_SEQ_NO;
75+
}
6376
operations = in.readArray(Translog.Operation::readOperation, Translog.Operation[]::new);
6477
}
6578

6679
@Override
6780
public void writeTo(final StreamOutput out) throws IOException {
6881
super.writeTo(out);
82+
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
83+
out.writeZLong(trimAboveSeqNo);
84+
}
6985
out.writeArray(Translog.Operation::writeOperation, operations);
7086
}
7187

@@ -74,12 +90,13 @@ public boolean equals(final Object o) {
7490
if (this == o) return true;
7591
if (o == null || getClass() != o.getClass()) return false;
7692
final ResyncReplicationRequest that = (ResyncReplicationRequest) o;
77-
return Arrays.equals(operations, that.operations);
93+
return trimAboveSeqNo == that.trimAboveSeqNo
94+
&& Arrays.equals(operations, that.operations);
7895
}
7996

8097
@Override
8198
public int hashCode() {
82-
return Arrays.hashCode(operations);
99+
return Long.hashCode(trimAboveSeqNo) + 31 * Arrays.hashCode(operations);
83100
}
84101

85102
@Override
@@ -88,6 +105,7 @@ public String toString() {
88105
"shardId=" + shardId +
89106
", timeout=" + timeout +
90107
", index='" + index + '\'' +
108+
", trimAboveSeqNo=" + trimAboveSeqNo +
91109
", ops=" + operations.length +
92110
"}";
93111
}

server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java

+3
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ public static Translog.Location performOnReplica(ResyncReplicationRequest reques
135135
}
136136
}
137137
}
138+
if (request.getTrimAboveSeqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) {
139+
replica.trimOperationOfPreviousPrimaryTerms(request.getTrimAboveSeqNo());
140+
}
138141
return location;
139142
}
140143

server/src/main/java/org/elasticsearch/index/engine/Engine.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,12 @@ boolean isThrottled() {
236236
*/
237237
public abstract boolean isThrottled();
238238

239+
/**
240+
* Trims translog for terms below <code>belowTerm</code> and seq# above <code>aboveSeqNo</code>
241+
* @see Translog#trimOperations(long, long)
242+
*/
243+
public abstract void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException;
244+
239245
/** A Lock implementation that always allows the lock to be acquired */
240246
protected static final class NoOpLock implements Lock {
241247

@@ -904,7 +910,7 @@ public final boolean refreshNeeded() {
904910
* checks and removes translog files that no longer need to be retained. See
905911
* {@link org.elasticsearch.index.translog.TranslogDeletionPolicy} for details
906912
*/
907-
public abstract void trimTranslog() throws EngineException;
913+
public abstract void trimUnreferencedTranslogFiles() throws EngineException;
908914

909915
/**
910916
* Tests whether or not the translog generation should be rolled to a new generation.

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

+19-1
Original file line numberDiff line numberDiff line change
@@ -1552,7 +1552,7 @@ public void rollTranslogGeneration() throws EngineException {
15521552
}
15531553

15541554
@Override
1555-
public void trimTranslog() throws EngineException {
1555+
public void trimUnreferencedTranslogFiles() throws EngineException {
15561556
try (ReleasableLock lock = readLock.acquire()) {
15571557
ensureOpen();
15581558
translog.trimUnreferencedReaders();
@@ -1569,6 +1569,24 @@ public void trimTranslog() throws EngineException {
15691569
}
15701570
}
15711571

1572+
@Override
1573+
public void trimOperationsFromTranslog(long belowTerm, long aboveSeqNo) throws EngineException {
1574+
try (ReleasableLock lock = readLock.acquire()) {
1575+
ensureOpen();
1576+
translog.trimOperations(belowTerm, aboveSeqNo);
1577+
} catch (AlreadyClosedException e) {
1578+
failOnTragicEvent(e);
1579+
throw e;
1580+
} catch (Exception e) {
1581+
try {
1582+
failEngine("translog operations trimming failed", e);
1583+
} catch (Exception inner) {
1584+
e.addSuppressed(inner);
1585+
}
1586+
throw new EngineException(shardId, "failed to trim translog operations", e);
1587+
}
1588+
}
1589+
15721590
private void pruneDeletedTombstones() {
15731591
/*
15741592
* We need to deploy two different trimming strategies for GC deletes on primary and replicas. Delete operations on primary

server/src/main/java/org/elasticsearch/index/seqno/SequenceNumbers.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class SequenceNumbers {
3737
*/
3838
public static final long UNASSIGNED_SEQ_NO = -2L;
3939
/**
40-
* Represents no operations have been performed on the shard.
40+
* Represents no operations have been performed on the shard. Initial value of a sequence number.
4141
*/
4242
public static final long NO_OPS_PERFORMED = -1L;
4343

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,7 @@ public Engine.CommitId flush(FlushRequest request) {
992992
public void trimTranslog() {
993993
verifyNotClosed();
994994
final Engine engine = getEngine();
995-
engine.trimTranslog();
995+
engine.trimUnreferencedTranslogFiles();
996996
}
997997

998998
/**
@@ -1194,6 +1194,10 @@ public void prepareForIndexRecovery() {
11941194
assert currentEngineReference.get() == null;
11951195
}
11961196

1197+
public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) {
1198+
getEngine().trimOperationsFromTranslog(primaryTerm, aboveSeqNo);
1199+
}
1200+
11971201
public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine.Operation.Origin origin) throws IOException {
11981202
final Engine.Result result;
11991203
switch (operation.opType()) {

server/src/main/java/org/elasticsearch/index/shard/PrimaryReplicaSyncer.java

+15-6
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.common.unit.ByteSizeValue;
3636
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
3737
import org.elasticsearch.common.xcontent.XContentBuilder;
38+
import org.elasticsearch.index.seqno.SeqNoStats;
3839
import org.elasticsearch.index.seqno.SequenceNumbers;
3940
import org.elasticsearch.index.translog.Translog;
4041
import org.elasticsearch.tasks.Task;
@@ -84,6 +85,7 @@ public void resync(final IndexShard indexShard, final ActionListener<ResyncTask>
8485
try {
8586
final long startingSeqNo = indexShard.getGlobalCheckpoint() + 1;
8687
Translog.Snapshot snapshot = indexShard.newTranslogSnapshotFromMinSeqNo(startingSeqNo);
88+
final long maxSeqNo = indexShard.seqNoStats().getMaxSeqNo();
8789
resyncListener = new ActionListener<ResyncTask>() {
8890
@Override
8991
public void onResponse(final ResyncTask resyncTask) {
@@ -135,7 +137,7 @@ public synchronized Translog.Operation next() throws IOException {
135137
}
136138
};
137139
resync(shardId, indexShard.routingEntry().allocationId().getId(), indexShard.getPrimaryTerm(), wrappedSnapshot,
138-
startingSeqNo, resyncListener);
140+
startingSeqNo, maxSeqNo, resyncListener);
139141
} catch (Exception e) {
140142
if (resyncListener != null) {
141143
resyncListener.onFailure(e);
@@ -146,7 +148,7 @@ public synchronized Translog.Operation next() throws IOException {
146148
}
147149

148150
private void resync(final ShardId shardId, final String primaryAllocationId, final long primaryTerm, final Translog.Snapshot snapshot,
149-
long startingSeqNo, ActionListener<ResyncTask> listener) {
151+
long startingSeqNo, long maxSeqNo, ActionListener<ResyncTask> listener) {
150152
ResyncRequest request = new ResyncRequest(shardId, primaryAllocationId);
151153
ResyncTask resyncTask = (ResyncTask) taskManager.register("transport", "resync", request); // it's not transport :-)
152154
ActionListener<Void> wrappedListener = new ActionListener<Void>() {
@@ -166,7 +168,7 @@ public void onFailure(Exception e) {
166168
};
167169
try {
168170
new SnapshotSender(logger, syncAction, resyncTask, shardId, primaryAllocationId, primaryTerm, snapshot, chunkSize.bytesAsInt(),
169-
startingSeqNo, wrappedListener).run();
171+
startingSeqNo, maxSeqNo, wrappedListener).run();
170172
} catch (Exception e) {
171173
wrappedListener.onFailure(e);
172174
}
@@ -186,14 +188,16 @@ static class SnapshotSender extends AbstractRunnable implements ActionListener<R
186188
private final ShardId shardId;
187189
private final Translog.Snapshot snapshot;
188190
private final long startingSeqNo;
191+
private final long maxSeqNo;
189192
private final int chunkSizeInBytes;
190193
private final ActionListener<Void> listener;
194+
private final AtomicBoolean firstMessage = new AtomicBoolean(true);
191195
private final AtomicInteger totalSentOps = new AtomicInteger();
192196
private final AtomicInteger totalSkippedOps = new AtomicInteger();
193197
private AtomicBoolean closed = new AtomicBoolean();
194198

195199
SnapshotSender(Logger logger, SyncAction syncAction, ResyncTask task, ShardId shardId, String primaryAllocationId, long primaryTerm,
196-
Translog.Snapshot snapshot, int chunkSizeInBytes, long startingSeqNo, ActionListener<Void> listener) {
200+
Translog.Snapshot snapshot, int chunkSizeInBytes, long startingSeqNo, long maxSeqNo, ActionListener<Void> listener) {
197201
this.logger = logger;
198202
this.syncAction = syncAction;
199203
this.task = task;
@@ -203,6 +207,7 @@ static class SnapshotSender extends AbstractRunnable implements ActionListener<R
203207
this.snapshot = snapshot;
204208
this.chunkSizeInBytes = chunkSizeInBytes;
205209
this.startingSeqNo = startingSeqNo;
210+
this.maxSeqNo = maxSeqNo;
206211
this.listener = listener;
207212
task.setTotalOperations(snapshot.totalOperations());
208213
}
@@ -248,11 +253,15 @@ protected void doRun() throws Exception {
248253
}
249254
}
250255

251-
if (!operations.isEmpty()) {
256+
final long trimmedAboveSeqNo = firstMessage.get() ? maxSeqNo : SequenceNumbers.UNASSIGNED_SEQ_NO;
257+
// have to send sync request even in case of there are no operations to sync - have to sync trimmedAboveSeqNo at least
258+
if (!operations.isEmpty() || trimmedAboveSeqNo != SequenceNumbers.UNASSIGNED_SEQ_NO) {
252259
task.setPhase("sending_ops");
253-
ResyncReplicationRequest request = new ResyncReplicationRequest(shardId, operations.toArray(EMPTY_ARRAY));
260+
ResyncReplicationRequest request =
261+
new ResyncReplicationRequest(shardId, trimmedAboveSeqNo, operations.toArray(EMPTY_ARRAY));
254262
logger.trace("{} sending batch of [{}][{}] (total sent: [{}], skipped: [{}])", shardId, operations.size(),
255263
new ByteSizeValue(size), totalSentOps.get(), totalSkippedOps.get());
264+
firstMessage.set(false);
256265
syncAction.sync(request, task, primaryAllocationId, primaryTerm, this);
257266
} else if (closed.compareAndSet(false, true)) {
258267
logger.trace("{} resync completed (total sent: [{}], skipped: [{}])", shardId, totalSentOps.get(), totalSkippedOps.get());

0 commit comments

Comments
 (0)