Skip to content

Commit b41e3fc

Browse files
authored
Move primary term from replicas proxy to repl op (#41119)
A small refactoring that removes the primaryTerm field from ReplicasProxy and instead passes it directly in to the methods that need it. Relates #40706.
1 parent 3ec0cc5 commit b41e3fc

File tree

10 files changed

+173
-174
lines changed

10 files changed

+173
-174
lines changed

server/src/main/java/org/elasticsearch/action/admin/indices/close/TransportVerifyShardBeforeCloseAction.java

+4-8
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ private void executeShardOperation(final ShardRequest request, final IndexShard
115115
}
116116

117117
@Override
118-
protected ReplicationOperation.Replicas<ShardRequest> newReplicasProxy(final long primaryTerm) {
119-
return new VerifyShardBeforeCloseActionReplicasProxy(primaryTerm);
118+
protected ReplicationOperation.Replicas<ShardRequest> newReplicasProxy() {
119+
return new VerifyShardBeforeCloseActionReplicasProxy();
120120
}
121121

122122
/**
@@ -125,13 +125,9 @@ protected ReplicationOperation.Replicas<ShardRequest> newReplicasProxy(final lon
125125
* or reopened in an unverified state with potential non flushed translog operations.
126126
*/
127127
class VerifyShardBeforeCloseActionReplicasProxy extends ReplicasProxy {
128-
129-
VerifyShardBeforeCloseActionReplicasProxy(final long primaryTerm) {
130-
super(primaryTerm);
131-
}
132-
133128
@Override
134-
public void markShardCopyAsStaleIfNeeded(final ShardId shardId, final String allocationId, final ActionListener<Void> listener) {
129+
public void markShardCopyAsStaleIfNeeded(final ShardId shardId, final String allocationId, final long primaryTerm,
130+
final ActionListener<Void> listener) {
135131
shardStateAction.remoteShardFailed(shardId, allocationId, primaryTerm, true, "mark copy as stale", null, listener);
136132
}
137133
}

server/src/main/java/org/elasticsearch/action/resync/TransportResyncReplicationAction.java

+7-9
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ protected ResyncReplicationResponse newResponseInstance() {
6868
}
6969

7070
@Override
71-
protected ReplicationOperation.Replicas newReplicasProxy(long primaryTerm) {
72-
return new ResyncActionReplicasProxy(primaryTerm);
71+
protected ReplicationOperation.Replicas newReplicasProxy() {
72+
return new ResyncActionReplicasProxy();
7373
}
7474

7575
@Override
@@ -96,9 +96,10 @@ public static ResyncReplicationRequest performOnPrimary(ResyncReplicationRequest
9696
}
9797

9898
@Override
99-
protected WriteReplicaResult shardOperationOnReplica(ResyncReplicationRequest request, IndexShard replica) throws Exception {
99+
protected WriteReplicaResult<ResyncReplicationRequest> shardOperationOnReplica(ResyncReplicationRequest request,
100+
IndexShard replica) throws Exception {
100101
Translog.Location location = performOnReplica(request, replica);
101-
return new WriteReplicaResult(request, location, null, replica, logger);
102+
return new WriteReplicaResult<>(request, location, null, replica, logger);
102103
}
103104

104105
public static Translog.Location performOnReplica(ResyncReplicationRequest request, IndexShard replica) throws Exception {
@@ -174,12 +175,9 @@ public void handleException(TransportException exp) {
174175
*/
175176
class ResyncActionReplicasProxy extends ReplicasProxy {
176177

177-
ResyncActionReplicasProxy(long primaryTerm) {
178-
super(primaryTerm);
179-
}
180-
181178
@Override
182-
public void failShardIfNeeded(ShardRouting replica, String message, Exception exception, ActionListener<Void> listener) {
179+
public void failShardIfNeeded(ShardRouting replica, long primaryTerm, String message, Exception exception,
180+
ActionListener<Void> listener) {
183181
shardStateAction.remoteShardFailed(
184182
replica.shardId(), replica.allocationId().getId(), primaryTerm, false, message, exception, listener);
185183
}

server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java

+55-47
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,10 @@ public class ReplicationOperation<
7171
private final Primary<Request, ReplicaRequest, PrimaryResultT> primary;
7272
private final Replicas<ReplicaRequest> replicasProxy;
7373
private final AtomicBoolean finished = new AtomicBoolean();
74-
protected final ActionListener<PrimaryResultT> resultListener;
74+
private final long primaryTerm;
75+
76+
// exposed for tests
77+
final ActionListener<PrimaryResultT> resultListener;
7578

7679
private volatile PrimaryResultT primaryResult = null;
7780

@@ -80,13 +83,14 @@ public class ReplicationOperation<
8083
public ReplicationOperation(Request request, Primary<Request, ReplicaRequest, PrimaryResultT> primary,
8184
ActionListener<PrimaryResultT> listener,
8285
Replicas<ReplicaRequest> replicas,
83-
Logger logger, String opType) {
86+
Logger logger, String opType, long primaryTerm) {
8487
this.replicasProxy = replicas;
8588
this.primary = primary;
8689
this.resultListener = listener;
8790
this.logger = logger;
8891
this.request = request;
8992
this.opType = opType;
93+
this.primaryTerm = primaryTerm;
9094
}
9195

9296
public void execute() throws Exception {
@@ -137,7 +141,7 @@ private void markUnavailableShardsAsStale(ReplicaRequest replicaRequest, Replica
137141
// if inSyncAllocationIds contains allocation ids of shards that don't exist in RoutingTable, mark copies as stale
138142
for (String allocationId : replicationGroup.getUnavailableInSyncShards()) {
139143
pendingActions.incrementAndGet();
140-
replicasProxy.markShardCopyAsStaleIfNeeded(replicaRequest.shardId(), allocationId,
144+
replicasProxy.markShardCopyAsStaleIfNeeded(replicaRequest.shardId(), allocationId, primaryTerm,
141145
ActionListener.wrap(r -> decPendingAndFinishIfNeeded(), ReplicationOperation.this::onNoLongerPrimary));
142146
}
143147
}
@@ -165,44 +169,45 @@ private void performOnReplica(final ShardRouting shard, final ReplicaRequest rep
165169

166170
totalShards.incrementAndGet();
167171
pendingActions.incrementAndGet();
168-
replicasProxy.performOn(shard, replicaRequest, globalCheckpoint, maxSeqNoOfUpdatesOrDeletes, new ActionListener<ReplicaResponse>() {
169-
@Override
170-
public void onResponse(ReplicaResponse response) {
171-
successfulShards.incrementAndGet();
172-
try {
173-
primary.updateLocalCheckpointForShard(shard.allocationId().getId(), response.localCheckpoint());
174-
primary.updateGlobalCheckpointForShard(shard.allocationId().getId(), response.globalCheckpoint());
175-
} catch (final AlreadyClosedException e) {
176-
// okay, the index was deleted or this shard was never activated after a relocation; fall through and finish normally
177-
} catch (final Exception e) {
178-
// fail the primary but fall through and let the rest of operation processing complete
179-
final String message = String.format(Locale.ROOT, "primary failed updating local checkpoint for replica %s", shard);
180-
primary.failShard(message, e);
172+
replicasProxy.performOn(shard, replicaRequest, primaryTerm, globalCheckpoint, maxSeqNoOfUpdatesOrDeletes,
173+
new ActionListener<>() {
174+
@Override
175+
public void onResponse(ReplicaResponse response) {
176+
successfulShards.incrementAndGet();
177+
try {
178+
primary.updateLocalCheckpointForShard(shard.allocationId().getId(), response.localCheckpoint());
179+
primary.updateGlobalCheckpointForShard(shard.allocationId().getId(), response.globalCheckpoint());
180+
} catch (final AlreadyClosedException e) {
181+
// the index was deleted or this shard was never activated after a relocation; fall through and finish normally
182+
} catch (final Exception e) {
183+
// fail the primary but fall through and let the rest of operation processing complete
184+
final String message = String.format(Locale.ROOT, "primary failed updating local checkpoint for replica %s", shard);
185+
primary.failShard(message, e);
186+
}
187+
decPendingAndFinishIfNeeded();
181188
}
182-
decPendingAndFinishIfNeeded();
183-
}
184189

185-
@Override
186-
public void onFailure(Exception replicaException) {
187-
logger.trace(() -> new ParameterizedMessage(
188-
"[{}] failure while performing [{}] on replica {}, request [{}]",
189-
shard.shardId(), opType, shard, replicaRequest), replicaException);
190-
// Only report "critical" exceptions - TODO: Reach out to the master node to get the latest shard state then report.
191-
if (TransportActions.isShardNotAvailableException(replicaException) == false) {
192-
RestStatus restStatus = ExceptionsHelper.status(replicaException);
193-
shardReplicaFailures.add(new ReplicationResponse.ShardInfo.Failure(
194-
shard.shardId(), shard.currentNodeId(), replicaException, restStatus, false));
190+
@Override
191+
public void onFailure(Exception replicaException) {
192+
logger.trace(() -> new ParameterizedMessage(
193+
"[{}] failure while performing [{}] on replica {}, request [{}]",
194+
shard.shardId(), opType, shard, replicaRequest), replicaException);
195+
// Only report "critical" exceptions - TODO: Reach out to the master node to get the latest shard state then report.
196+
if (TransportActions.isShardNotAvailableException(replicaException) == false) {
197+
RestStatus restStatus = ExceptionsHelper.status(replicaException);
198+
shardReplicaFailures.add(new ReplicationResponse.ShardInfo.Failure(
199+
shard.shardId(), shard.currentNodeId(), replicaException, restStatus, false));
200+
}
201+
String message = String.format(Locale.ROOT, "failed to perform %s on replica %s", opType, shard);
202+
replicasProxy.failShardIfNeeded(shard, primaryTerm, message, replicaException,
203+
ActionListener.wrap(r -> decPendingAndFinishIfNeeded(), ReplicationOperation.this::onNoLongerPrimary));
195204
}
196-
String message = String.format(Locale.ROOT, "failed to perform %s on replica %s", opType, shard);
197-
replicasProxy.failShardIfNeeded(shard, message, replicaException,
198-
ActionListener.wrap(r -> decPendingAndFinishIfNeeded(), ReplicationOperation.this::onNoLongerPrimary));
199-
}
200205

201-
@Override
202-
public String toString() {
203-
return "[" + replicaRequest + "][" + shard + "]";
204-
}
205-
});
206+
@Override
207+
public String toString() {
208+
return "[" + replicaRequest + "][" + shard + "]";
209+
}
210+
});
206211
}
207212

208213
private void onNoLongerPrimary(Exception failure) {
@@ -373,25 +378,27 @@ public interface Replicas<RequestT extends ReplicationRequest<RequestT>> {
373378
*
374379
* @param replica the shard this request should be executed on
375380
* @param replicaRequest the operation to perform
381+
* @param primaryTerm the primary term
376382
* @param globalCheckpoint the global checkpoint on the primary
377383
* @param maxSeqNoOfUpdatesOrDeletes the max seq_no of updates (index operations overwriting Lucene) or deletes on primary
378384
* after this replication was executed on it.
379385
* @param listener callback for handling the response or failure
380386
*/
381-
void performOn(ShardRouting replica, RequestT replicaRequest, long globalCheckpoint,
382-
long maxSeqNoOfUpdatesOrDeletes, ActionListener<ReplicaResponse> listener);
387+
void performOn(ShardRouting replica, RequestT replicaRequest,
388+
long primaryTerm, long globalCheckpoint, long maxSeqNoOfUpdatesOrDeletes, ActionListener<ReplicaResponse> listener);
383389

384390
/**
385391
* Fail the specified shard if needed, removing it from the current set
386392
* of active shards. Whether a failure is needed is left up to the
387393
* implementation.
388394
*
389-
* @param replica shard to fail
390-
* @param message a (short) description of the reason
391-
* @param exception the original exception which caused the ReplicationOperation to request the shard to be failed
392-
* @param listener a listener that will be notified when the failing shard has been removed from the in-sync set
395+
* @param replica shard to fail
396+
* @param primaryTerm the primary term
397+
* @param message a (short) description of the reason
398+
* @param exception the original exception which caused the ReplicationOperation to request the shard to be failed
399+
* @param listener a listener that will be notified when the failing shard has been removed from the in-sync set
393400
*/
394-
void failShardIfNeeded(ShardRouting replica, String message, Exception exception, ActionListener<Void> listener);
401+
void failShardIfNeeded(ShardRouting replica, long primaryTerm, String message, Exception exception, ActionListener<Void> listener);
395402

396403
/**
397404
* Marks shard copy as stale if needed, removing its allocation id from
@@ -400,9 +407,10 @@ void performOn(ShardRouting replica, RequestT replicaRequest, long globalCheckpo
400407
*
401408
* @param shardId shard id
402409
* @param allocationId allocation id to remove from the set of in-sync allocation ids
410+
* @param primaryTerm the primary term
403411
* @param listener a listener that will be notified when the failing shard has been removed from the in-sync set
404412
*/
405-
void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, ActionListener<Void> listener);
413+
void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, long primaryTerm, ActionListener<Void> listener);
406414
}
407415

408416
/**
@@ -427,11 +435,11 @@ public interface ReplicaResponse {
427435
}
428436

429437
public static class RetryOnPrimaryException extends ElasticsearchException {
430-
public RetryOnPrimaryException(ShardId shardId, String msg) {
438+
RetryOnPrimaryException(ShardId shardId, String msg) {
431439
this(shardId, msg, null);
432440
}
433441

434-
public RetryOnPrimaryException(ShardId shardId, String msg, Throwable cause) {
442+
RetryOnPrimaryException(ShardId shardId, String msg, Throwable cause) {
435443
super(msg, cause);
436444
setShard(shardId);
437445
}

server/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java

+7-11
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,8 @@ protected void doExecute(Task task, Request request, ActionListener<Response> li
163163
new ReroutePhase((ReplicationTask) task, request, listener).run();
164164
}
165165

166-
protected ReplicationOperation.Replicas<ReplicaRequest> newReplicasProxy(long primaryTerm) {
167-
return new ReplicasProxy(primaryTerm);
166+
protected ReplicationOperation.Replicas<ReplicaRequest> newReplicasProxy() {
167+
return new ReplicasProxy();
168168
}
169169

170170
protected abstract Response newResponseInstance();
@@ -409,7 +409,7 @@ protected ReplicationOperation<Request, ReplicaRequest, PrimaryResult<ReplicaReq
409409
Request request, ActionListener<PrimaryResult<ReplicaRequest, Response>> listener,
410410
PrimaryShardReference primaryShardReference) {
411411
return new ReplicationOperation<>(request, primaryShardReference, listener,
412-
newReplicasProxy(primaryRequest.getPrimaryTerm()), logger, actionName);
412+
newReplicasProxy(), logger, actionName, primaryRequest.getPrimaryTerm());
413413
}
414414
}
415415

@@ -1021,16 +1021,11 @@ public int hashCode() {
10211021
*/
10221022
protected class ReplicasProxy implements ReplicationOperation.Replicas<ReplicaRequest> {
10231023

1024-
protected final long primaryTerm;
1025-
1026-
public ReplicasProxy(long primaryTerm) {
1027-
this.primaryTerm = primaryTerm;
1028-
}
1029-
10301024
@Override
10311025
public void performOn(
10321026
final ShardRouting replica,
10331027
final ReplicaRequest request,
1028+
final long primaryTerm,
10341029
final long globalCheckpoint,
10351030
final long maxSeqNoOfUpdatesOrDeletes,
10361031
final ActionListener<ReplicationOperation.ReplicaResponse> listener) {
@@ -1051,7 +1046,8 @@ public void performOn(
10511046
}
10521047

10531048
@Override
1054-
public void failShardIfNeeded(ShardRouting replica, String message, Exception exception, ActionListener<Void> listener) {
1049+
public void failShardIfNeeded(ShardRouting replica, long primaryTerm, String message, Exception exception,
1050+
ActionListener<Void> listener) {
10551051
// This does not need to fail the shard. The idea is that this
10561052
// is a non-write operation (something like a refresh or a global
10571053
// checkpoint sync) and therefore the replica should still be
@@ -1060,7 +1056,7 @@ public void failShardIfNeeded(ShardRouting replica, String message, Exception ex
10601056
}
10611057

10621058
@Override
1063-
public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, ActionListener<Void> listener) {
1059+
public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, long primaryTerm, ActionListener<Void> listener) {
10641060
// This does not need to make the shard stale. The idea is that this
10651061
// is a non-write operation (something like a refresh or a global
10661062
// checkpoint sync) and therefore the replica should still be

server/src/main/java/org/elasticsearch/action/support/replication/TransportWriteAction.java

+5-8
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ public static Location locationToSync(Location current, Location next) {
9696
}
9797

9898
@Override
99-
protected ReplicationOperation.Replicas newReplicasProxy(long primaryTerm) {
100-
return new WriteActionReplicasProxy(primaryTerm);
99+
protected ReplicationOperation.Replicas<ReplicaRequest> newReplicasProxy() {
100+
return new WriteActionReplicasProxy();
101101
}
102102

103103
/**
@@ -371,12 +371,9 @@ void run() {
371371
*/
372372
class WriteActionReplicasProxy extends ReplicasProxy {
373373

374-
WriteActionReplicasProxy(long primaryTerm) {
375-
super(primaryTerm);
376-
}
377-
378374
@Override
379-
public void failShardIfNeeded(ShardRouting replica, String message, Exception exception, ActionListener<Void> listener) {
375+
public void failShardIfNeeded(ShardRouting replica, long primaryTerm, String message, Exception exception,
376+
ActionListener<Void> listener) {
380377
if (TransportActions.isShardNotAvailableException(exception) == false) {
381378
logger.warn(new ParameterizedMessage("[{}] {}", replica.shardId(), message), exception);
382379
}
@@ -385,7 +382,7 @@ public void failShardIfNeeded(ShardRouting replica, String message, Exception ex
385382
}
386383

387384
@Override
388-
public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, ActionListener<Void> listener) {
385+
public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, long primaryTerm, ActionListener<Void> listener) {
389386
shardStateAction.remoteShardFailed(shardId, allocationId, primaryTerm, true, "mark copy as stale", null, listener);
390387
}
391388
}

0 commit comments

Comments
 (0)