Skip to content

Commit 0a604e3

Browse files
Fix Two Races that Lead to Stuck Snapshots (#37686)
* Fixes two broken spots: 1. Master failover while deleting a snapshot that has no shards will get stuck if the new master finds the 0-shard snapshot in `INIT` when deleting 2. Aborted shards that were never seen in `INIT` state by the `SnapshotsShardService` will not be notified as failed, leading to the snapshot staying in `ABORTED` state and never getting deleted with one or more shards stuck in `ABORTED` state * Tried to make fixes as short as possible so we can backport to `6.x` with the least amount of risk * Significantly extended test infrastructure to reproduce the above two issues * Two new test runs: 1. Reproducing the effects of node disconnects/restarts in isolation 2. Reproducing the effects of disconnects/restarts in parallel with shard relocations and deletes * Relates #32265 * Closes #32348
1 parent 0d56955 commit 0a604e3

File tree

6 files changed

+1096
-638
lines changed

6 files changed

+1096
-638
lines changed

server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java

+46-10
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@
6767
import org.elasticsearch.repositories.IndexId;
6868
import org.elasticsearch.repositories.Repository;
6969
import org.elasticsearch.threadpool.ThreadPool;
70+
import org.elasticsearch.transport.EmptyTransportResponseHandler;
71+
import org.elasticsearch.transport.TransportException;
72+
import org.elasticsearch.transport.TransportRequestDeduplicator;
73+
import org.elasticsearch.transport.TransportResponse;
7074
import org.elasticsearch.transport.TransportService;
7175

7276
import java.io.IOException;
@@ -85,7 +89,6 @@
8589
import static java.util.Collections.emptyMap;
8690
import static java.util.Collections.unmodifiableMap;
8791
import static org.elasticsearch.cluster.SnapshotsInProgress.completed;
88-
import static org.elasticsearch.transport.EmptyTransportResponseHandler.INSTANCE_SAME;
8992

9093
/**
9194
* This service runs on data and master nodes and controls currently snapshotted shards on these nodes. It is responsible for
@@ -112,6 +115,10 @@ public class SnapshotShardsService extends AbstractLifecycleComponent implements
112115

113116
private volatile Map<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> shardSnapshots = emptyMap();
114117

118+
// A map of snapshots to the shardIds that we already reported to the master as failed
119+
private final TransportRequestDeduplicator<UpdateIndexShardSnapshotStatusRequest> remoteFailedRequestDeduplicator =
120+
new TransportRequestDeduplicator<>();
121+
115122
private final SnapshotStateExecutor snapshotStateExecutor = new SnapshotStateExecutor();
116123
private final UpdateSnapshotStatusAction updateSnapshotStatusHandler;
117124

@@ -272,12 +279,11 @@ private void processIndexShardSnapshots(ClusterChangedEvent event) {
272279
// Abort all running shards for this snapshot
273280
Map<ShardId, IndexShardSnapshotStatus> snapshotShards = shardSnapshots.get(entry.snapshot());
274281
if (snapshotShards != null) {
275-
final String failure = "snapshot has been aborted";
276282
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
277-
278283
final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key);
279284
if (snapshotStatus != null) {
280-
final IndexShardSnapshotStatus.Copy lastSnapshotStatus = snapshotStatus.abortIfNotCompleted(failure);
285+
final IndexShardSnapshotStatus.Copy lastSnapshotStatus =
286+
snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
281287
final Stage stage = lastSnapshotStatus.getStage();
282288
if (stage == Stage.FINALIZE) {
283289
logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, " +
@@ -295,6 +301,15 @@ private void processIndexShardSnapshots(ClusterChangedEvent event) {
295301
}
296302
}
297303
}
304+
} else {
305+
final Snapshot snapshot = entry.snapshot();
306+
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> curr : entry.shards()) {
307+
// due to CS batching we might have missed the INIT state and straight went into ABORTED
308+
// notify master that abort has completed by moving to FAILED
309+
if (curr.value.state() == State.ABORTED) {
310+
notifyFailedSnapshotShard(snapshot, curr.key, localNodeId, curr.value.reason());
311+
}
312+
}
298313
}
299314
}
300315
}
@@ -515,12 +530,33 @@ void notifyFailedSnapshotShard(final Snapshot snapshot, final ShardId shardId, f
515530

516531
/** Updates the shard snapshot status by sending a {@link UpdateIndexShardSnapshotStatusRequest} to the master node */
517532
void sendSnapshotShardUpdate(final Snapshot snapshot, final ShardId shardId, final ShardSnapshotStatus status) {
518-
try {
519-
UpdateIndexShardSnapshotStatusRequest request = new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status);
520-
transportService.sendRequest(transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, request, INSTANCE_SAME);
521-
} catch (Exception e) {
522-
logger.warn(() -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e);
523-
}
533+
remoteFailedRequestDeduplicator.executeOnce(
534+
new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status),
535+
new ActionListener<Void>() {
536+
@Override
537+
public void onResponse(Void aVoid) {
538+
logger.trace("[{}] [{}] updated snapshot state", snapshot, status);
539+
}
540+
541+
@Override
542+
public void onFailure(Exception e) {
543+
logger.warn(
544+
() -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", snapshot, status), e);
545+
}
546+
},
547+
(req, reqListener) -> transportService.sendRequest(transportService.getLocalNode(), UPDATE_SNAPSHOT_STATUS_ACTION_NAME, req,
548+
new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
549+
@Override
550+
public void handleResponse(TransportResponse.Empty response) {
551+
reqListener.onResponse(null);
552+
}
553+
554+
@Override
555+
public void handleException(TransportException exp) {
556+
reqListener.onFailure(exp);
557+
}
558+
})
559+
);
524560
}
525561

526562
/**

server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -1210,7 +1210,10 @@ public ClusterState execute(ClusterState currentState) throws Exception {
12101210
if (state == State.INIT) {
12111211
// snapshot is still initializing, mark it as aborted
12121212
shards = snapshotEntry.shards();
1213-
1213+
assert shards.isEmpty();
1214+
// No shards in this snapshot, we delete it right away since the SnapshotShardsService
1215+
// has no work to do.
1216+
endSnapshot(snapshotEntry);
12141217
} else if (state == State.STARTED) {
12151218
// snapshot is started - mark every non completed shard as aborted
12161219
final ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shardsBuilder = ImmutableOpenMap.builder();

0 commit comments

Comments
 (0)