From a2c7a31d97cc3713025562492948aecdc2ae7516 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 21 Jan 2020 21:05:21 +0100 Subject: [PATCH] Fix Overly Optimistic Request Deduplication On master failover we have to resend all the shard failed messages, but the transport requests remain the same in the eyes of `equals`. If the master failover is registered and the requests to the new master are sent before all the callbacks have executed and the request to the old master has been removed from the deduplicator, then the requests to the new master will incorrectly fail and the snapshot gets stuck. Closes #51253 --- .../elasticsearch/snapshots/SnapshotShardsService.java | 3 +++ .../transport/TransportRequestDeduplicator.java | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java index 18a9e99c791d3..046b75a156f7d 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java @@ -358,6 +358,9 @@ private void syncShardStatsOnNewMaster(ClusterChangedEvent event) { return; } + // Clear request deduplicator since we need to send all requests that were potentially not handled by the previous + // master again + remoteFailedRequestDeduplicator.clear(); for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) { if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) { Map localShards = currentSnapshotShards(snapshot.snapshot()); diff --git a/server/src/main/java/org/elasticsearch/transport/TransportRequestDeduplicator.java b/server/src/main/java/org/elasticsearch/transport/TransportRequestDeduplicator.java index d929ef34ce2c3..6249975bf3ef6 100644 --- a/server/src/main/java/org/elasticsearch/transport/TransportRequestDeduplicator.java +++ 
b/server/src/main/java/org/elasticsearch/transport/TransportRequestDeduplicator.java @@ -53,6 +53,14 @@ public void executeOnce(T request, ActionListener listener, BiConsumer