Skip to content

Commit 87c889f

Browse files
committed
CCR should retry on CircuitBreakingException (#62013)
A CCR shard follow task can hit a CircuitBreakingException either on the leader cluster (when reading shard changes) or on the follower cluster (when executing bulk requests). CCR should retry on CircuitBreakingException because it is a transient error.
1 parent ac23380 commit 87c889f

File tree

2 files changed

+10
-2
lines changed

2 files changed

+10
-2
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/ShardFollowNodeTask.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.action.UnavailableShardsException;
1919
import org.elasticsearch.cluster.block.ClusterBlockException;
2020
import org.elasticsearch.common.Randomness;
21+
import org.elasticsearch.common.breaker.CircuitBreakingException;
2122
import org.elasticsearch.common.collect.Tuple;
2223
import org.elasticsearch.common.transport.NetworkExceptionHelper;
2324
import org.elasticsearch.common.unit.TimeValue;
@@ -567,7 +568,8 @@ static boolean shouldRetry(final Exception e) {
567568
actual instanceof ConnectTransportException ||
568569
actual instanceof NodeClosedException ||
569570
actual instanceof NoSuchRemoteClusterException ||
570-
actual instanceof EsRejectedExecutionException;
571+
actual instanceof EsRejectedExecutionException ||
572+
actual instanceof CircuitBreakingException;
571573
}
572574

573575
// These methods are protected for testing purposes:

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowNodeTaskRandomTests.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77

88
import org.elasticsearch.action.UnavailableShardsException;
99
import org.elasticsearch.common.UUIDs;
10+
import org.elasticsearch.common.breaker.CircuitBreaker;
11+
import org.elasticsearch.common.breaker.CircuitBreakingException;
1012
import org.elasticsearch.common.unit.ByteSizeUnit;
1113
import org.elasticsearch.common.unit.ByteSizeValue;
1214
import org.elasticsearch.common.unit.TimeValue;
15+
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
1316
import org.elasticsearch.index.seqno.LocalCheckpointTracker;
1417
import org.elasticsearch.index.shard.ShardId;
1518
import org.elasticsearch.index.translog.Translog;
@@ -311,7 +314,10 @@ private static TestRun createTestRun(
311314
List<TestResponse> item = new ArrayList<>();
312315
// Sometimes add a random retryable error
313316
if (sometimes()) {
314-
Exception error = new UnavailableShardsException(new ShardId("test", "test", 0), "");
317+
Exception error = randomFrom(
318+
new UnavailableShardsException(new ShardId("test", "test", 0), ""),
319+
new CircuitBreakingException("test", randomInt(), randomInt(), randomFrom(CircuitBreaker.Durability.values())),
320+
new EsRejectedExecutionException("test"));
315321
item.add(new TestResponse(error, mappingVersion, settingsVersion, null));
316322
}
317323
// Sometimes add an empty shard changes response to also simulate a leader shard lagging behind

0 commit comments

Comments
 (0)