Skip to content

Commit e242fd2

Browse files
authored
CCR: Add TransportService closed to retryable errors (#34722)
Both testFollowIndexAndCloseNode and testFailOverOnFollower failed because the FollowTask received a "TransportService is closed" exception in response, which is currently treated as a fatal error. This behavior is undesirable: a node that is closing can throw that exception, and the task should retry in that case. This change adds the "TransportService is closed" error to the list of retryable errors. Closes #34694
1 parent 90fd15b commit e242fd2

File tree

2 files changed

+7
-3
lines changed

2 files changed

+7
-3
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/ShardFollowNodeTask.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import org.elasticsearch.indices.IndexClosedException;
2929
import org.elasticsearch.persistent.AllocatedPersistentTask;
3030
import org.elasticsearch.tasks.TaskId;
31+
import org.elasticsearch.transport.NodeDisconnectedException;
32+
import org.elasticsearch.transport.NodeNotConnectedException;
3133
import org.elasticsearch.xpack.ccr.action.bulk.BulkShardOperationsResponse;
3234
import org.elasticsearch.xpack.core.ccr.ShardFollowNodeTaskStatus;
3335

@@ -371,6 +373,7 @@ private void handleFailure(Exception e, AtomicInteger retryCounter, Runnable tas
371373
scheduler.accept(TimeValue.timeValueMillis(delay), task);
372374
} else {
373375
fatalException = ExceptionsHelper.convertToElastic(e);
376+
LOGGER.warn("shard follow task encounter non-retryable error", e);
374377
}
375378
}
376379

@@ -399,7 +402,10 @@ static boolean shouldRetry(Exception e) {
399402
actual instanceof AlreadyClosedException ||
400403
actual instanceof ElasticsearchSecurityException || // If user does not have sufficient privileges
401404
actual instanceof ClusterBlockException || // If leader index is closed or no elected master
402-
actual instanceof IndexClosedException; // If follow index is closed
405+
actual instanceof IndexClosedException || // If follow index is closed
406+
actual instanceof NodeDisconnectedException ||
407+
actual instanceof NodeNotConnectedException ||
408+
(actual.getMessage() != null && actual.getMessage().contains("TransportService is closed"));
403409
}
404410

405411
// These methods are protected for testing purposes:

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/IndexFollowingIT.java

-2
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,6 @@ public void afterBulk(long executionId, BulkRequest request, Throwable failure)
270270
assertMaxSeqNoOfUpdatesIsTransferred(resolveLeaderIndex("index1"), resolveFollowerIndex("index2"), numberOfShards);
271271
}
272272

273-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/34696")
274273
public void testFollowIndexAndCloseNode() throws Exception {
275274
getFollowerCluster().ensureAtLeastNumDataNodes(3);
276275
String leaderIndexSettings = getIndexSettings(3, 1, singletonMap(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "true"));
@@ -587,7 +586,6 @@ public void testUnfollowIndex() throws Exception {
587586
assertThat(followerClient().prepareSearch("index2").get().getHits().getTotalHits(), equalTo(2L));
588587
}
589588

590-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/34696")
591589
public void testFailOverOnFollower() throws Exception {
592590
int numberOfReplicas = between(1, 2);
593591
getFollowerCluster().startMasterOnlyNode();

0 commit comments

Comments
 (0)