Skip to content

Commit 3c2ee2b

Browse files
committed
Fix shard follow task startup error handling (#39053)
Prior to this commit, if a fatal error occurred while fetching the leader / follower global checkpoint (GCP) during startup, the shard follow task was removed. This is unexpected, because when such an error occurs later in the lifetime of a shard follow task, replication is stopped and the fatal error flag is set instead. Setting the flag allows the ccr stats api to report the fatal exception that has occurred (instead of the user having to grep through the elasticsearch logs). This issue was found by a rare failure of the `FollowStatsIT#testFollowStatsApiIncludeShardFollowStatsWithRemovedFollowerIndex` test. Closes #38779
1 parent 4a72be3 commit 3c2ee2b

File tree

3 files changed

+7
-4
lines changed

3 files changed

+7
-4
lines changed

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/ShardFollowNodeTask.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -452,11 +452,15 @@ private void handleFailure(Exception e, AtomicInteger retryCounter, Runnable tas
452452
scheduler.accept(TimeValue.timeValueMillis(delay), task);
453453
}
454454
} else {
455-
fatalException = ExceptionsHelper.convertToElastic(e);
456-
LOGGER.warn("shard follow task encounter non-retryable error", e);
455+
setFatalException(e);
457456
}
458457
}
459458

459+
void setFatalException(Exception e) {
460+
fatalException = ExceptionsHelper.convertToElastic(e);
461+
LOGGER.warn("shard follow task encounter non-retryable error", e);
462+
}
463+
460464
static long computeDelay(int currentRetry, long maxRetryDelayInMillis) {
461465
// Cap currentRetry to avoid overflow when computing n variable
462466
int maxCurrentRetry = Math.min(currentRetry, 24);

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/ShardFollowTasksExecutor.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ protected void nodeOperation(final AllocatedPersistentTask task, final ShardFoll
282282
shardFollowNodeTask), e);
283283
threadPool.schedule(() -> nodeOperation(task, params, state), params.getMaxRetryDelay(), Ccr.CCR_THREAD_POOL_NAME);
284284
} else {
285-
shardFollowNodeTask.markAsFailed(e);
285+
shardFollowNodeTask.setFatalException(e);
286286
}
287287
};
288288

x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/FollowStatsIT.java

-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ public void testFollowStatsApiResourceNotFound() throws Exception {
149149
assertAcked(client().execute(PauseFollowAction.INSTANCE, new PauseFollowAction.Request("follower1")).actionGet());
150150
}
151151

152-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/38779")
153152
public void testFollowStatsApiIncludeShardFollowStatsWithRemovedFollowerIndex() throws Exception {
154153
final String leaderIndexSettings = getIndexSettings(1, 0,
155154
singletonMap(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "true"));

0 commit comments

Comments
 (0)