Skip to content

Commit 217aeaa

Browse files
committed
Don't schedule SLM jobs when services have been stopped (elastic#48658)
This adds a guard for the SLM lifecycle and retention service that prevents new jobs from being scheduled once the service has been stopped. Previously, if the node was shut down, the service would be stopped, but a subsequent cluster state change or local master election could still cause a job to attempt to be scheduled. This could lead to an uncaught `RejectedExecutionException`. Resolves elastic#47749
1 parent 569fb14 commit 217aeaa

File tree

2 files changed

+16
-1
lines changed

2 files changed

+16
-1
lines changed

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleService.java

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import java.util.Map;
3030
import java.util.Optional;
3131
import java.util.Set;
32+
import java.util.concurrent.atomic.AtomicBoolean;
3233
import java.util.function.Supplier;
3334
import java.util.regex.Pattern;
3435
import java.util.stream.Collectors;
@@ -47,6 +48,7 @@ public class SnapshotLifecycleService implements LocalNodeMasterListener, Closea
4748
private final ClusterService clusterService;
4849
private final SnapshotLifecycleTask snapshotTask;
4950
private final Map<String, SchedulerEngine.Job> scheduledTasks = ConcurrentCollections.newConcurrentMap();
51+
private final AtomicBoolean running = new AtomicBoolean(true);
5052
private volatile boolean isMaster = false;
5153

5254
public SnapshotLifecycleService(Settings settings,
@@ -142,6 +144,10 @@ public void cleanupDeletedPolicies(final ClusterState state) {
142144
* the same version of a policy has already been scheduled it does not overwrite the job.
143145
*/
144146
public void maybeScheduleSnapshot(final SnapshotLifecyclePolicyMetadata snapshotLifecyclePolicy) {
147+
if (this.running.get() == false) {
148+
return;
149+
}
150+
145151
final String jobId = getJobId(snapshotLifecyclePolicy);
146152
final Pattern existingJobPattern = Pattern.compile(snapshotLifecyclePolicy.getPolicy().getId() + JOB_PATTERN_SUFFIX);
147153

@@ -219,6 +225,8 @@ public String executorName() {
219225

220226
@Override
221227
public void close() {
222-
this.scheduler.stop();
228+
if (this.running.compareAndSet(true, false)) {
229+
this.scheduler.stop();
230+
}
223231
}
224232
}

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotLifecycleServiceTests.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,13 @@ public void testNothingScheduledWhenNotRunning() {
129129
// Since the service is stopped, jobs should have been cancelled
130130
assertThat(sls.getScheduler().scheduledJobIds(), equalTo(Collections.emptySet()));
131131

132+
// No jobs should be scheduled when service is closed
133+
state = createState(new SnapshotLifecycleMetadata(policies, OperationMode.RUNNING, new SnapshotLifecycleStats()));
134+
sls.close();
135+
sls.onMaster();
136+
sls.clusterChanged(new ClusterChangedEvent("1", state, emptyState));
137+
assertThat(sls.getScheduler().scheduledJobIds(), equalTo(Collections.emptySet()));
138+
132139
threadPool.shutdownNow();
133140
}
134141
}

0 commit comments

Comments
 (0)