Skip to content

Commit d8ff124

Browse files
authored
[7.2] Narrow period of Shrink action in which ILM prevents stopping (#43254) (#43394)
* Narrow period of Shrink action in which ILM prevents stopping.
  Prior to this change, we would prevent stopping of ILM if the index was anywhere in the shrink action. This commit changes `IndexLifecycleService` to allow stopping when the index is in any of the innocuous steps of the shrink action, so that ILM prevents stopping only when absolutely necessary (i.e. during the shrink step itself). Resolves #43253.
* Rename variable for ignore actions -> ignore steps.
* Fix comment.
* Factor test out to test *all* stoppable steps.
1 parent 4818b3d commit d8ff124

File tree

2 files changed

+75
-20
lines changed

2 files changed

+75
-20
lines changed

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/indexlifecycle/IndexLifecycleService.java

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import org.elasticsearch.xpack.core.indexlifecycle.LifecyclePolicy;
3030
import org.elasticsearch.xpack.core.indexlifecycle.LifecycleSettings;
3131
import org.elasticsearch.xpack.core.indexlifecycle.OperationMode;
32-
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkAction;
32+
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkStep;
3333
import org.elasticsearch.xpack.core.indexlifecycle.Step.StepKey;
3434
import org.elasticsearch.xpack.core.scheduler.SchedulerEngine;
3535

@@ -45,7 +45,7 @@
4545
public class IndexLifecycleService
4646
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
4747
private static final Logger logger = LogManager.getLogger(IndexLifecycleService.class);
48-
private static final Set<String> IGNORE_ACTIONS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkAction.NAME);
48+
private static final Set<String> IGNORE_STEPS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkStep.NAME);
4949
private volatile boolean isMaster = false;
5050
private volatile TimeValue pollInterval;
5151

@@ -115,15 +115,15 @@ public void onMaster() {
115115
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
116116

117117
if (OperationMode.STOPPING == currentMode) {
118-
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
119-
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
120-
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
118+
if (stepKey != null && IGNORE_STEPS_MAINTENANCE_REQUESTED.contains(stepKey.getName())) {
119+
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in step [{}]",
120+
idxMeta.getIndex().getName(), policyName, stepKey.getName());
121121
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
122-
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
122+
// ILM is trying to stop, but this index is in a Shrink step (or other dangerous step) so we can't stop
123123
safeToStop = false;
124124
} else {
125-
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
126-
idxMeta.getIndex().getName(), policyName);
125+
logger.info("skipping policy execution of step [{}] for index [{}] with policy [{}] because ILM is stopping",
126+
stepKey == null ? "n/a" : stepKey.getName(), idxMeta.getIndex().getName(), policyName);
127127
}
128128
} else {
129129
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
@@ -249,19 +249,19 @@ void triggerPolicies(ClusterState clusterState, boolean fromClusterStateChange)
249249
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
250250

251251
if (OperationMode.STOPPING == currentMode) {
252-
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
253-
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
254-
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
252+
if (stepKey != null && IGNORE_STEPS_MAINTENANCE_REQUESTED.contains(stepKey.getName())) {
253+
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in step [{}]",
254+
idxMeta.getIndex().getName(), policyName, stepKey.getName());
255255
if (fromClusterStateChange) {
256256
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
257257
} else {
258258
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
259259
}
260-
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
260+
// ILM is trying to stop, but this index is in a Shrink step (or other dangerous step) so we can't stop
261261
safeToStop = false;
262262
} else {
263-
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
264-
idxMeta.getIndex().getName(), policyName);
263+
logger.info("skipping policy execution of step [{}] for index [{}] with policy [{}] because ILM is stopping",
264+
stepKey == null ? "n/a" : stepKey.getName(), idxMeta.getIndex().getName(), policyName);
265265
}
266266
} else {
267267
if (fromClusterStateChange) {

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/indexlifecycle/IndexLifecycleServiceTests.java

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.elasticsearch.xpack.core.indexlifecycle.OperationMode;
3838
import org.elasticsearch.xpack.core.indexlifecycle.Phase;
3939
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkAction;
40+
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkStep;
4041
import org.elasticsearch.xpack.core.indexlifecycle.Step;
4142
import org.elasticsearch.xpack.core.scheduler.SchedulerEngine;
4243
import org.junit.After;
@@ -58,6 +59,7 @@
5859
import static org.hamcrest.Matchers.equalTo;
5960
import static org.mockito.Matchers.any;
6061
import static org.mockito.Matchers.anyString;
62+
import static org.mockito.Matchers.eq;
6163
import static org.mockito.Mockito.doAnswer;
6264
import static org.mockito.Mockito.mock;
6365
import static org.mockito.Mockito.when;
@@ -148,7 +150,7 @@ public void testStoppedModeSkip() {
148150
}
149151

150152
public void testRequestedStopOnShrink() {
151-
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, randomAlphaOfLength(5));
153+
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, ShrinkStep.NAME);
152154
String policyName = randomAlphaOfLengthBetween(1, 20);
153155
IndexLifecycleRunnerTests.MockClusterStateActionStep mockStep =
154156
new IndexLifecycleRunnerTests.MockClusterStateActionStep(mockShrinkStep, randomStepKey());
@@ -180,14 +182,67 @@ public void testRequestedStopOnShrink() {
180182
.build();
181183

182184
ClusterChangedEvent event = new ClusterChangedEvent("_source", currentState, ClusterState.EMPTY_STATE);
183-
SetOnce<Boolean> executedShrink = new SetOnce<>();
185+
SetOnce<Boolean> changedOperationMode = new SetOnce<>();
184186
doAnswer(invocationOnMock -> {
185-
executedShrink.set(true);
187+
changedOperationMode.set(true);
186188
return null;
187-
}).when(clusterService).submitStateUpdateTask(anyString(), any(ExecuteStepsUpdateTask.class));
189+
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
190+
indexLifecycleService.applyClusterState(event);
191+
indexLifecycleService.triggerPolicies(currentState, true);
192+
assertNull(changedOperationMode.get());
193+
}
194+
195+
public void testRequestedStopInShrinkActionButNotShrinkStep() {
196+
// test all the shrink action steps that ILM can be stopped during (basically all of them minus the actual shrink)
197+
ShrinkAction action = new ShrinkAction(1);
198+
action.toSteps(mock(Client.class), "warm", randomStepKey()).stream()
199+
.map(sk -> sk.getKey().getName())
200+
.filter(name -> name.equals(ShrinkStep.NAME) == false)
201+
.forEach(this::verifyCanStopWithStep);
202+
}
203+
204+
// Check that ILM can stop when in the shrink action on the provided step
205+
private void verifyCanStopWithStep(String stoppableStep) {
206+
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, stoppableStep);
207+
String policyName = randomAlphaOfLengthBetween(1, 20);
208+
IndexLifecycleRunnerTests.MockClusterStateActionStep mockStep =
209+
new IndexLifecycleRunnerTests.MockClusterStateActionStep(mockShrinkStep, randomStepKey());
210+
MockAction mockAction = new MockAction(Collections.singletonList(mockStep));
211+
Phase phase = new Phase("phase", TimeValue.ZERO, Collections.singletonMap("action", mockAction));
212+
LifecyclePolicy policy = newTestLifecyclePolicy(policyName, Collections.singletonMap(phase.getName(), phase));
213+
SortedMap<String, LifecyclePolicyMetadata> policyMap = new TreeMap<>();
214+
policyMap.put(policyName, new LifecyclePolicyMetadata(policy, Collections.emptyMap(),
215+
randomNonNegativeLong(), randomNonNegativeLong()));
216+
Index index = new Index(randomAlphaOfLengthBetween(1, 20), randomAlphaOfLengthBetween(1, 20));
217+
LifecycleExecutionState.Builder lifecycleState = LifecycleExecutionState.builder();
218+
lifecycleState.setPhase(mockShrinkStep.getPhase());
219+
lifecycleState.setAction(mockShrinkStep.getAction());
220+
lifecycleState.setStep(mockShrinkStep.getName());
221+
IndexMetaData indexMetadata = IndexMetaData.builder(index.getName())
222+
.settings(settings(Version.CURRENT).put(LifecycleSettings.LIFECYCLE_NAME_SETTING.getKey(), policyName))
223+
.putCustom(ILM_CUSTOM_METADATA_KEY, lifecycleState.build().asMap())
224+
.numberOfShards(randomIntBetween(1, 5)).numberOfReplicas(randomIntBetween(0, 5)).build();
225+
ImmutableOpenMap.Builder<String, IndexMetaData> indices = ImmutableOpenMap.<String, IndexMetaData> builder()
226+
.fPut(index.getName(), indexMetadata);
227+
MetaData metaData = MetaData.builder()
228+
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(policyMap, OperationMode.STOPPING))
229+
.indices(indices.build())
230+
.persistentSettings(settings(Version.CURRENT).build())
231+
.build();
232+
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
233+
.metaData(metaData)
234+
.nodes(DiscoveryNodes.builder().localNodeId(nodeId).masterNodeId(nodeId).add(masterNode).build())
235+
.build();
236+
237+
ClusterChangedEvent event = new ClusterChangedEvent("_source", currentState, ClusterState.EMPTY_STATE);
238+
SetOnce<Boolean> changedOperationMode = new SetOnce<>();
239+
doAnswer(invocationOnMock -> {
240+
changedOperationMode.set(true);
241+
return null;
242+
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
188243
indexLifecycleService.applyClusterState(event);
189244
indexLifecycleService.triggerPolicies(currentState, true);
190-
assertTrue(executedShrink.get());
245+
assertTrue(changedOperationMode.get());
191246
}
192247

193248
public void testRequestedStopOnSafeAction() {
@@ -236,7 +291,7 @@ public void testRequestedStopOnSafeAction() {
236291
assertThat(task.getOperationMode(), equalTo(OperationMode.STOPPED));
237292
moveToMaintenance.set(true);
238293
return null;
239-
}).when(clusterService).submitStateUpdateTask(anyString(), any(OperationModeUpdateTask.class));
294+
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
240295

241296
indexLifecycleService.applyClusterState(event);
242297
indexLifecycleService.triggerPolicies(currentState, randomBoolean());

0 commit comments

Comments
 (0)