Skip to content

Commit 27890db

Browse files
committed
Narrow period of Shrink action in which ILM prevents stopping
Prior to this change, ILM could not be stopped while an index was anywhere in the shrink action. This commit changes `IndexLifecycleService` to allow stopping during any of the innocuous steps of the shrink action, so that ILM now prevents stopping only when absolutely necessary, i.e. while an index is in the actual shrink step. Resolves elastic#43253
1 parent 7870ae2 commit 27890db

File tree

2 files changed

+72
-18
lines changed

2 files changed

+72
-18
lines changed

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/indexlifecycle/IndexLifecycleService.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
import org.elasticsearch.xpack.core.indexlifecycle.LifecyclePolicy;
3030
import org.elasticsearch.xpack.core.indexlifecycle.LifecycleSettings;
3131
import org.elasticsearch.xpack.core.indexlifecycle.OperationMode;
32-
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkAction;
32+
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkStep;
3333
import org.elasticsearch.xpack.core.indexlifecycle.Step.StepKey;
3434
import org.elasticsearch.xpack.core.scheduler.SchedulerEngine;
3535

@@ -45,7 +45,7 @@
4545
public class IndexLifecycleService
4646
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
4747
private static final Logger logger = LogManager.getLogger(IndexLifecycleService.class);
48-
private static final Set<String> IGNORE_ACTIONS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkAction.NAME);
48+
private static final Set<String> IGNORE_ACTIONS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkStep.NAME);
4949
private volatile boolean isMaster = false;
5050
private volatile TimeValue pollInterval;
5151

@@ -115,15 +115,15 @@ public void onMaster() {
115115
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
116116

117117
if (OperationMode.STOPPING == currentMode) {
118-
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
119-
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
120-
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
118+
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getName())) {
119+
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in step [{}]",
120+
idxMeta.getIndex().getName(), policyName, stepKey.getName());
121121
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
122122
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
123123
safeToStop = false;
124124
} else {
125-
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
126-
idxMeta.getIndex().getName(), policyName);
125+
logger.info("skipping policy execution of step [{}] for index [{}] with policy [{}] because ILM is stopping",
126+
stepKey == null ? "n/a" : stepKey.getName(), idxMeta.getIndex().getName(), policyName);
127127
}
128128
} else {
129129
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
@@ -249,9 +249,9 @@ void triggerPolicies(ClusterState clusterState, boolean fromClusterStateChange)
249249
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);
250250

251251
if (OperationMode.STOPPING == currentMode) {
252-
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
253-
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
254-
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
252+
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getName())) {
253+
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in step [{}]",
254+
idxMeta.getIndex().getName(), policyName, stepKey.getName());
255255
if (fromClusterStateChange) {
256256
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
257257
} else {
@@ -260,8 +260,8 @@ void triggerPolicies(ClusterState clusterState, boolean fromClusterStateChange)
260260
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
261261
safeToStop = false;
262262
} else {
263-
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
264-
idxMeta.getIndex().getName(), policyName);
263+
logger.info("skipping policy execution of step [{}] for index [{}] with policy [{}] because ILM is stopping",
264+
stepKey == null ? "n/a" : stepKey.getName(), idxMeta.getIndex().getName(), policyName);
265265
}
266266
} else {
267267
if (fromClusterStateChange) {

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/indexlifecycle/IndexLifecycleServiceTests.java

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.elasticsearch.xpack.core.indexlifecycle.OperationMode;
3838
import org.elasticsearch.xpack.core.indexlifecycle.Phase;
3939
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkAction;
40+
import org.elasticsearch.xpack.core.indexlifecycle.ShrinkStep;
4041
import org.elasticsearch.xpack.core.indexlifecycle.Step;
4142
import org.elasticsearch.xpack.core.scheduler.SchedulerEngine;
4243
import org.junit.After;
@@ -47,9 +48,11 @@
4748
import java.time.Instant;
4849
import java.time.ZoneId;
4950
import java.util.Collections;
51+
import java.util.List;
5052
import java.util.SortedMap;
5153
import java.util.TreeMap;
5254
import java.util.concurrent.ExecutorService;
55+
import java.util.stream.Collectors;
5356

5457
import static org.elasticsearch.node.Node.NODE_MASTER_SETTING;
5558
import static org.elasticsearch.xpack.core.indexlifecycle.AbstractStepTestCase.randomStepKey;
@@ -58,6 +61,7 @@
5861
import static org.hamcrest.Matchers.equalTo;
5962
import static org.mockito.Matchers.any;
6063
import static org.mockito.Matchers.anyString;
64+
import static org.mockito.Matchers.eq;
6165
import static org.mockito.Mockito.doAnswer;
6266
import static org.mockito.Mockito.mock;
6367
import static org.mockito.Mockito.when;
@@ -148,7 +152,7 @@ public void testStoppedModeSkip() {
148152
}
149153

150154
public void testRequestedStopOnShrink() {
151-
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, randomAlphaOfLength(5));
155+
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, ShrinkStep.NAME);
152156
String policyName = randomAlphaOfLengthBetween(1, 20);
153157
IndexLifecycleRunnerTests.MockClusterStateActionStep mockStep =
154158
new IndexLifecycleRunnerTests.MockClusterStateActionStep(mockShrinkStep, randomStepKey());
@@ -180,14 +184,64 @@ public void testRequestedStopOnShrink() {
180184
.build();
181185

182186
ClusterChangedEvent event = new ClusterChangedEvent("_source", currentState, ClusterState.EMPTY_STATE);
183-
SetOnce<Boolean> executedShrink = new SetOnce<>();
187+
SetOnce<Boolean> changedOperationMode = new SetOnce<>();
184188
doAnswer(invocationOnMock -> {
185-
executedShrink.set(true);
189+
changedOperationMode.set(true);
186190
return null;
187-
}).when(clusterService).submitStateUpdateTask(anyString(), any(ExecuteStepsUpdateTask.class));
191+
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
192+
indexLifecycleService.applyClusterState(event);
193+
indexLifecycleService.triggerPolicies(currentState, true);
194+
assertNull(changedOperationMode.get());
195+
}
196+
197+
public void testRequestedStopInShrinkActionButNotShrinkStep() {
198+
// Create a list of all the shrink action steps during which ILM can safely be stopped (all of them except the actual shrink step)
199+
ShrinkAction action = new ShrinkAction(1);
200+
List<String> stoppableSteps = action.toSteps(mock(Client.class), "warm", randomStepKey()).stream()
201+
.map(sk -> sk.getKey().getName())
202+
.filter(name -> name.equals(ShrinkStep.NAME) == false)
203+
.collect(Collectors.toList());
204+
205+
Step.StepKey mockShrinkStep = new Step.StepKey(randomAlphaOfLength(4), ShrinkAction.NAME, randomFrom(stoppableSteps));
206+
String policyName = randomAlphaOfLengthBetween(1, 20);
207+
IndexLifecycleRunnerTests.MockClusterStateActionStep mockStep =
208+
new IndexLifecycleRunnerTests.MockClusterStateActionStep(mockShrinkStep, randomStepKey());
209+
MockAction mockAction = new MockAction(Collections.singletonList(mockStep));
210+
Phase phase = new Phase("phase", TimeValue.ZERO, Collections.singletonMap("action", mockAction));
211+
LifecyclePolicy policy = newTestLifecyclePolicy(policyName, Collections.singletonMap(phase.getName(), phase));
212+
SortedMap<String, LifecyclePolicyMetadata> policyMap = new TreeMap<>();
213+
policyMap.put(policyName, new LifecyclePolicyMetadata(policy, Collections.emptyMap(),
214+
randomNonNegativeLong(), randomNonNegativeLong()));
215+
Index index = new Index(randomAlphaOfLengthBetween(1, 20), randomAlphaOfLengthBetween(1, 20));
216+
LifecycleExecutionState.Builder lifecycleState = LifecycleExecutionState.builder();
217+
lifecycleState.setPhase(mockShrinkStep.getPhase());
218+
lifecycleState.setAction(mockShrinkStep.getAction());
219+
lifecycleState.setStep(mockShrinkStep.getName());
220+
IndexMetaData indexMetadata = IndexMetaData.builder(index.getName())
221+
.settings(settings(Version.CURRENT).put(LifecycleSettings.LIFECYCLE_NAME_SETTING.getKey(), policyName))
222+
.putCustom(ILM_CUSTOM_METADATA_KEY, lifecycleState.build().asMap())
223+
.numberOfShards(randomIntBetween(1, 5)).numberOfReplicas(randomIntBetween(0, 5)).build();
224+
ImmutableOpenMap.Builder<String, IndexMetaData> indices = ImmutableOpenMap.<String, IndexMetaData> builder()
225+
.fPut(index.getName(), indexMetadata);
226+
MetaData metaData = MetaData.builder()
227+
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(policyMap, OperationMode.STOPPING))
228+
.indices(indices.build())
229+
.persistentSettings(settings(Version.CURRENT).build())
230+
.build();
231+
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
232+
.metaData(metaData)
233+
.nodes(DiscoveryNodes.builder().localNodeId(nodeId).masterNodeId(nodeId).add(masterNode).build())
234+
.build();
235+
236+
ClusterChangedEvent event = new ClusterChangedEvent("_source", currentState, ClusterState.EMPTY_STATE);
237+
SetOnce<Boolean> changedOperationMode = new SetOnce<>();
238+
doAnswer(invocationOnMock -> {
239+
changedOperationMode.set(true);
240+
return null;
241+
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
188242
indexLifecycleService.applyClusterState(event);
189243
indexLifecycleService.triggerPolicies(currentState, true);
190-
assertTrue(executedShrink.get());
244+
assertTrue(changedOperationMode.get());
191245
}
192246

193247
public void testRequestedStopOnSafeAction() {
@@ -236,7 +290,7 @@ public void testRequestedStopOnSafeAction() {
236290
assertThat(task.getOperationMode(), equalTo(OperationMode.STOPPED));
237291
moveToMaintenance.set(true);
238292
return null;
239-
}).when(clusterService).submitStateUpdateTask(anyString(), any(OperationModeUpdateTask.class));
293+
}).when(clusterService).submitStateUpdateTask(eq("ilm_operation_mode_update"), any(OperationModeUpdateTask.class));
240294

241295
indexLifecycleService.applyClusterState(event);
242296
indexLifecycleService.triggerPolicies(currentState, randomBoolean());

0 commit comments

Comments
 (0)