Skip to content

Commit 01dc45a

Browse files
authored
ILM: ignore clusterChanged events if state not recovered (#67507)
IndexLifecycleService will listen for clusterChanged events in order to trigger the async actions when a new master is elected. During the same event processing routine determines when a new master was elected (to avoid triggering the async actions multiple times). However, if it doesn't exclude the events based on not fully recovered cluster states it will get into a situation where it records the "new master elected" but it cannot trigger the async actions because the cluster state does not contain the `IndexLifecycleMetadata` custom.
1 parent 6f02735 commit 01dc45a

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.common.settings.Settings;
2424
import org.elasticsearch.common.unit.TimeValue;
2525
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
26+
import org.elasticsearch.gateway.GatewayService;
2627
import org.elasticsearch.index.Index;
2728
import org.elasticsearch.index.shard.IndexEventListener;
2829
import org.elasticsearch.threadpool.ThreadPool;
@@ -222,6 +223,11 @@ private synchronized void maybeScheduleJob() {
222223

223224
@Override
224225
public void clusterChanged(ClusterChangedEvent event) {
226+
// wait for the cluster state to be recovered so the ILM policies are present
227+
if (event.state().blocks().hasGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK)) {
228+
return;
229+
}
230+
225231
// Instead of using a LocalNodeMasterListener to track master changes, this service will
226232
// track them here to avoid conditions where master listener events run after other
227233
// listeners that depend on what happened in the master listener

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleServiceTests.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.elasticsearch.cluster.ClusterChangedEvent;
1414
import org.elasticsearch.cluster.ClusterName;
1515
import org.elasticsearch.cluster.ClusterState;
16+
import org.elasticsearch.cluster.block.ClusterBlocks;
1617
import org.elasticsearch.cluster.metadata.IndexMetadata;
1718
import org.elasticsearch.cluster.metadata.Metadata;
1819
import org.elasticsearch.cluster.node.DiscoveryNode;
@@ -59,6 +60,8 @@
5960
import java.util.concurrent.ExecutorService;
6061
import java.util.concurrent.TimeUnit;
6162

63+
import static java.time.Clock.systemUTC;
64+
import static org.elasticsearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK;
6265
import static org.elasticsearch.xpack.core.ilm.AbstractStepTestCase.randomStepKey;
6366
import static org.elasticsearch.xpack.core.ilm.LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY;
6467
import static org.elasticsearch.xpack.ilm.LifecyclePolicyTestsUtils.newTestLifecyclePolicy;
@@ -449,6 +452,27 @@ public void doTestExceptionStillProcessesOtherIndices(boolean useOnMaster) {
449452
}
450453
}
451454

455+
public void testClusterChangedWaitsForTheStateToBeRecovered() {
456+
IndexLifecycleService ilmService = new IndexLifecycleService(Settings.EMPTY, mock(Client.class), clusterService, threadPool,
457+
systemUTC(), () -> now, null, null) {
458+
459+
@Override
460+
void onMaster(ClusterState clusterState) {
461+
fail("IndexLifecycleService ignored the global [state not recovered / initialized] cluster block");
462+
}
463+
464+
@Override
465+
void triggerPolicies(ClusterState clusterState, boolean fromClusterStateChange) {
466+
fail("IndexLifecycleService ignored the global [state not recovered / initialized] cluster block");
467+
}
468+
};
469+
470+
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
471+
.blocks(ClusterBlocks.builder().addGlobalBlock(STATE_NOT_RECOVERED_BLOCK).build())
472+
.build();
473+
ilmService.clusterChanged(new ClusterChangedEvent("_source", currentState, ClusterState.EMPTY_STATE));
474+
}
475+
452476
public void testTriggeredDifferentJob() {
453477
Mockito.reset(clusterService);
454478
SchedulerEngine.Event schedulerEvent = new SchedulerEngine.Event("foo", randomLong(), randomLong());

0 commit comments

Comments
 (0)