Skip to content

Commit 34ce3f3

Browse files
committed
Add an index->step cache to the PolicyStepsRegistry (#82316)
1 parent d70692f commit 34ce3f3

File tree

3 files changed

+71
-5
lines changed

3 files changed

+71
-5
lines changed

docs/changelog/82316.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 82316
2+
summary: Add an index->step cache to the `PolicyStepsRegistry`
3+
area: ILM+SLM
4+
type: enhancement
5+
issues: []

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,15 +299,25 @@ public void clusterChanged(ClusterChangedEvent event) {
299299
if (prevIsMaster != event.localNodeMaster()) {
300300
this.isMaster = event.localNodeMaster();
301301
if (this.isMaster) {
302+
// we weren't the master, and now we are
302303
onMaster(event.state());
303304
} else {
305+
// we were the master, and now we aren't
304306
cancelJob();
307+
policyRegistry.clear();
305308
}
306309
}
307310

308-
final IndexLifecycleMetadata lifecycleMetadata = event.state().metadata().custom(IndexLifecycleMetadata.TYPE);
309-
if (this.isMaster && lifecycleMetadata != null) {
310-
triggerPolicies(event.state(), true);
311+
// if we're the master, then process deleted indices and trigger policies
312+
if (this.isMaster) {
313+
for (Index index : event.indicesDeleted()) {
314+
policyRegistry.delete(index);
315+
}
316+
317+
final IndexLifecycleMetadata lifecycleMetadata = event.state().metadata().custom(IndexLifecycleMetadata.TYPE);
318+
if (lifecycleMetadata != null) {
319+
triggerPolicies(event.state(), true);
320+
}
311321
}
312322
}
313323

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/PolicyStepsRegistry.java

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.common.io.stream.StreamOutput;
1919
import org.elasticsearch.core.Nullable;
2020
import org.elasticsearch.core.TimeValue;
21+
import org.elasticsearch.core.Tuple;
2122
import org.elasticsearch.index.Index;
2223
import org.elasticsearch.license.XPackLicenseState;
2324
import org.elasticsearch.xcontent.DeprecationHandler;
@@ -48,20 +49,27 @@
4849
import java.util.Set;
4950
import java.util.SortedMap;
5051
import java.util.TreeMap;
52+
import java.util.concurrent.ConcurrentHashMap;
5153
import java.util.stream.Collectors;
5254

5355
public class PolicyStepsRegistry {
5456
private static final Logger logger = LogManager.getLogger(PolicyStepsRegistry.class);
5557

58+
private final NamedXContentRegistry xContentRegistry;
5659
private final Client client;
5760
private final XPackLicenseState licenseState;
61+
5862
// keeps track of existing policies in the cluster state
5963
private final SortedMap<String, LifecyclePolicyMetadata> lifecyclePolicyMap;
6064
// keeps track of what the first step in a policy is, the key is policy name
6165
private final Map<String, Step> firstStepMap;
6266
// keeps track of a mapping from policy/step-name to respective Step, the key is policy name
6367
private final Map<String, Map<Step.StepKey, Step>> stepMap;
64-
private final NamedXContentRegistry xContentRegistry;
68+
69+
// tracks an index->step cache, where the indexmetadata is also tracked for cache invalidation/eviction purposes.
70+
// for a given index, the step can be cached as long as the indexmetadata (and the policy!) hasn't changed. since
71+
// policies change infrequently, the entire cache is cleared on policy change.
72+
private final Map<Index, Tuple<IndexMetadata, Step>> cachedSteps = new ConcurrentHashMap<>();
6573

6674
public PolicyStepsRegistry(NamedXContentRegistry xContentRegistry, Client client, XPackLicenseState licenseState) {
6775
this(new TreeMap<>(), new HashMap<>(), new HashMap<>(), xContentRegistry, client, licenseState);
@@ -98,6 +106,9 @@ Map<String, Map<Step.StepKey, Step>> getStepMap() {
98106
public void update(IndexLifecycleMetadata meta) {
99107
assert meta != null : "IndexLifecycleMetadata cannot be null when updating the policy steps registry";
100108

109+
// since the policies (may have) changed, the whole steps cache needs to be thrown out
110+
cachedSteps.clear();
111+
101112
DiffableUtils.MapDiff<String, LifecyclePolicyMetadata, Map<String, LifecyclePolicyMetadata>> mapDiff = DiffableUtils.diff(
102113
lifecyclePolicyMap,
103114
meta.getPolicyMetadatas(),
@@ -154,6 +165,36 @@ public LifecyclePolicyMetadata read(StreamInput in, String key) {
154165
}
155166
}
156167

168+
/**
169+
* Remove the entry for an index from the index->step cache.
170+
*
171+
* We clear the map entirely when the master of the cluster changes, and when any
172+
* policy changes, but in a long-lived cluster that doesn't happen to experience
173+
* either of those events (and where indices are removed regularly) we still want
174+
* the cache to trim deleted indices.
175+
*
176+
* n.b. even with this, there's still a pretty small chance that a given index
177+
* could leak, if we're right in the middle of populating the cache for that
178+
* index (in getStep) when we process the delete here, then we'll end up with an
179+
* entry that doesn't get deleted until the master changes or a policy changes
180+
* -- it's harmless enough
181+
*/
182+
public void delete(Index deleted) {
183+
cachedSteps.remove(deleted);
184+
}
185+
186+
/**
187+
* Clear internal maps that were populated by update (and others).
188+
*/
189+
public void clear() {
190+
// this is potentially large, so it's important to clear it
191+
cachedSteps.clear();
192+
// these are relatively small, but there's no harm in clearing them
193+
lifecyclePolicyMap.clear();
194+
firstStepMap.clear();
195+
stepMap.clear();
196+
}
197+
157198
/**
158199
* Return all ordered steps for the current policy for the index. Does not
159200
* resolve steps using the phase caching, but only for the currently existing policy.
@@ -266,6 +307,14 @@ private List<Step> parseStepsFromPhase(String policy, String currentPhase, Strin
266307

267308
@Nullable
268309
public Step getStep(final IndexMetadata indexMetadata, final Step.StepKey stepKey) {
310+
final Tuple<IndexMetadata, Step> cachedStep = cachedSteps.get(indexMetadata.getIndex());
311+
// n.b. we're using instance equality here for the IndexMetadata rather than object equality because it's fast,
312+
// this means that we're erring on the side of cache misses (if the IndexMetadata changed in any way, it'll be
313+
// a new instance, so we'll miss-and-repopulate the cache for the index in question)
314+
if (cachedStep != null && cachedStep.v1() == indexMetadata && cachedStep.v2().getKey().equals(stepKey)) {
315+
return cachedStep.v2();
316+
}
317+
269318
if (ErrorStep.NAME.equals(stepKey.getName())) {
270319
return new ErrorStep(new Step.StepKey(stepKey.getPhase(), stepKey.getAction(), ErrorStep.NAME));
271320
}
@@ -304,7 +353,9 @@ public Step getStep(final IndexMetadata indexMetadata, final Step.StepKey stepKe
304353
+ phaseSteps;
305354

306355
// Return the step that matches the given stepKey or else null if we couldn't find it
307-
return phaseSteps.stream().filter(step -> step.getKey().equals(stepKey)).findFirst().orElse(null);
356+
final Step s = phaseSteps.stream().filter(step -> step.getKey().equals(stepKey)).findFirst().orElse(null);
357+
cachedSteps.put(indexMetadata.getIndex(), Tuple.tuple(indexMetadata, s));
358+
return s;
308359
}
309360

310361
/**

0 commit comments

Comments
 (0)