Skip to content

Commit c330790

Browse files
committed
Add an index->step cache to the PolicyStepsRegistry (#82316)
1 parent 9d0be14 commit c330790

File tree

3 files changed

+71
-5
lines changed

3 files changed

+71
-5
lines changed

docs/changelog/82316.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 82316
2+
summary: Add an index->step cache to the `PolicyStepsRegistry`
3+
area: ILM+SLM
4+
type: enhancement
5+
issues: []

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,15 +299,25 @@ public void clusterChanged(ClusterChangedEvent event) {
299299
if (prevIsMaster != event.localNodeMaster()) {
300300
this.isMaster = event.localNodeMaster();
301301
if (this.isMaster) {
302+
// we weren't the master, and now we are
302303
onMaster(event.state());
303304
} else {
305+
// we were the master, and now we aren't
304306
cancelJob();
307+
policyRegistry.clear();
305308
}
306309
}
307310

308-
final IndexLifecycleMetadata lifecycleMetadata = event.state().metadata().custom(IndexLifecycleMetadata.TYPE);
309-
if (this.isMaster && lifecycleMetadata != null) {
310-
triggerPolicies(event.state(), true);
311+
// if we're the master, then process deleted indices and trigger policies
312+
if (this.isMaster) {
313+
for (Index index : event.indicesDeleted()) {
314+
policyRegistry.delete(index);
315+
}
316+
317+
final IndexLifecycleMetadata lifecycleMetadata = event.state().metadata().custom(IndexLifecycleMetadata.TYPE);
318+
if (lifecycleMetadata != null) {
319+
triggerPolicies(event.state(), true);
320+
}
311321
}
312322
}
313323

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/PolicyStepsRegistry.java

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.common.io.stream.StreamOutput;
1919
import org.elasticsearch.core.Nullable;
2020
import org.elasticsearch.core.TimeValue;
21+
import org.elasticsearch.core.Tuple;
2122
import org.elasticsearch.index.Index;
2223
import org.elasticsearch.license.XPackLicenseState;
2324
import org.elasticsearch.xcontent.DeprecationHandler;
@@ -49,20 +50,27 @@
4950
import java.util.Set;
5051
import java.util.SortedMap;
5152
import java.util.TreeMap;
53+
import java.util.concurrent.ConcurrentHashMap;
5254
import java.util.stream.Collectors;
5355

5456
public class PolicyStepsRegistry {
5557
private static final Logger logger = LogManager.getLogger(PolicyStepsRegistry.class);
5658

59+
private final NamedXContentRegistry xContentRegistry;
5760
private final Client client;
5861
private final XPackLicenseState licenseState;
62+
5963
// keeps track of existing policies in the cluster state
6064
private final SortedMap<String, LifecyclePolicyMetadata> lifecyclePolicyMap;
6165
// keeps track of what the first step in a policy is, the key is policy name
6266
private final Map<String, Step> firstStepMap;
6367
// keeps track of a mapping from policy/step-name to respective Step, the key is policy name
6468
private final Map<String, Map<Step.StepKey, Step>> stepMap;
65-
private final NamedXContentRegistry xContentRegistry;
69+
70+
// tracks an index->step cache, where the indexmetadata is also tracked for cache invalidation/eviction purposes.
71+
// for a given index, the step can be cached as long as the indexmetadata (and the policy!) hasn't changed. since
72+
// policies change infrequently, the entire cache is cleared on policy change.
73+
private final Map<Index, Tuple<IndexMetadata, Step>> cachedSteps = new ConcurrentHashMap<>();
6674

6775
public PolicyStepsRegistry(NamedXContentRegistry xContentRegistry, Client client, XPackLicenseState licenseState) {
6876
this(new TreeMap<>(), new HashMap<>(), new HashMap<>(), xContentRegistry, client, licenseState);
@@ -99,6 +107,9 @@ Map<String, Map<Step.StepKey, Step>> getStepMap() {
99107
public void update(IndexLifecycleMetadata meta) {
100108
assert meta != null : "IndexLifecycleMetadata cannot be null when updating the policy steps registry";
101109

110+
// since the policies (may have) changed, the whole steps cache needs to be thrown out
111+
cachedSteps.clear();
112+
102113
DiffableUtils.MapDiff<String, LifecyclePolicyMetadata, Map<String, LifecyclePolicyMetadata>> mapDiff = DiffableUtils.diff(
103114
lifecyclePolicyMap,
104115
meta.getPolicyMetadatas(),
@@ -155,6 +166,36 @@ public LifecyclePolicyMetadata read(StreamInput in, String key) {
155166
}
156167
}
157168

169+
/**
170+
* Remove the entry for an index from the index->step cache.
171+
*
172+
* We clear the map entirely when the master of the cluster changes, and when any
173+
* policy changes, but in a long-lived cluster that doesn't happen to experience
174+
* either of those events (and where indices are removed regularly) we still want
175+
* the cache to trim deleted indices.
176+
*
177+
* n.b. even with this, there's still a pretty small chance that a given index
178+
* could leak, if we're right in the middle of populating the cache for that
179+
* index (in getStep) when we process the delete here, then we'll end up with an
180+
* entry that doesn't get deleted until the master changes or a policy changes
181+
* -- it's harmless enough
182+
*/
183+
public void delete(Index deleted) {
184+
cachedSteps.remove(deleted);
185+
}
186+
187+
/**
188+
* Clear internal maps that were populated by update (and others).
189+
*/
190+
public void clear() {
191+
// this is potentially large, so it's important to clear it
192+
cachedSteps.clear();
193+
// these are relatively small, but there's no harm in clearing them
194+
lifecyclePolicyMap.clear();
195+
firstStepMap.clear();
196+
stepMap.clear();
197+
}
198+
158199
/**
159200
* Return all ordered steps for the current policy for the index. Does not
160201
* resolve steps using the phase caching, but only for the currently existing policy.
@@ -267,6 +308,14 @@ private List<Step> parseStepsFromPhase(String policy, String currentPhase, Strin
267308

268309
@Nullable
269310
public Step getStep(final IndexMetadata indexMetadata, final Step.StepKey stepKey) {
311+
final Tuple<IndexMetadata, Step> cachedStep = cachedSteps.get(indexMetadata.getIndex());
312+
// n.b. we're using instance equality here for the IndexMetadata rather than object equality because it's fast,
313+
// this means that we're erring on the side of cache misses (if the IndexMetadata changed in any way, it'll be
314+
// a new instance, so we'll miss-and-repopulate the cache for the index in question)
315+
if (cachedStep != null && cachedStep.v1() == indexMetadata && cachedStep.v2().getKey().equals(stepKey)) {
316+
return cachedStep.v2();
317+
}
318+
270319
if (ErrorStep.NAME.equals(stepKey.getName())) {
271320
return new ErrorStep(new Step.StepKey(stepKey.getPhase(), stepKey.getAction(), ErrorStep.NAME));
272321
}
@@ -305,7 +354,9 @@ public Step getStep(final IndexMetadata indexMetadata, final Step.StepKey stepKe
305354
+ phaseSteps;
306355

307356
// Return the step that matches the given stepKey or else null if we couldn't find it
308-
return phaseSteps.stream().filter(step -> step.getKey().equals(stepKey)).findFirst().orElse(null);
357+
final Step s = phaseSteps.stream().filter(step -> step.getKey().equals(stepKey)).findFirst().orElse(null);
358+
cachedSteps.put(indexMetadata.getIndex(), Tuple.tuple(indexMetadata, s));
359+
return s;
309360
}
310361

311362
/**

0 commit comments

Comments
 (0)