Skip to content

Commit b1b249d

Browse files
#101193 Preserve Step Info Across ILM Auto Retries (#113187)
* Add new Previous Step Info field to LifecycleExecutionState * Add new field to IndexLifecycleExplainResponse * Add new field to TransportExplainLifecycleAction * Add logic to IndexLifecycleTransition to keep previous step info * Switch tests to use Java standard Clock class for any time-based testing; this is the recommended method * Fix tests for new field Also refactor tests to newer style * Add test to ensure step info is preserved across auto retries * Add docs for new field * Changelog Entry * Update docs/changelog/113187.yaml * Revert "Switch tests to use Java standard Clock class" This reverts commit 241074c. * PR Changes * PR Changes - Improve docs wording Co-authored-by: Mary Gouseti <[email protected]> * Integration test for new ILM explain field * Use ROOT locale instead of default toLowerCase * PR Changes - Switch to block strings * Remove forbidden API usage --------- Co-authored-by: Mary Gouseti <[email protected]>
1 parent 07846d4 commit b1b249d

File tree

11 files changed

+210
-113
lines changed

11 files changed

+210
-113
lines changed

docs/changelog/113187.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 113187
2+
summary: Preserve Step Info Across ILM Auto Retries
3+
area: ILM+SLM
4+
type: enhancement
5+
issues: []

docs/reference/ilm/apis/explain.asciidoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,12 @@ the case.
303303
"index_uuid": "H7lF9n36Rzqa-KfKcnGQMg",
304304
"index": "test-000057"
305305
},
306+
"previous_step_info": { <5>
307+
"type": "cluster_block_exception",
308+
"reason": "index [test-000057/H7lF9n36Rzqa-KfKcnGQMg] blocked by: [FORBIDDEN/5/index read-only (api)",
309+
"index_uuid": "H7lF9n36Rzqa-KfKcnGQMg",
310+
"index": "test-000057"
311+
},
306312
"phase_execution": {
307313
"policy": "my_lifecycle3",
308314
"phase_definition": {
@@ -329,3 +335,4 @@ is true, {ilm-init} will retry the failed step automatically.
329335
<3> Shows the number of attempted automatic retries to execute the failed
330336
step.
331337
<4> What went wrong
338+
<5> Contains a copy of the `step_info` field (when it exists) of the last attempted or executed step for diagnostic purposes, since the `step_info` is overwritten during each new attempt.

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ static TransportVersion def(int id) {
226226
public static final TransportVersion SEMANTIC_TEXT_SEARCH_INFERENCE_ID = def(8_750_00_0);
227227
public static final TransportVersion ML_INFERENCE_CHUNKING_SETTINGS = def(8_751_00_0);
228228
public static final TransportVersion SEMANTIC_QUERY_INNER_HITS = def(8_752_00_0);
229+
public static final TransportVersion RETAIN_ILM_STEP_INFO = def(8_753_00_0);
229230

230231
/*
231232
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/cluster/metadata/LifecycleExecutionState.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public record LifecycleExecutionState(
2828
Boolean isAutoRetryableError,
2929
Integer failedStepRetryCount,
3030
String stepInfo,
31+
String previousStepInfo,
3132
String phaseDefinition,
3233
Long lifecycleDate,
3334
Long phaseTime,
@@ -53,6 +54,7 @@ public record LifecycleExecutionState(
5354
private static final String IS_AUTO_RETRYABLE_ERROR = "is_auto_retryable_error";
5455
private static final String FAILED_STEP_RETRY_COUNT = "failed_step_retry_count";
5556
private static final String STEP_INFO = "step_info";
57+
private static final String PREVIOUS_STEP_INFO = "previous_step_info";
5658
private static final String PHASE_DEFINITION = "phase_definition";
5759
private static final String SNAPSHOT_NAME = "snapshot_name";
5860
private static final String SNAPSHOT_REPOSITORY = "snapshot_repository";
@@ -74,6 +76,7 @@ public static Builder builder(LifecycleExecutionState state) {
7476
.setIsAutoRetryableError(state.isAutoRetryableError)
7577
.setFailedStepRetryCount(state.failedStepRetryCount)
7678
.setStepInfo(state.stepInfo)
79+
.setPreviousStepInfo(state.previousStepInfo)
7780
.setPhaseDefinition(state.phaseDefinition)
7881
.setIndexCreationDate(state.lifecycleDate)
7982
.setPhaseTime(state.phaseTime)
@@ -116,6 +119,10 @@ public static LifecycleExecutionState fromCustomMetadata(Map<String, String> cus
116119
if (stepInfo != null) {
117120
builder.setStepInfo(stepInfo);
118121
}
122+
String previousStepInfo = customData.get(PREVIOUS_STEP_INFO);
123+
if (previousStepInfo != null) {
124+
builder.setPreviousStepInfo(previousStepInfo);
125+
}
119126
String phaseDefinition = customData.get(PHASE_DEFINITION);
120127
if (phaseDefinition != null) {
121128
builder.setPhaseDefinition(phaseDefinition);
@@ -224,6 +231,9 @@ public Map<String, String> asMap() {
224231
if (stepInfo != null) {
225232
result.put(STEP_INFO, stepInfo);
226233
}
234+
if (previousStepInfo != null) {
235+
result.put(PREVIOUS_STEP_INFO, previousStepInfo);
236+
}
227237
if (lifecycleDate != null) {
228238
result.put(INDEX_CREATION_DATE, String.valueOf(lifecycleDate));
229239
}
@@ -263,6 +273,7 @@ public static class Builder {
263273
private String step;
264274
private String failedStep;
265275
private String stepInfo;
276+
private String previousStepInfo;
266277
private String phaseDefinition;
267278
private Long indexCreationDate;
268279
private Long phaseTime;
@@ -301,6 +312,11 @@ public Builder setStepInfo(String stepInfo) {
301312
return this;
302313
}
303314

315+
public Builder setPreviousStepInfo(String previousStepInfo) {
316+
this.previousStepInfo = previousStepInfo;
317+
return this;
318+
}
319+
304320
public Builder setPhaseDefinition(String phaseDefinition) {
305321
this.phaseDefinition = phaseDefinition;
306322
return this;
@@ -370,6 +386,7 @@ public LifecycleExecutionState build() {
370386
isAutoRetryableError,
371387
failedStepRetryCount,
372388
stepInfo,
389+
previousStepInfo,
373390
phaseDefinition,
374391
indexCreationDate,
375392
phaseTime,

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/IndexLifecycleExplainResponse.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl
4848
private static final ParseField STEP_TIME_MILLIS_FIELD = new ParseField("step_time_millis");
4949
private static final ParseField STEP_TIME_FIELD = new ParseField("step_time");
5050
private static final ParseField STEP_INFO_FIELD = new ParseField("step_info");
51+
private static final ParseField PREVIOUS_STEP_INFO_FIELD = new ParseField("previous_step_info");
5152
private static final ParseField PHASE_EXECUTION_INFO = new ParseField("phase_execution");
5253
private static final ParseField AGE_FIELD = new ParseField("age");
5354
private static final ParseField TIME_SINCE_INDEX_CREATION_FIELD = new ParseField("time_since_index_creation");
@@ -76,6 +77,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl
7677
(String) a[17],
7778
(String) a[18],
7879
(BytesReference) a[11],
80+
(BytesReference) a[21],
7981
(PhaseExecutionInfo) a[12]
8082
// a[13] == "age"
8183
// a[20] == "time_since_index_creation"
@@ -111,6 +113,11 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl
111113
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), SHRINK_INDEX_NAME);
112114
PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), INDEX_CREATION_DATE_MILLIS_FIELD);
113115
PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), TIME_SINCE_INDEX_CREATION_FIELD);
116+
PARSER.declareObject(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> {
117+
XContentBuilder builder = JsonXContent.contentBuilder();
118+
builder.copyCurrentStructure(p);
119+
return BytesReference.bytes(builder);
120+
}, PREVIOUS_STEP_INFO_FIELD);
114121
}
115122

116123
private final String index;
@@ -126,6 +133,7 @@ public class IndexLifecycleExplainResponse implements ToXContentObject, Writeabl
126133
private final Long stepTime;
127134
private final boolean managedByILM;
128135
private final BytesReference stepInfo;
136+
private final BytesReference previousStepInfo;
129137
private final PhaseExecutionInfo phaseExecutionInfo;
130138
private final Boolean isAutoRetryableError;
131139
private final Integer failedStepRetryCount;
@@ -153,6 +161,7 @@ public static IndexLifecycleExplainResponse newManagedIndexResponse(
153161
String snapshotName,
154162
String shrinkIndexName,
155163
BytesReference stepInfo,
164+
BytesReference previousStepInfo,
156165
PhaseExecutionInfo phaseExecutionInfo
157166
) {
158167
return new IndexLifecycleExplainResponse(
@@ -174,6 +183,7 @@ public static IndexLifecycleExplainResponse newManagedIndexResponse(
174183
snapshotName,
175184
shrinkIndexName,
176185
stepInfo,
186+
previousStepInfo,
177187
phaseExecutionInfo
178188
);
179189
}
@@ -198,6 +208,7 @@ public static IndexLifecycleExplainResponse newUnmanagedIndexResponse(String ind
198208
null,
199209
null,
200210
null,
211+
null,
201212
null
202213
);
203214
}
@@ -221,6 +232,7 @@ private IndexLifecycleExplainResponse(
221232
String snapshotName,
222233
String shrinkIndexName,
223234
BytesReference stepInfo,
235+
BytesReference previousStepInfo,
224236
PhaseExecutionInfo phaseExecutionInfo
225237
) {
226238
if (managedByILM) {
@@ -262,6 +274,7 @@ private IndexLifecycleExplainResponse(
262274
|| actionTime != null
263275
|| stepTime != null
264276
|| stepInfo != null
277+
|| previousStepInfo != null
265278
|| phaseExecutionInfo != null) {
266279
throw new IllegalArgumentException(
267280
"Unmanaged index response must only contain fields: [" + MANAGED_BY_ILM_FIELD + ", " + INDEX_FIELD + "]"
@@ -283,6 +296,7 @@ private IndexLifecycleExplainResponse(
283296
this.isAutoRetryableError = isAutoRetryableError;
284297
this.failedStepRetryCount = failedStepRetryCount;
285298
this.stepInfo = stepInfo;
299+
this.previousStepInfo = previousStepInfo;
286300
this.phaseExecutionInfo = phaseExecutionInfo;
287301
this.repositoryName = repositoryName;
288302
this.snapshotName = snapshotName;
@@ -314,6 +328,11 @@ public IndexLifecycleExplainResponse(StreamInput in) throws IOException {
314328
} else {
315329
indexCreationDate = null;
316330
}
331+
if (in.getTransportVersion().onOrAfter(TransportVersions.RETAIN_ILM_STEP_INFO)) {
332+
previousStepInfo = in.readOptionalBytesReference();
333+
} else {
334+
previousStepInfo = null;
335+
}
317336
} else {
318337
policyName = null;
319338
lifecycleDate = null;
@@ -327,6 +346,7 @@ public IndexLifecycleExplainResponse(StreamInput in) throws IOException {
327346
actionTime = null;
328347
stepTime = null;
329348
stepInfo = null;
349+
previousStepInfo = null;
330350
phaseExecutionInfo = null;
331351
repositoryName = null;
332352
snapshotName = null;
@@ -359,6 +379,9 @@ public void writeTo(StreamOutput out) throws IOException {
359379
if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_1_0)) {
360380
out.writeOptionalLong(indexCreationDate);
361381
}
382+
if (out.getTransportVersion().onOrAfter(TransportVersions.RETAIN_ILM_STEP_INFO)) {
383+
out.writeOptionalBytesReference(previousStepInfo);
384+
}
362385
}
363386
}
364387

@@ -422,6 +445,10 @@ public BytesReference getStepInfo() {
422445
return stepInfo;
423446
}
424447

448+
public BytesReference getPreviousStepInfo() {
449+
return previousStepInfo;
450+
}
451+
425452
public PhaseExecutionInfo getPhaseExecutionInfo() {
426453
return phaseExecutionInfo;
427454
}
@@ -515,6 +542,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
515542
if (stepInfo != null && stepInfo.length() > 0) {
516543
builder.rawField(STEP_INFO_FIELD.getPreferredName(), stepInfo.streamInput(), XContentType.JSON);
517544
}
545+
if (previousStepInfo != null && previousStepInfo.length() > 0) {
546+
builder.rawField(PREVIOUS_STEP_INFO_FIELD.getPreferredName(), previousStepInfo.streamInput(), XContentType.JSON);
547+
}
518548
if (phaseExecutionInfo != null) {
519549
builder.field(PHASE_EXECUTION_INFO.getPreferredName(), phaseExecutionInfo);
520550
}
@@ -544,6 +574,7 @@ public int hashCode() {
544574
snapshotName,
545575
shrinkIndexName,
546576
stepInfo,
577+
previousStepInfo,
547578
phaseExecutionInfo
548579
);
549580
}
@@ -575,6 +606,7 @@ public boolean equals(Object obj) {
575606
&& Objects.equals(snapshotName, other.snapshotName)
576607
&& Objects.equals(shrinkIndexName, other.shrinkIndexName)
577608
&& Objects.equals(stepInfo, other.stepInfo)
609+
&& Objects.equals(previousStepInfo, other.previousStepInfo)
578610
&& Objects.equals(phaseExecutionInfo, other.phaseExecutionInfo);
579611
}
580612

0 commit comments

Comments
 (0)