Skip to content

Commit 4ef4204

Browse files
andreidan and dakrone authored
ILM wait for snapshot creation to complete (#54673)
As the snapshot is not taken on the calling thread even when the request's wait_for_completion parameter is true, we can set the flag to true and use the SnapshotInfo in the response to verify that the snapshot was created successfully, or otherwise go back to the CleanSnapshotStep and retry creating the snapshot. This branching is implemented by a new step that wraps an AsyncActionStep (keeping the wrapped step's key()) and moves to a different next step when the wrapped action reports an incomplete response. Co-Authored-By: Lee Hinman <[email protected]>
1 parent 64a2813 commit 4ef4204

File tree

8 files changed

+284
-532
lines changed

8 files changed

+284
-532
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
package org.elasticsearch.xpack.core.ilm;
8+
9+
import org.apache.lucene.util.SetOnce;
10+
import org.elasticsearch.client.Client;
11+
import org.elasticsearch.cluster.ClusterState;
12+
import org.elasticsearch.cluster.ClusterStateObserver;
13+
import org.elasticsearch.cluster.metadata.IndexMetadata;
14+
15+
import java.util.Objects;
16+
17+
/**
18+
* This step wraps an {@link AsyncActionStep} in order to be able to manipulate what the next step will be, depending on the result of the
19+
* wrapped {@link AsyncActionStep}.
20+
* <p>
21+
* If the action response is complete, the {@link AsyncActionBranchingStep}'s nextStepKey will be the nextStepKey of the wrapped action. If
22+
* the response is incomplete the nextStepKey will be the provided {@link AsyncActionBranchingStep#nextKeyOnIncompleteResponse}.
23+
* Failures encountered whilst executing the wrapped action will be propagated directly.
24+
*/
25+
public class AsyncActionBranchingStep extends AsyncActionStep {
26+
private final AsyncActionStep stepToExecute;
27+
28+
private StepKey nextKeyOnIncompleteResponse;
29+
private SetOnce<Boolean> onResponseResult;
30+
31+
public AsyncActionBranchingStep(AsyncActionStep stepToExecute, StepKey nextKeyOnIncompleteResponse, Client client) {
32+
// super.nextStepKey is set to null since it is not used by this step
33+
super(stepToExecute.getKey(), null, client);
34+
this.stepToExecute = stepToExecute;
35+
this.nextKeyOnIncompleteResponse = nextKeyOnIncompleteResponse;
36+
this.onResponseResult = new SetOnce<>();
37+
}
38+
39+
@Override
40+
public boolean isRetryable() {
41+
return true;
42+
}
43+
44+
@Override
45+
public void performAction(IndexMetadata indexMetadata, ClusterState currentClusterState, ClusterStateObserver observer,
46+
Listener listener) {
47+
stepToExecute.performAction(indexMetadata, currentClusterState, observer, new Listener() {
48+
@Override
49+
public void onResponse(boolean complete) {
50+
onResponseResult.set(complete);
51+
listener.onResponse(complete);
52+
}
53+
54+
@Override
55+
public void onFailure(Exception e) {
56+
listener.onFailure(e);
57+
}
58+
});
59+
}
60+
61+
@Override
62+
public final StepKey getNextStepKey() {
63+
if (onResponseResult.get() == null) {
64+
throw new IllegalStateException("cannot call getNextStepKey before performAction");
65+
}
66+
return onResponseResult.get() ? stepToExecute.getNextStepKey() : nextKeyOnIncompleteResponse;
67+
}
68+
69+
/**
70+
* Represents the {@link AsyncActionStep} that's wrapped by this branching step.
71+
*/
72+
AsyncActionStep getStepToExecute() {
73+
return stepToExecute;
74+
}
75+
76+
/**
77+
* The step key to be reported as the {@link AsyncActionBranchingStep#getNextStepKey()} if the response of the wrapped
78+
* {@link AsyncActionBranchingStep#getStepToExecute()} is incomplete.
79+
*/
80+
StepKey getNextKeyOnIncompleteResponse() {
81+
return nextKeyOnIncompleteResponse;
82+
}
83+
84+
@Override
85+
public boolean equals(Object o) {
86+
if (this == o) {
87+
return true;
88+
}
89+
if (o == null || getClass() != o.getClass()) {
90+
return false;
91+
}
92+
if (!super.equals(o)) {
93+
return false;
94+
}
95+
AsyncActionBranchingStep that = (AsyncActionBranchingStep) o;
96+
return super.equals(o)
97+
&& Objects.equals(stepToExecute, that.stepToExecute)
98+
&& Objects.equals(nextKeyOnIncompleteResponse, that.nextKeyOnIncompleteResponse);
99+
}
100+
101+
@Override
102+
public int hashCode() {
103+
return Objects.hash(super.hashCode(), stepToExecute, nextKeyOnIncompleteResponse);
104+
}
105+
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/CreateSnapshotStep.java

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
*/
66
package org.elasticsearch.xpack.core.ilm;
77

8+
import org.apache.logging.log4j.LogManager;
9+
import org.apache.logging.log4j.Logger;
810
import org.elasticsearch.action.ActionListener;
911
import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest;
1012
import org.elasticsearch.client.Client;
1113
import org.elasticsearch.cluster.ClusterState;
1214
import org.elasticsearch.cluster.metadata.IndexMetadata;
1315
import org.elasticsearch.common.Strings;
14-
import org.elasticsearch.rest.RestStatus;
15-
import org.elasticsearch.snapshots.SnapshotException;
16+
import org.elasticsearch.snapshots.SnapshotInfo;
1617

1718
import static org.elasticsearch.xpack.core.ilm.LifecycleExecutionState.fromIndexMetadata;
1819

@@ -23,6 +24,8 @@
2324
public class CreateSnapshotStep extends AsyncRetryDuringSnapshotActionStep {
2425
public static final String NAME = "create-snapshot";
2526

27+
private static final Logger logger = LogManager.getLogger(CreateSnapshotStep.class);
28+
2629
public CreateSnapshotStep(StepKey key, StepKey nextStepKey, Client client) {
2730
super(key, nextStepKey, client);
2831
}
@@ -54,19 +57,26 @@ void performDuringNoSnapshot(IndexMetadata indexMetadata, ClusterState currentCl
5457
}
5558
CreateSnapshotRequest request = new CreateSnapshotRequest(snapshotRepository, snapshotName);
5659
request.indices(indexName);
57-
// we'll not wait for the snapshot to complete in this step as the async steps are executed from threads that shouldn't perform
58-
// expensive operations (ie. clusterStateProcessed)
59-
request.waitForCompletion(false);
60+
// this is safe as the snapshot creation will still be async, it's just that the listener will be notified when the snapshot is
61+
// complete
62+
request.waitForCompletion(true);
6063
request.includeGlobalState(false);
6164
request.masterNodeTimeout(getMasterTimeout(currentClusterState));
6265
getClient().admin().cluster().createSnapshot(request,
6366
ActionListener.wrap(response -> {
64-
if (response.status().equals(RestStatus.INTERNAL_SERVER_ERROR)) {
65-
listener.onFailure(new SnapshotException(snapshotRepository, snapshotName,
66-
"unable to request snapshot creation [" + snapshotName + "] for index [ " + indexName + "] as part of policy [" +
67-
policyName + "] execution due to an internal server error"));
68-
} else {
67+
logger.debug("create snapshot response for policy [{}] and index [{}] is: {}", policyName, indexName,
68+
Strings.toString(response));
69+
final SnapshotInfo snapInfo = response.getSnapshotInfo();
70+
71+
// Check that there are no failed shards, since the request may not entirely
72+
// fail, but may still have failures (such as in the case of an aborted snapshot)
73+
if (snapInfo.failedShards() == 0) {
6974
listener.onResponse(true);
75+
} else {
76+
int failures = snapInfo.failedShards();
77+
int total = snapInfo.totalShards();
78+
logger.warn("failed to create snapshot successfully, {} failures out of {} total shards failed", failures, total);
79+
listener.onResponse(false);
7080
}
7181
}, listener::onFailure));
7282
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/OnAsyncWaitBranchingStep.java

Lines changed: 0 additions & 148 deletions
This file was deleted.

0 commit comments

Comments
 (0)