Skip to content

Commit d68b651

Browse files
committed
Fix SLM check for restore in progress (elastic#50868)
* Fix SLM check for restore in progress This commit fixes the check in SLM where the `RestoreInProgress` metadata was checked for existence. Rather than check existence we should instead check the `isEmpty` method. Prior to this, a successful restore for a repository that used SLM retention would prevent SLM retention from running in subsequent invocations, due to SLM thinking that a restore was still running.
1 parent 848388a commit d68b651

File tree

3 files changed

+84
-1
lines changed

3 files changed

+84
-1
lines changed

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotRetentionTask.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ public static boolean okayToDeleteSnapshots(ClusterState state) {
464464

465465
// Cannot delete during a restore
466466
final RestoreInProgress restoreInProgress = state.custom(RestoreInProgress.TYPE);
467-
if (restoreInProgress != null) {
467+
if (restoreInProgress != null && restoreInProgress.isEmpty() == false) {
468468
return false;
469469
}
470470

@@ -498,6 +498,7 @@ public void onNewClusterState(ClusterState state) {
498498
logger.debug("retrying SLM snapshot retention deletion after snapshot operation has completed");
499499
reRun.accept(state);
500500
} else {
501+
logger.trace("received new cluster state but a snapshot operation is still running");
501502
observer.waitForNextChange(this);
502503
}
503504
} catch (Exception e) {

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SLMSnapshotBlockingIntegTests.java

+73
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88

99
import org.elasticsearch.action.ActionFuture;
1010
import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse;
11+
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotAction;
12+
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequest;
13+
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
1114
import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
1215
import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse;
1316
import org.elasticsearch.action.index.IndexRequestBuilder;
@@ -22,6 +25,7 @@
2225
import org.elasticsearch.plugins.Plugin;
2326
import org.elasticsearch.repositories.RepositoriesService;
2427
import org.elasticsearch.repositories.RepositoryException;
28+
import org.elasticsearch.rest.RestStatus;
2529
import org.elasticsearch.snapshots.ConcurrentSnapshotExecutionException;
2630
import org.elasticsearch.snapshots.SnapshotInfo;
2731
import org.elasticsearch.snapshots.SnapshotMissingException;
@@ -30,6 +34,7 @@
3034
import org.elasticsearch.test.ESIntegTestCase;
3135
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
3236
import org.elasticsearch.xpack.core.XPackSettings;
37+
import org.elasticsearch.xpack.core.slm.SnapshotInvocationRecord;
3338
import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicy;
3439
import org.elasticsearch.xpack.core.slm.SnapshotLifecyclePolicyItem;
3540
import org.elasticsearch.xpack.core.slm.SnapshotRetentionConfiguration;
@@ -432,6 +437,74 @@ private void testUnsuccessfulSnapshotRetention(boolean partialSuccess) throws Ex
432437
}
433438
}
434439

440+
public void testSLMRetentionAfterRestore() throws Exception {
441+
final String indexName = "test";
442+
final String policyName = "test-policy";
443+
int docCount = 20;
444+
for (int i = 0; i < docCount; i++) {
445+
index(indexName, i + "", Collections.singletonMap("foo", "bar"));
446+
}
447+
448+
// Create a snapshot repo
449+
initializeRepo(REPO);
450+
451+
logger.info("--> creating policy {}", policyName);
452+
createSnapshotPolicy(policyName, "snap", NEVER_EXECUTE_CRON_SCHEDULE, REPO, indexName, true, false,
453+
new SnapshotRetentionConfiguration(TimeValue.ZERO, null, null));
454+
455+
logger.info("--> executing snapshot lifecycle");
456+
final String snapshotName = executePolicy(policyName);
457+
458+
// Check that the executed snapshot shows up in the SLM output
459+
assertBusy(() -> {
460+
GetSnapshotLifecycleAction.Response getResp =
461+
client().execute(GetSnapshotLifecycleAction.INSTANCE, new GetSnapshotLifecycleAction.Request(policyName)).get();
462+
logger.info("--> checking for in progress snapshot...");
463+
464+
assertThat(getResp.getPolicies().size(), greaterThan(0));
465+
SnapshotLifecyclePolicyItem item = getResp.getPolicies().get(0);
466+
SnapshotInvocationRecord lastSuccess = item.getLastSuccess();
467+
assertNotNull(lastSuccess);
468+
assertThat(lastSuccess.getSnapshotName(), equalTo(snapshotName));
469+
});
470+
471+
logger.info("--> restoring index");
472+
RestoreSnapshotRequest restoreReq = new RestoreSnapshotRequest(REPO, snapshotName);
473+
restoreReq.indices(indexName);
474+
restoreReq.renamePattern("(.+)");
475+
restoreReq.renameReplacement("restored_$1");
476+
restoreReq.waitForCompletion(true);
477+
RestoreSnapshotResponse resp = client().execute(RestoreSnapshotAction.INSTANCE, restoreReq).get();
478+
assertThat(resp.status(), equalTo(RestStatus.OK));
479+
480+
logger.info("--> executing SLM retention");
481+
assertAcked(client().execute(ExecuteSnapshotRetentionAction.INSTANCE, new ExecuteSnapshotRetentionAction.Request()).get());
482+
logger.info("--> waiting for {} snapshot to be deleted", snapshotName);
483+
assertBusy(() -> {
484+
try {
485+
try {
486+
GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
487+
.prepareGetSnapshots(REPO).setSnapshots(snapshotName).get();
488+
assertThat(snapshotsStatusResponse.getSnapshots(REPO), empty());
489+
} catch (SnapshotMissingException e) {
490+
// This is what we want to happen
491+
}
492+
logger.info("--> snapshot [{}] has been deleted", snapshotName);
493+
} catch (RepositoryException re) {
494+
// Concurrent status calls and write operations may lead to failures in determining the current repository generation
495+
// TODO: Remove this hack once tracking the current repository generation has been made consistent
496+
throw new AssertionError(re);
497+
}
498+
});
499+
500+
// Cancel/delete the snapshot
501+
try {
502+
client().admin().cluster().prepareDeleteSnapshot(REPO, snapshotName).get();
503+
} catch (SnapshotMissingException e) {
504+
// ignore
505+
}
506+
}
507+
435508
private SnapshotsStatusResponse getSnapshotStatus(String snapshotName) {
436509
try {
437510
return client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(snapshotName).get();

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java

+9
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import static org.hamcrest.Matchers.hasSize;
6868
import static org.hamcrest.Matchers.not;
6969
import static org.mockito.Mockito.mock;
70+
import static org.mockito.Mockito.when;
7071

7172
public class SnapshotRetentionTaskTests extends ESTestCase {
7273

@@ -362,6 +363,14 @@ public void testOkToDeleteSnapshots() {
362363
.build();
363364

364365
assertThat(SnapshotRetentionTask.okayToDeleteSnapshots(state), equalTo(false));
366+
367+
restoreInProgress = mock(RestoreInProgress.class);
368+
when(restoreInProgress.isEmpty()).thenReturn(true);
369+
state = ClusterState.builder(new ClusterName("cluster"))
370+
.putCustom(RestoreInProgress.TYPE, restoreInProgress)
371+
.build();
372+
373+
assertThat(SnapshotRetentionTask.okayToDeleteSnapshots(state), equalTo(true));
365374
}
366375

367376
public void testSkipWhileStopping() throws Exception {

0 commit comments

Comments
 (0)