Skip to content

Commit 5f72407

Browse files
authored
Retry on RepositoryException in SLM tests (#48548)
Due to a bug, GETing a snapshot can cause a RepositoryException to be thrown. This error is transient and should be retried, rather than causing the test to fail. This commit converts those RepositoryExceptions into AssertionErrors so that they will be retried in code wrapped in assertBusy.
1 parent 29ac34a commit 5f72407

File tree

2 files changed

+16
-11
lines changed

2 files changed

+16
-11
lines changed

client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/ILMDocumentationIT.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,6 @@ public void onFailure(Exception e) {
765765
assertTrue(latch.await(30L, TimeUnit.SECONDS));
766766
}
767767

768-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/46021")
769768
public void testAddSnapshotLifecyclePolicy() throws Exception {
770769
RestHighLevelClient client = highLevelClient();
771770

@@ -1057,6 +1056,8 @@ private void assertSnapshotExists(final RestHighLevelClient client, final String
10571056
} catch (Exception e) {
10581057
if (e.getMessage().contains("snapshot_missing_exception")) {
10591058
fail("snapshot does not exist: " + snapshotName);
1059+
} else if (e.getMessage().contains("repository_exception")) {
1060+
fail("got a respository_exception, retrying. original message: " + e.getMessage());
10601061
}
10611062
throw e;
10621063
}

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SLMSnapshotBlockingIntegTests.java

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
package org.elasticsearch.xpack.slm;
88

9-
import org.apache.lucene.util.LuceneTestCase;
109
import org.elasticsearch.action.ActionFuture;
1110
import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse;
1211
import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotStatus;
@@ -124,7 +123,6 @@ public void testSnapshotInProgress() throws Exception {
124123
}
125124
}
126125

127-
@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/48441")
128126
public void testRetentionWhileSnapshotInProgress() throws Exception {
129127
final String indexName = "test";
130128
final String policyId = "slm-policy";
@@ -144,8 +142,7 @@ public void testRetentionWhileSnapshotInProgress() throws Exception {
144142
logger.info("--> kicked off snapshot {}", completedSnapshotName);
145143
assertBusy(() -> {
146144
try {
147-
SnapshotsStatusResponse s =
148-
client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(completedSnapshotName).get();
145+
SnapshotsStatusResponse s = getSnapshotStatus(completedSnapshotName);
149146
assertThat("expected a snapshot but none were returned", s.getSnapshots().size(), equalTo(1));
150147
SnapshotStatus status = s.getSnapshots().get(0);
151148
logger.info("--> waiting for snapshot {} to be completed, got: {}", completedSnapshotName, status.getState());
@@ -213,13 +210,8 @@ public void testRetentionWhileSnapshotInProgress() throws Exception {
213210
client().admin().cluster().prepareReroute().get();
214211
logger.info("--> waiting for snapshot to be deleted");
215212
try {
216-
SnapshotsStatusResponse s =
217-
client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(completedSnapshotName).get();
213+
SnapshotsStatusResponse s = getSnapshotStatus(completedSnapshotName);
218214
assertNull("expected no snapshot but one was returned", s.getSnapshots().get(0));
219-
} catch (RepositoryException e) {
220-
// Concurrent status calls and write operations may lead to failures in determining the current repository generation
221-
// TODO: Remove this hack once tracking the current repository generation has been made consistent
222-
throw new AssertionError(e);
223215
} catch (SnapshotMissingException e) {
224216
// Great, we wanted it to be deleted!
225217
}
@@ -383,6 +375,18 @@ private void testUnsuccessfulSnapshotRetention(boolean partialSuccess) throws Ex
383375
}
384376
}
385377

378+
private SnapshotsStatusResponse getSnapshotStatus(String snapshotName) {
379+
try {
380+
return client().admin().cluster().prepareSnapshotStatus(REPO).setSnapshots(snapshotName).get();
381+
} catch (RepositoryException e) {
382+
// Convert this to an AssertionError so that it can be retried in an assertBusy - this is often a transient error because
383+
// concurrent status calls and write operations may lead to failures in determining the current repository generation
384+
// TODO: Remove this hack once tracking the current repository generation has been made consistent
385+
logger.warn(e);
386+
throw new AssertionError(e);
387+
}
388+
}
389+
386390
private void createAndPopulateIndex(String indexName) throws InterruptedException {
387391
logger.info("--> creating and populating index [{}]", indexName);
388392
assertAcked(prepareCreate(indexName, 0, Settings.builder()

0 commit comments

Comments
 (0)