Skip to content

Commit 78e0b71

Browse files
Fix BlobStoreRepositoryCleanupIT Leaking Futures (#69446) (#69529)
The problem with #69434 was that during master failover the listener might get retried after the new master has already dropped the cleanup from the cluster state which then conflicts with the cleanup executed by the repo consistency checks after each test. To prevent running into this conflict, just wait for the future to actually return. Closes #69434
1 parent 213e06a commit 78e0b71

File tree

1 file changed

+21
-6
lines changed

1 file changed

+21
-6
lines changed

server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryCleanupIT.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
*/
88
package org.elasticsearch.repositories.blobstore;
99

10+
import org.elasticsearch.ExceptionsHelper;
11+
import org.elasticsearch.action.ActionFuture;
1012
import org.elasticsearch.action.ActionRunnable;
13+
import org.elasticsearch.action.admin.cluster.repositories.cleanup.CleanupRepositoryResponse;
1114
import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse;
1215
import org.elasticsearch.action.support.PlainActionFuture;
1316
import org.elasticsearch.cluster.RepositoryCleanupInProgress;
@@ -17,16 +20,18 @@
1720
import org.elasticsearch.snapshots.SnapshotState;
1821
import org.elasticsearch.test.ESIntegTestCase;
1922

23+
import java.io.IOException;
2024
import java.util.concurrent.ExecutionException;
2125

2226
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFutureThrows;
27+
import static org.hamcrest.Matchers.instanceOf;
2328
import static org.hamcrest.Matchers.is;
2429

2530
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
2631
public class BlobStoreRepositoryCleanupIT extends AbstractSnapshotIntegTestCase {
2732

2833
public void testMasterFailoverDuringCleanup() throws Exception {
29-
startBlockedCleanup("test-repo");
34+
final ActionFuture<CleanupRepositoryResponse> cleanupFuture = startBlockedCleanup("test-repo");
3035

3136
final int nodeCount = internalCluster().numDataAndMasterNodes();
3237
logger.info("--> stopping master node");
@@ -37,10 +42,12 @@ public void testMasterFailoverDuringCleanup() throws Exception {
3742
logger.info("--> wait for cleanup to finish and disappear from cluster state");
3843
awaitClusterState(state ->
3944
state.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY).hasCleanupInProgress() == false);
45+
46+
cleanupFuture.get();
4047
}
4148

4249
public void testRepeatCleanupsDontRemove() throws Exception {
43-
final String masterNode = startBlockedCleanup("test-repo");
50+
final ActionFuture<CleanupRepositoryResponse> cleanupFuture = startBlockedCleanup("test-repo");
4451

4552
logger.info("--> sending another cleanup");
4653
assertFutureThrows(client().admin().cluster().prepareCleanupRepository("test-repo").execute(), IllegalStateException.class);
@@ -51,14 +58,19 @@ public void testRepeatCleanupsDontRemove() throws Exception {
5158
assertTrue(cleanup.hasCleanupInProgress());
5259

5360
logger.info("--> unblocking master node");
54-
unblockNode("test-repo", masterNode);
61+
unblockNode("test-repo", internalCluster().getMasterName());
5562

5663
logger.info("--> wait for cleanup to finish and disappear from cluster state");
5764
awaitClusterState(state ->
5865
state.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY).hasCleanupInProgress() == false);
66+
67+
final ExecutionException e = expectThrows(ExecutionException.class, cleanupFuture::get);
68+
final Throwable ioe = ExceptionsHelper.unwrap(e, IOException.class);
69+
assertThat(ioe, instanceOf(IOException.class));
70+
assertThat(ioe.getMessage(), is("exception after block"));
5971
}
6072

61-
private String startBlockedCleanup(String repoName) throws Exception {
73+
private ActionFuture<CleanupRepositoryResponse> startBlockedCleanup(String repoName) throws Exception {
6274
logger.info("--> starting two master nodes and one data node");
6375
internalCluster().startMasterOnlyNodes(2);
6476
internalCluster().startDataOnlyNodes(1);
@@ -80,13 +92,16 @@ private String startBlockedCleanup(String repoName) throws Exception {
8092
blockMasterFromFinalizingSnapshotOnIndexFile(repoName);
8193

8294
logger.info("--> starting repository cleanup");
83-
client().admin().cluster().prepareCleanupRepository(repoName).execute();
95+
// running from a non-master client because shutting down a master while a request to it is pending might result in the future
96+
// never completing
97+
final ActionFuture<CleanupRepositoryResponse> future =
98+
internalCluster().nonMasterClient().admin().cluster().prepareCleanupRepository(repoName).execute();
8499

85100
final String masterNode = internalCluster().getMasterName();
86101
waitForBlock(masterNode, repoName);
87102
awaitClusterState(state ->
88103
state.custom(RepositoryCleanupInProgress.TYPE, RepositoryCleanupInProgress.EMPTY).hasCleanupInProgress());
89-
return masterNode;
104+
return future;
90105
}
91106

92107
public void testCleanupOldIndexN() throws ExecutionException, InterruptedException {

0 commit comments

Comments
 (0)