Skip to content

Unblock blocked repositories after test execution #61703

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ public void clusterChanged(ClusterChangedEvent event) {
logger.info("--> waiting for disruption to start");
assertTrue(disruptionStarted.await(1, TimeUnit.MINUTES));

awaitNoMoreRunningOperations(dataNode);
awaitNoMoreSnapshotRunningOperations(dataNode);

logger.info("--> verify that snapshot was successful or no longer exist");
assertBusy(() -> {
Expand Down Expand Up @@ -148,7 +148,7 @@ public void clusterChanged(ClusterChangedEvent event) {
assertThat(sne.getSnapshotName(), is(snapshot));
}

awaitNoMoreRunningOperations(dataNode);
awaitNoMoreSnapshotRunningOperations(dataNode);
}

public void testDisruptionAfterShardFinalization() throws Exception {
Expand Down Expand Up @@ -237,7 +237,7 @@ public void testMasterFailOverDuringShardSnapshots() throws Exception {
unblockNode(repoName, dataNode);

networkDisruption.stopDisrupting();
awaitNoMoreRunningOperations(dataNode);
awaitNoMoreSnapshotRunningOperations(dataNode);

logger.info("--> make sure isolated master responds to snapshot request");
final SnapshotException sne =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,7 @@ private List<String> createNSnapshots(String repoName, int count) {
}

/**
 * Waits for running snapshot operations to finish, observing the cluster state through the node that
 * {@code internalCluster()} currently reports as the master.
 *
 * @throws Exception if the underlying wait fails
 */
private void awaitNoMoreRunningOperations() throws Exception {
// NOTE(review): the two consecutive calls below look like the removed and added sides of a rename in this
// diff rendering rather than two calls in one method body -- confirm against the actual file.
awaitNoMoreRunningOperations(internalCluster().getMasterName());
awaitNoMoreSnapshotRunningOperations(internalCluster().getMasterName());
}

private ActionFuture<AcknowledgedResponse> startDeleteFromNonMasterClient(String repoName, String snapshotName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,11 @@
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateObserver;
import org.elasticsearch.cluster.SnapshotDeletionsInProgress;
import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.RepositoriesMetadata;
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.bytes.BytesReference;
Expand All @@ -44,7 +41,6 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.node.NodeClosedException;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.repositories.RepositoriesService;
import org.elasticsearch.repositories.Repository;
Expand All @@ -56,7 +52,6 @@
import org.elasticsearch.snapshots.mockstore.MockRepository;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.VersionUtils;
import org.elasticsearch.threadpool.ThreadPool;
import org.junit.After;

import java.io.IOException;
Expand All @@ -73,10 +68,8 @@
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Predicate;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.empty;
Expand Down Expand Up @@ -119,7 +112,7 @@ public void verifyNoLeakedListeners() throws Exception {

@After
public void assertRepoConsistency() {
if (skipRepoConsistencyCheckReason == null) {
if (skipRepoConsistencyCheckReason == null && getSuiteFailureMarker().wasSuccessful()) {
client().admin().cluster().prepareGetRepositories().get().repositories().forEach(repositoryMetadata -> {
final String name = repositoryMetadata.name();
if (repositoryMetadata.settings().getAsBoolean("readonly", false) == false) {
Expand Down Expand Up @@ -421,36 +414,4 @@ protected void addBwCFailedSnapshot(String repoName, String snapshotName, Map<St
ShardGenerations.EMPTY, getRepositoryData(repoName).getGenId(), state.metadata(), snapshotInfo,
SnapshotsService.OLD_SNAPSHOT_FORMAT, Function.identity(), f));
}

/**
 * Blocks until the cluster state observed on the given node contains neither in-progress snapshots nor
 * in-progress snapshot deletions.
 *
 * @param viaNode name of the node whose cluster state is observed
 * @throws Exception if waiting for the cluster state fails
 */
protected void awaitNoMoreRunningOperations(String viaNode) throws Exception {
    logger.info("--> verify no more operations in the cluster state");
    awaitClusterState(viaNode, state -> {
        final SnapshotsInProgress snapshots = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
        if (snapshots.entries().isEmpty() == false) {
            // at least one snapshot is still running, no need to look at deletions
            return false;
        }
        final SnapshotDeletionsInProgress deletions =
            state.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY);
        return deletions.hasDeletionsInProgress() == false;
    });
}

/**
 * Waits until the cluster state seen by the given node satisfies the supplied predicate.
 * <p>
 * Returns immediately when the currently observed state already matches. Otherwise a listener is registered
 * for the next matching state and the calling thread blocks on it for at most 30 seconds.
 *
 * @param viaNode        name of the node whose {@link ClusterService} is used to observe the cluster state
 * @param statePredicate condition the cluster state has to fulfil
 * @throws Exception if the cluster service is closed, the observer times out, or the 30 second wait elapses
 */
protected void awaitClusterState(String viaNode, Predicate<ClusterState> statePredicate) throws Exception {
    final ClusterService nodeClusterService = internalCluster().getInstance(ClusterService.class, viaNode);
    final ThreadPool nodeThreadPool = internalCluster().getInstance(ThreadPool.class, viaNode);
    final ClusterStateObserver stateObserver =
        new ClusterStateObserver(nodeClusterService, logger, nodeThreadPool.getThreadContext());

    if (statePredicate.test(stateObserver.setAndGetObservedState())) {
        // the state we just sampled already matches, nothing to wait for
        return;
    }

    final PlainActionFuture<Void> stateReached = PlainActionFuture.newFuture();
    final ClusterStateObserver.Listener completionListener = new ClusterStateObserver.Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
            stateReached.onResponse(null);
        }

        @Override
        public void onClusterServiceClose() {
            stateReached.onFailure(new NodeClosedException(nodeClusterService.localNode()));
        }

        @Override
        public void onTimeout(TimeValue timeout) {
            stateReached.onFailure(new TimeoutException());
        }
    };
    stateObserver.waitForNextChange(completionListener, statePredicate);
    // bound the wait so a predicate that is never satisfied fails the caller instead of hanging
    stateReached.get(30L, TimeUnit.SECONDS);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,22 @@
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.DefaultShardOperationFailedException;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.client.AdminClient;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.cluster.ClusterModule;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateObserver;
import org.elasticsearch.cluster.SnapshotDeletionsInProgress;
import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
Expand Down Expand Up @@ -116,20 +121,26 @@
import org.elasticsearch.indices.IndicesQueryCache;
import org.elasticsearch.indices.IndicesRequestCache;
import org.elasticsearch.indices.store.IndicesStore;
import org.elasticsearch.node.NodeClosedException;
import org.elasticsearch.node.NodeMocksPlugin;
import org.elasticsearch.plugins.NetworkPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.repositories.RepositoriesService;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.repositories.RepositoryMissingException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.rest.action.RestCancellableNodeClient;
import org.elasticsearch.script.MockScriptService;
import org.elasticsearch.search.MockSearchService;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.snapshots.mockstore.MockRepository;
import org.elasticsearch.test.client.RandomizingClient;
import org.elasticsearch.test.disruption.NetworkDisruption;
import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
import org.elasticsearch.test.store.MockFSIndexStore;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportInterceptor;
import org.elasticsearch.transport.TransportRequest;
import org.elasticsearch.transport.TransportRequestHandler;
Expand Down Expand Up @@ -168,8 +179,10 @@
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
Expand Down Expand Up @@ -547,6 +560,7 @@ private void afterInternal(boolean afterClass) throws Exception {
ensureClusterSizeConsistency();
ensureClusterStateConsistency();
ensureClusterStateCanBeReadByNodeTool();
unblockRepositories();
beforeIndexDeletion();
cluster().wipe(excludeTemplates()); // wipe after to make sure we fail in the test that didn't ack the delete
if (afterClass || currentClusterScope == Scope.TEST) {
Expand All @@ -569,6 +583,27 @@ private void afterInternal(boolean afterClass) throws Exception {
}
}

public void unblockRepositories() throws Exception {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little unsure about this. I think we should only run this if the original test failed, shouldn't we? (This is what we do in REST tests.)
Otherwise, tests could leak running/blocked snapshots in the background, and we would be quietly cleaning them up here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that makes sense. Do we have something like getSuiteFailureMarker() but for a single test? I cannot find any method like that on ESTestCase.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe org.elasticsearch.test.ESTestCase#afterIfFailed will work here (I must admit I haven't checked the exact order of things). If it's called too late, maybe we can do something with a @Rule? (I'm not an expert in JUnit, so the latter is a bit of a guess.)

if (isInternalCluster() == false || cluster().size() == 0) {
return;
}
List<RepositoryMetadata> repositories = admin().cluster().prepareGetRepositories().get().repositories();
for (RepositoriesService repositoriesService : internalCluster().getDataOrMasterNodeInstances(RepositoriesService.class)) {
for (RepositoryMetadata repositoryMetadata : repositories) {
try {
Repository repository = repositoriesService.repository(repositoryMetadata.name());
if (repository instanceof MockRepository) {
((MockRepository) repository).unblock();
}
} catch (RepositoryMissingException e) {
// ignore missing repositories
}
}
}

awaitNoMoreSnapshotRunningOperations(internalCluster().getMasterName());
}

/**
* @return An exclude set of index templates that will not be removed in between tests.
*/
Expand Down Expand Up @@ -2205,4 +2240,36 @@ public static String resolveCustomDataPath(String index) {
public static boolean inFipsJvm() {
return Boolean.parseBoolean(System.getProperty(FIPS_SYSPROP));
}

protected void awaitNoMoreSnapshotRunningOperations(String viaNode) throws Exception {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: reorder this to the natural word order: awaitNoMoreRunningSnapshotOperations :)

logger.info("--> verify no more operations in the cluster state");
awaitClusterState(viaNode, state -> state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries().isEmpty() &&
state.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY).hasDeletionsInProgress() == false);
}

/**
 * Blocks until the cluster state observed through the given node fulfils {@code statePredicate}.
 * <p>
 * If the state sampled at call time already matches, the method returns right away. Otherwise it registers a
 * listener for the next matching cluster state and waits on it for no longer than 30 seconds.
 *
 * @param viaNode        name of the node whose {@link ClusterService} and {@link ThreadPool} are used
 * @param statePredicate condition the cluster state must satisfy
 * @throws Exception if the cluster service closes, the observer reports a timeout, or the 30 second wait elapses
 */
protected void awaitClusterState(String viaNode, Predicate<ClusterState> statePredicate) throws Exception {
    final ClusterService observedService = internalCluster().getInstance(ClusterService.class, viaNode);
    final ThreadPool observedThreadPool = internalCluster().getInstance(ThreadPool.class, viaNode);
    final ClusterStateObserver clusterStateObserver =
        new ClusterStateObserver(observedService, logger, observedThreadPool.getThreadContext());

    final boolean alreadySatisfied = statePredicate.test(clusterStateObserver.setAndGetObservedState());
    if (alreadySatisfied) {
        return;
    }

    final PlainActionFuture<Void> matched = PlainActionFuture.newFuture();
    final ClusterStateObserver.Listener onMatch = new ClusterStateObserver.Listener() {
        @Override
        public void onNewClusterState(ClusterState state) {
            matched.onResponse(null);
        }

        @Override
        public void onClusterServiceClose() {
            matched.onFailure(new NodeClosedException(observedService.localNode()));
        }

        @Override
        public void onTimeout(TimeValue timeout) {
            matched.onFailure(new TimeoutException());
        }
    };
    clusterStateObserver.waitForNextChange(onMatch, statePredicate);
    // cap the wait so that a state which never arrives fails the caller rather than blocking forever
    matched.get(30L, TimeUnit.SECONDS);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public void ensureClusterNodes() {

// Registered with JUnit's @After: waits, via the node that internalCluster() currently reports as master,
// until no snapshot operations remain in the cluster state.
@After
public void awaitNoMoreRunningOps() throws Exception {
// NOTE(review): the two consecutive calls below look like the removed and added sides of a rename in this
// diff rendering rather than two calls in one method body -- confirm against the actual file.
awaitNoMoreRunningOperations(internalCluster().getMasterName());
awaitNoMoreSnapshotRunningOperations(internalCluster().getMasterName());
}

@Override
Expand Down Expand Up @@ -168,7 +168,7 @@ public void testRetentionWhileSnapshotInProgress() throws Exception {
}
});

awaitNoMoreRunningOperations(randomFrom(dataNodeNames));
awaitNoMoreSnapshotRunningOperations(randomFrom(dataNodeNames));

logger.info("--> indexing more docs to force new segment files");
for (int i = 0; i < docCount; i++) {
Expand Down