Skip to content

Commit 0ca7cc1

Browse files
Safely Close Repositories on Node Shutdown (#48020) (#48107)
We were not closing repositories on Node shutdown. In production, this has little effect but in tests shutting down a node using `MockRepository` and is currently stuck in a simulated blocked-IO situation will only unblock when the node's threadpool is interrupted. This might in some edge cases (many snapshot threads and some CI slowness) result in the execution taking longer than 5s to release all the shard stores and thus we fail the assertion about unreleased shard stores in the internal test cluster. Regardless of tests, I think we should close repositories and release resources associated with them when closing a node and not just when removing a repository from the CS with running nodes as this behavior is really unexpected. Fixes #47689
1 parent f1bc3a0 commit 0ca7cc1

File tree

5 files changed

+30
-2
lines changed

5 files changed

+30
-2
lines changed

server/src/main/java/org/elasticsearch/node/Node.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,7 @@ public Node start() throws NodeValidationException {
668668
injector.getInstance(IndicesClusterStateService.class).start();
669669
injector.getInstance(SnapshotsService.class).start();
670670
injector.getInstance(SnapshotShardsService.class).start();
671+
injector.getInstance(RepositoriesService.class).start();
671672
injector.getInstance(SearchService.class).start();
672673
nodeService.getMonitorService().start();
673674

@@ -780,6 +781,7 @@ private Node stop() {
780781

781782
injector.getInstance(SnapshotsService.class).stop();
782783
injector.getInstance(SnapshotShardsService.class).stop();
784+
injector.getInstance(RepositoriesService.class).stop();
783785
// stop any changes happening as a result of cluster state changes
784786
injector.getInstance(IndicesClusterStateService.class).stop();
785787
// close discovery early to not react to pings anymore.

server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,19 @@
3737
import org.elasticsearch.cluster.node.DiscoveryNode;
3838
import org.elasticsearch.cluster.service.ClusterService;
3939
import org.elasticsearch.common.Strings;
40+
import org.elasticsearch.common.component.AbstractLifecycleComponent;
4041
import org.elasticsearch.common.regex.Regex;
4142
import org.elasticsearch.common.settings.Settings;
4243
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
44+
import org.elasticsearch.core.internal.io.IOUtils;
4345
import org.elasticsearch.snapshots.RestoreService;
4446
import org.elasticsearch.snapshots.SnapshotsService;
4547
import org.elasticsearch.threadpool.ThreadPool;
4648
import org.elasticsearch.transport.TransportService;
4749

50+
import java.io.IOException;
4851
import java.util.ArrayList;
52+
import java.util.Collection;
4953
import java.util.Collections;
5054
import java.util.HashMap;
5155
import java.util.List;
@@ -54,7 +58,7 @@
5458
/**
5559
* Service responsible for maintaining and providing access to snapshot repositories on nodes.
5660
*/
57-
public class RepositoriesService implements ClusterStateApplier {
61+
public class RepositoriesService extends AbstractLifecycleComponent implements ClusterStateApplier {
5862

5963
private static final Logger logger = LogManager.getLogger(RepositoriesService.class);
6064

@@ -95,6 +99,8 @@ public RepositoriesService(Settings settings, ClusterService clusterService, Tra
9599
* @param listener register repository listener
96100
*/
97101
public void registerRepository(final PutRepositoryRequest request, final ActionListener<ClusterStateUpdateResponse> listener) {
102+
assert lifecycle.started() : "Trying to register new repository but service is in state [" + lifecycle.state() + "]";
103+
98104
final RepositoryMetaData newRepositoryMetaData = new RepositoryMetaData(request.name(), request.type(), request.settings());
99105
validate(request.name());
100106

@@ -435,4 +441,23 @@ private void ensureRepositoryNotInUse(ClusterState clusterState, String reposito
435441
throw new IllegalStateException("trying to modify or unregister repository that is currently used ");
436442
}
437443
}
444+
445+
@Override
446+
protected void doStart() {
447+
448+
}
449+
450+
@Override
451+
protected void doStop() {
452+
453+
}
454+
455+
@Override
456+
protected void doClose() throws IOException {
457+
clusterService.removeApplier(this);
458+
final Collection<Repository> repos = new ArrayList<>();
459+
repos.addAll(internalRepositories.values());
460+
repos.addAll(repositories.values());
461+
IOUtils.close(repos);
462+
}
438463
}

server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ public void setUp() throws Exception {
6767
Collections.emptySet());
6868
repositoriesService = new RepositoriesService(Settings.EMPTY, mock(ClusterService.class),
6969
transportService, Collections.emptyMap(), Collections.singletonMap(TestRepository.TYPE, TestRepository::new), threadPool);
70+
repositoriesService.start();
7071
}
7172

7273
public void testRegisterInternalRepository() {

server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,6 +1186,7 @@ public void start(ClusterState initialState) {
11861186
transportService.acceptIncomingRequests();
11871187
snapshotsService.start();
11881188
snapshotShardsService.start();
1189+
repositoriesService.start();
11891190
final CoordinationState.PersistedState persistedState =
11901191
new InMemoryPersistedState(initialState.term(), stateForNode(initialState, node));
11911192
coordinator = new Coordinator(node.getName(), clusterService.getSettings(),

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SLMSnapshotBlockingIntegTests.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ protected Settings transportClientSettings() {
127127
return settings.build();
128128
}
129129

130-
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/47689")
131130
public void testSnapshotInProgress() throws Exception {
132131
final String indexName = "test";
133132
final String policyName = "test-policy";

0 commit comments

Comments
 (0)