Skip to content

Stricter failure handling in TransportGetSnapshotsAction #107191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions docs/changelog/107191.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
pr: 107191
summary: Stricter failure handling in multi-repo get-snapshots request handling
area: Snapshot/Restore
type: bug
issues: []
highlight:
  title: Stricter failure handling in multi-repo get-snapshots request handling
  body: |
    If a multi-repo get-snapshots request encounters a failure in one of the
    targeted repositories then earlier versions of Elasticsearch would proceed
    as if the faulty repository did not exist, except for a per-repository
    failure report in a separate section of the response body. This makes it
    impossible to paginate the results properly in the presence of failures. In
    versions 8.15.0 and later this API's failure handling behaviour has been
    made stricter, reporting an overall failure if any targeted repository's
    contents cannot be listed.
  notable: true
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.in;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;

public class GetSnapshotsIT extends AbstractSnapshotIntegTestCase {
Expand Down Expand Up @@ -314,6 +312,7 @@ public void testExcludePatterns() throws Exception {
assertThat(
clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, matchAllPattern())
.setSnapshots("non-existing*", otherPrefixSnapshot1, "-o*")
.setIgnoreUnavailable(true)
.get()
.getSnapshots(),
empty()
Expand Down Expand Up @@ -586,12 +585,17 @@ public void testRetrievingSnapshotsWhenRepositoryIsMissing() throws Exception {
final List<String> snapshotNames = createNSnapshots(repoName, randomIntBetween(1, 10));
snapshotNames.sort(String::compareTo);

final GetSnapshotsResponse response = clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName, missingRepoName)
final var oneRepoFuture = clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName, missingRepoName)
.setSort(SnapshotSortKey.NAME)
.get();
assertThat(response.getSnapshots().stream().map(info -> info.snapshotId().getName()).toList(), equalTo(snapshotNames));
assertTrue(response.getFailures().containsKey(missingRepoName));
assertThat(response.getFailures().get(missingRepoName), instanceOf(RepositoryMissingException.class));
.setIgnoreUnavailable(randomBoolean())
.execute();
expectThrows(RepositoryMissingException.class, oneRepoFuture::actionGet);

final var multiRepoFuture = clusterAdmin().prepareGetSnapshots(TEST_REQUEST_TIMEOUT, repoName, missingRepoName)
.setSort(SnapshotSortKey.NAME)
.setIgnoreUnavailable(randomBoolean())
.execute();
expectThrows(RepositoryMissingException.class, multiRepoFuture::actionGet);
}

// Create a snapshot that is guaranteed to have a unique start time and duration for tests around ordering by either.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.oneOf;
Expand Down Expand Up @@ -395,16 +394,13 @@ public void testGetSnapshotsMultipleRepos() throws Exception {
}

logger.info("--> specify all snapshot names with ignoreUnavailable=false");
GetSnapshotsResponse getSnapshotsResponse2 = client.admin()
final var failingFuture = client.admin()
.cluster()
.prepareGetSnapshots(TEST_REQUEST_TIMEOUT, randomFrom("_all", "repo*"))
.setIgnoreUnavailable(false)
.setSnapshots(snapshotList.toArray(new String[0]))
.get();

for (String repo : repoList) {
assertThat(getSnapshotsResponse2.getFailures().get(repo), instanceOf(SnapshotMissingException.class));
}
.execute();
expectThrows(SnapshotMissingException.class, failingFuture::actionGet);

logger.info("--> specify all snapshot names with ignoreUnavailable=true");
GetSnapshotsResponse getSnapshotsResponse3 = client.admin()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.search.sort.SortOrder;
Expand Down Expand Up @@ -220,13 +219,6 @@ public String[] policies() {
return policies;
}

/**
 * Reports whether this request targets exactly one explicitly named repository.
 *
 * @return {@code true} only when the repositories array has a single entry that is non-null,
 *         is not the literal {@code "_all"}, and is not reported as a pattern by
 *         {@code Regex.isSimpleMatchPattern}; {@code false} in every other case
 */
public boolean isSingleRepositoryRequest() {
    // Anything other than exactly one entry cannot be a single-repository request.
    if (repositories.length != 1) {
        return false;
    }
    final String onlyRepository = repositories[0];
    // A null entry or the "_all" alias does not name one specific repository.
    if (onlyRepository == null || "_all".equals(onlyRepository)) {
        return false;
    }
    return Regex.isSimpleMatchPattern(onlyRepository) == false;
}

/**
* Returns the names of the snapshots.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ChunkedToXContentObject;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.UpdateForV9;
import org.elasticsearch.snapshots.SnapshotInfo;
import org.elasticsearch.xcontent.ToXContent;

Expand All @@ -33,6 +34,7 @@ public class GetSnapshotsResponse extends ActionResponse implements ChunkedToXCo

private final List<SnapshotInfo> snapshots;

@UpdateForV9 // always empty, can be dropped
private final Map<String, ElasticsearchException> failures;

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

package org.elasticsearch.action.admin.cluster.snapshots.get;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionType;
import org.elasticsearch.action.support.ActionFilters;
Expand Down Expand Up @@ -120,10 +119,14 @@ protected void masterOperation(
) {
assert task instanceof CancellableTask : task + " not cancellable";

final var resolvedRepositories = ResolvedRepositories.resolve(state, request.repositories());
if (resolvedRepositories.hasMissingRepositories()) {
throw new RepositoryMissingException(String.join(", ", resolvedRepositories.missing()));
}

new GetSnapshotsOperation(
(CancellableTask) task,
ResolvedRepositories.resolve(state, request.repositories()),
request.isSingleRepositoryRequest() == false,
resolvedRepositories.repositoryMetadata(),
request.snapshots(),
request.ignoreUnavailable(),
request.policies(),
Expand Down Expand Up @@ -151,7 +154,6 @@ private class GetSnapshotsOperation {

// repositories
private final List<RepositoryMetadata> repositories;
private final boolean isMultiRepoRequest;

// snapshots selection
private final SnapshotNamePredicate snapshotNamePredicate;
Expand Down Expand Up @@ -179,7 +181,6 @@ private class GetSnapshotsOperation {
private final GetSnapshotInfoExecutor getSnapshotInfoExecutor;

// results
private final Map<String, ElasticsearchException> failuresByRepository = ConcurrentCollections.newConcurrentMap();
private final Queue<List<SnapshotInfo>> allSnapshotInfos = ConcurrentCollections.newQueue();

/**
Expand All @@ -195,8 +196,7 @@ private class GetSnapshotsOperation {

GetSnapshotsOperation(
CancellableTask cancellableTask,
ResolvedRepositories resolvedRepositories,
boolean isMultiRepoRequest,
List<RepositoryMetadata> repositories,
String[] snapshots,
boolean ignoreUnavailable,
String[] policies,
Expand All @@ -211,8 +211,7 @@ private class GetSnapshotsOperation {
boolean indices
) {
this.cancellableTask = cancellableTask;
this.repositories = resolvedRepositories.repositoryMetadata();
this.isMultiRepoRequest = isMultiRepoRequest;
this.repositories = repositories;
this.ignoreUnavailable = ignoreUnavailable;
this.sortBy = sortBy;
this.order = order;
Expand All @@ -232,10 +231,6 @@ private class GetSnapshotsOperation {
threadPool.info(ThreadPool.Names.SNAPSHOT_META).getMax(),
cancellableTask::isCancelled
);

for (final var missingRepo : resolvedRepositories.missing()) {
failuresByRepository.put(missingRepo, new RepositoryMissingException(missingRepo));
}
}

void getMultipleReposSnapshotInfo(ActionListener<GetSnapshotsResponse> listener) {
Expand All @@ -249,6 +244,10 @@ void getMultipleReposSnapshotInfo(ActionListener<GetSnapshotsResponse> listener)
continue;
}

if (listeners.isFailing()) {
return;
}

SubscribableListener

.<RepositoryData>newForked(repositoryDataListener -> {
Expand All @@ -261,14 +260,7 @@ void getMultipleReposSnapshotInfo(ActionListener<GetSnapshotsResponse> listener)

.<Void>andThen((l, repositoryData) -> loadSnapshotInfos(repoName, repositoryData, l))

.addListener(listeners.acquire().delegateResponse((l, e) -> {
if (isMultiRepoRequest && e instanceof ElasticsearchException elasticsearchException) {
failuresByRepository.put(repoName, elasticsearchException);
l.onResponse(null);
} else {
l.onFailure(e);
}
}));
.addListener(listeners.acquire());
}
}
})
Expand Down Expand Up @@ -503,7 +495,7 @@ private GetSnapshotsResponse buildResponse() {
}
return new GetSnapshotsResponse(
snapshotInfos,
failuresByRepository,
null,
remaining > 0 ? sortBy.encodeAfterQueryParam(snapshotInfos.get(snapshotInfos.size() - 1)) : null,
totalCount.get(),
remaining
Expand Down