Skip to content

Commit 1fca213

Browse files
committed
Do not fail snapshot when deleting a missing snapshotted file (#30332)
When deleting or creating a snapshot for a given shard, elasticsearch usually starts by listing all the existing snapshotted files in the repository. Then it computes a diff and deletes the snapshotted files that are not needed anymore. During this deletion, an exception is thrown if the file to be deleted does not exist anymore. This behavior is challenging with cloud based repository implementations like S3 where a file that has been deleted can still appear in the bucket for few seconds/minutes (because the deletion can take some time to be fully replicated on S3). If the deleted file appears in the listing of files, then the following deletion will fail with a NoSuchFileException and the snapshot will be partially created/deleted. This pull request makes the deletion of these files a bit less strict, ie not failing if the file we want to delete does not exist anymore. It introduces a new BlobContainer.deleteIgnoringIfNotExists() method that can be used at some specific places where not failing when deleting a file is considered harmless. Closes #28322
1 parent e33370a commit 1fca213

File tree

6 files changed

+79
-52
lines changed

6 files changed

+79
-52
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ settings. See <<email-notification-settings>>.
219219
node action.
220220
* Refrains from appending a question mark to an HTTP request if no parameters
221221
are used.
222+
Fix NPE when CumulativeSum agg encounters null value/empty bucket ({pull}29641[#29641])
223+
Do not fail snapshot when deleting a missing snapshotted file ({pull}30332[#30332])
222224

223225
//[float]
224226
//=== Regressions

server/src/main/java/org/elasticsearch/common/blobstore/BlobContainer.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ public interface BlobContainer {
7575
void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException;
7676

7777
/**
78-
* Deletes a blob with giving name, if the blob exists. If the blob does not exist, this method throws an IOException.
78+
* Deletes a blob with giving name, if the blob exists. If the blob does not exist,
79+
* this method throws a NoSuchFileException.
7980
*
8081
* @param blobName
8182
* The name of the blob to delete.
@@ -84,6 +85,21 @@ public interface BlobContainer {
8485
*/
8586
void deleteBlob(String blobName) throws IOException;
8687

88+
/**
89+
* Deletes a blob with giving name, ignoring if the blob does not exist.
90+
*
91+
* @param blobName
92+
* The name of the blob to delete.
93+
* @throws IOException if the blob exists but could not be deleted.
94+
*/
95+
default void deleteBlobIgnoringIfNotExists(String blobName) throws IOException {
96+
try {
97+
deleteBlob(blobName);
98+
} catch (final NoSuchFileException ignored) {
99+
// This exception is ignored
100+
}
101+
}
102+
87103
/**
88104
* Lists all blobs in the container.
89105
*

server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,6 @@ private BlobStoreIndexShardSnapshots(Map<String, FileInfo> files, List<SnapshotF
102102
this.physicalFiles = unmodifiableMap(mapBuilder);
103103
}
104104

105-
private BlobStoreIndexShardSnapshots() {
106-
shardSnapshots = Collections.emptyList();
107-
files = Collections.emptyMap();
108-
physicalFiles = Collections.emptyMap();
109-
}
110-
111-
112105
/**
113106
* Returns list of snapshots
114107
*

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreFormat.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ public T read(BlobContainer blobContainer, String name) throws IOException {
9090
return readBlob(blobContainer, blobName);
9191
}
9292

93-
9493
/**
9594
* Deletes obj in the blob container
9695
*/

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 44 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@
120120

121121
import static java.util.Collections.emptyMap;
122122
import static java.util.Collections.unmodifiableMap;
123+
import static org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName;
123124

124125
/**
125126
* BlobStore - based implementation of Snapshot Repository
@@ -797,7 +798,7 @@ private void writeAtomic(final String blobName, final BytesReference bytesRef) t
797798
} catch (IOException ex) {
798799
// temporary blob creation or move failed - try cleaning up
799800
try {
800-
snapshotsBlobContainer.deleteBlob(tempBlobName);
801+
snapshotsBlobContainer.deleteBlobIgnoringIfNotExists(tempBlobName);
801802
} catch (IOException e) {
802803
ex.addSuppressed(e);
803804
}
@@ -932,13 +933,13 @@ public void delete() {
932933
}
933934
}
934935
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
935-
finalize(newSnapshotsList, fileListGeneration + 1, blobs);
936+
finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot deletion [" + snapshotId + "]");
936937
}
937938

938939
/**
939940
* Loads information about shard snapshot
940941
*/
941-
public BlobStoreIndexShardSnapshot loadSnapshot() {
942+
BlobStoreIndexShardSnapshot loadSnapshot() {
942943
try {
943944
return indexShardSnapshotFormat(version).read(blobContainer, snapshotId.getUUID());
944945
} catch (IOException ex) {
@@ -947,54 +948,57 @@ public BlobStoreIndexShardSnapshot loadSnapshot() {
947948
}
948949

949950
/**
950-
* Removes all unreferenced files from the repository and writes new index file
951+
* Writes a new index file for the shard and removes all unreferenced files from the repository.
951952
*
952-
* We need to be really careful in handling index files in case of failures to make sure we have index file that
953-
* points to files that were deleted.
953+
* We need to be really careful in handling index files in case of failures to make sure we don't
954+
* have index file that points to files that were deleted.
954955
*
955-
*
956-
* @param snapshots list of active snapshots in the container
956+
* @param snapshots list of active snapshots in the container
957957
* @param fileListGeneration the generation number of the snapshot index file
958-
* @param blobs list of blobs in the container
958+
* @param blobs list of blobs in the container
959+
* @param reason a reason explaining why the shard index file is written
959960
*/
960-
protected void finalize(List<SnapshotFiles> snapshots, int fileListGeneration, Map<String, BlobMetaData> blobs) {
961-
BlobStoreIndexShardSnapshots newSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
962-
// delete old index files first
963-
for (String blobName : blobs.keySet()) {
964-
if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
965-
try {
966-
blobContainer.deleteBlob(blobName);
967-
} catch (IOException e) {
968-
// We cannot delete index file - this is fatal, we cannot continue, otherwise we might end up
969-
// with references to non-existing files
970-
throw new IndexShardSnapshotFailedException(shardId, "error deleting index file ["
971-
+ blobName + "] during cleanup", e);
972-
}
961+
protected void finalize(final List<SnapshotFiles> snapshots,
962+
final int fileListGeneration,
963+
final Map<String, BlobMetaData> blobs,
964+
final String reason) {
965+
final String indexGeneration = Integer.toString(fileListGeneration);
966+
final String currentIndexGen = indexShardSnapshotsFormat.blobName(indexGeneration);
967+
968+
final BlobStoreIndexShardSnapshots updatedSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
969+
try {
970+
// If we deleted all snapshots, we don't need to create a new index file
971+
if (snapshots.size() > 0) {
972+
indexShardSnapshotsFormat.writeAtomic(updatedSnapshots, blobContainer, indexGeneration);
973973
}
974-
}
975974

976-
// now go over all the blobs, and if they don't exist in a snapshot, delete them
977-
for (String blobName : blobs.keySet()) {
978-
// delete unused files
979-
if (blobName.startsWith(DATA_BLOB_PREFIX)) {
980-
if (newSnapshots.findNameFile(BlobStoreIndexShardSnapshot.FileInfo.canonicalName(blobName)) == null) {
975+
// Delete old index files
976+
for (final String blobName : blobs.keySet()) {
977+
if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
981978
try {
982-
blobContainer.deleteBlob(blobName);
979+
blobContainer.deleteBlobIgnoringIfNotExists(blobName);
983980
} catch (IOException e) {
984-
// TODO: don't catch and let the user handle it?
985-
logger.debug(() -> new ParameterizedMessage("[{}] [{}] error deleting blob [{}] during cleanup", snapshotId, shardId, blobName), e);
981+
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete index blob [{}] during finalization",
982+
snapshotId, shardId, blobName), e);
983+
throw e;
986984
}
987985
}
988986
}
989-
}
990987

991-
// If we deleted all snapshots - we don't need to create the index file
992-
if (snapshots.size() > 0) {
993-
try {
994-
indexShardSnapshotsFormat.writeAtomic(newSnapshots, blobContainer, Integer.toString(fileListGeneration));
995-
} catch (IOException e) {
996-
throw new IndexShardSnapshotFailedException(shardId, "Failed to write file list", e);
988+
// Delete all blobs that don't exist in a snapshot
989+
for (final String blobName : blobs.keySet()) {
990+
if (blobName.startsWith(DATA_BLOB_PREFIX) && (updatedSnapshots.findNameFile(canonicalName(blobName)) == null)) {
991+
try {
992+
blobContainer.deleteBlobIgnoringIfNotExists(blobName);
993+
} catch (IOException e) {
994+
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete data blob [{}] during finalization",
995+
snapshotId, shardId, blobName), e);
996+
}
997+
}
997998
}
999+
} catch (IOException e) {
1000+
String message = "Failed to finalize " + reason + " with shard index [" + currentIndexGen + "]";
1001+
throw new IndexShardSnapshotFailedException(shardId, message, e);
9981002
}
9991003
}
10001004

@@ -1020,7 +1024,7 @@ protected long findLatestFileNameGeneration(Map<String, BlobMetaData> blobs) {
10201024
if (!name.startsWith(DATA_BLOB_PREFIX)) {
10211025
continue;
10221026
}
1023-
name = BlobStoreIndexShardSnapshot.FileInfo.canonicalName(name);
1027+
name = canonicalName(name);
10241028
try {
10251029
long currentGen = Long.parseLong(name.substring(DATA_BLOB_PREFIX.length()), Character.MAX_RADIX);
10261030
if (currentGen > generation) {
@@ -1234,7 +1238,7 @@ public void snapshot(final IndexCommit snapshotIndexCommit) {
12341238
newSnapshotsList.add(point);
12351239
}
12361240
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
1237-
finalize(newSnapshotsList, fileListGeneration + 1, blobs);
1241+
finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot creation [" + snapshotId + "]");
12381242
snapshotStatus.moveToDone(System.currentTimeMillis());
12391243

12401244
}

test/framework/src/main/java/org/elasticsearch/repositories/ESBlobStoreContainerTestCase.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@
2929

3030
import java.io.IOException;
3131
import java.io.InputStream;
32+
import java.nio.file.NoSuchFileException;
3233
import java.util.Arrays;
3334
import java.util.HashMap;
3435
import java.util.Map;
3536

36-
import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
3737
import static org.elasticsearch.repositories.ESBlobStoreTestCase.randomBytes;
3838
import static org.elasticsearch.repositories.ESBlobStoreTestCase.readBlobFully;
39+
import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
3940
import static org.hamcrest.CoreMatchers.equalTo;
4041
import static org.hamcrest.CoreMatchers.notNullValue;
4142

@@ -116,15 +117,27 @@ public void testDeleteBlob() throws IOException {
116117
try (BlobStore store = newBlobStore()) {
117118
final String blobName = "foobar";
118119
final BlobContainer container = store.blobContainer(new BlobPath());
119-
expectThrows(IOException.class, () -> container.deleteBlob(blobName));
120+
expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
120121

121122
byte[] data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
122123
final BytesArray bytesArray = new BytesArray(data);
123124
writeBlob(container, blobName, bytesArray);
124125
container.deleteBlob(blobName); // should not raise
125126

126127
// blob deleted, so should raise again
127-
expectThrows(IOException.class, () -> container.deleteBlob(blobName));
128+
expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
129+
}
130+
}
131+
132+
public void testDeleteBlobIgnoringIfNotExists() throws IOException {
133+
try (BlobStore store = newBlobStore()) {
134+
BlobPath blobPath = new BlobPath();
135+
if (randomBoolean()) {
136+
blobPath = blobPath.add(randomAlphaOfLengthBetween(1, 10));
137+
}
138+
139+
final BlobContainer container = store.blobContainer(blobPath);
140+
container.deleteBlobIgnoringIfNotExists("does_not_exist");
128141
}
129142
}
130143

0 commit comments

Comments
 (0)