Skip to content

Fix Broken Index Shard Snapshot File Preventing Snapshot Creation #41476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
import static java.util.Collections.unmodifiableMap;

/**
* Contains information about all snapshot for the given shard in repository
* Contains information about all snapshots for the given shard in repository
* <p>
* This class is used to find files that were already snapshotted and clear out files that no longer referenced by any
* snapshots
* snapshots.
*/
public class BlobStoreIndexShardSnapshots implements Iterable<SnapshotFiles>, ToXContentFragment {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,27 +123,28 @@
* <pre>
* {@code
* STORE_ROOT
* |- index-N - list of all snapshot ids and the indices belonging to each snapshot, N is the generation of the file
* |- index-N - JSON serialized {@link RepositoryData} containing a list of all snapshot ids and the indices belonging to
* | each snapshot, N is the generation of the file
* |- index.latest - contains the numeric value of the latest generation of the index file (i.e. N from above)
* |- incompatible-snapshots - list of all snapshot ids that are no longer compatible with the current version of the cluster
* |- snap-20131010 - JSON serialized Snapshot for snapshot "20131010"
* |- meta-20131010.dat - JSON serialized MetaData for snapshot "20131010" (includes only global metadata)
* |- snap-20131011 - JSON serialized Snapshot for snapshot "20131011"
* |- meta-20131011.dat - JSON serialized MetaData for snapshot "20131011"
* |- snap-20131010.dat - SMILE serialized {@link SnapshotInfo} for snapshot "20131010"
* |- meta-20131010.dat - SMILE serialized {@link MetaData} for snapshot "20131010" (includes only global metadata)
* |- snap-20131011.dat - SMILE serialized {@link SnapshotInfo} for snapshot "20131011"
* |- meta-20131011.dat - SMILE serialized {@link MetaData} for snapshot "20131011"
* .....
* |- indices/ - data for all indices
* |- Ac1342-B_x/ - data for index "foo" which was assigned the unique id of Ac1342-B_x in the repository
* | |- meta-20131010.dat - JSON Serialized IndexMetaData for index "foo"
* | |- meta-20131010.dat - JSON Serialized {@link IndexMetaData} for index "foo"
* | |- 0/ - data for shard "0" of index "foo"
* | | |- __1 \
* | | |- __2 |
* | | |- __3 |- files from different segments see snapshot-* for their mappings to real segment files
* | | |- __4 |
* | | |- __5 /
* | | |- __1 \ (files with numeric names were created by older ES versions)
* | | |- __2 |
* | | |- __VPO5oDMVT5y4Akv8T_AO_A |- files from different segments see snap-* for their mappings to real segment files
* | | |- __1gbJy18wS_2kv1qI7FgKuQ |
* | | |- __R8JvZAHlSMyMXyZc2SS8Zg /
* | | .....
* | | |- snap-20131010.dat - JSON serialized BlobStoreIndexShardSnapshot for snapshot "20131010"
* | | |- snap-20131011.dat - JSON serialized BlobStoreIndexShardSnapshot for snapshot "20131011"
* | | |- list-123 - JSON serialized BlobStoreIndexShardSnapshot for snapshot "20131011"
* | | |- snap-20131010.dat - SMILE serialized {@link BlobStoreIndexShardSnapshot} for snapshot "20131010"
* | | |- snap-20131011.dat - SMILE serialized {@link BlobStoreIndexShardSnapshot} for snapshot "20131011"
* | | |- index-123 - SMILE serialized {@link BlobStoreIndexShardSnapshots} for the shard
* | |
* | |- 1/ - data for shard "1" of index "foo"
* | | |- __1
Expand Down Expand Up @@ -1142,7 +1143,7 @@ protected Tuple<BlobStoreIndexShardSnapshots, Integer> buildBlobStoreIndexShardS
logger.warn(() -> new ParameterizedMessage("failed to read commit point [{}]", name), e);
}
}
return new Tuple<>(new BlobStoreIndexShardSnapshots(snapshots), -1);
return new Tuple<>(new BlobStoreIndexShardSnapshots(snapshots), latest);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2963,6 +2963,108 @@ public void testRestoreSnapshotWithCorruptedIndexMetadata() throws Exception {
assertAcked(client().admin().cluster().prepareDeleteSnapshot("test-repo", snapshotInfo.snapshotId().getName()).get());
}

/**
* Tests that a shard snapshot with a corrupted shard index file can still be used for restore and incremental snapshots.
*/
public void testSnapshotWithCorruptedShardIndexFile() throws Exception {
final Client client = client();
final Path repo = randomRepoPath();
final String indexName = "test-idx";
final int nDocs = randomIntBetween(1, 10);

logger.info("--> creating index [{}] with [{}] documents in it", indexName, nDocs);
assertAcked(prepareCreate(indexName).setSettings(Settings.builder()
.put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0)));

final IndexRequestBuilder[] documents = new IndexRequestBuilder[nDocs];
for (int j = 0; j < nDocs; j++) {
documents[j] = client.prepareIndex(indexName, "_doc").setSource("foo", "bar");
}
indexRandom(true, documents);
flushAndRefresh();

logger.info("--> creating repository");
assertAcked(client().admin().cluster().preparePutRepository("test-repo")
.setType("fs")
.setSettings(Settings.builder()
.put("location", repo)));

final String snapshot1 = "test-snap-1";
logger.info("--> creating snapshot [{}]", snapshot1);
final SnapshotInfo snapshotInfo = client().admin().cluster().prepareCreateSnapshot("test-repo", snapshot1)
.setWaitForCompletion(true)
.get()
.getSnapshotInfo();
assertThat(snapshotInfo.failedShards(), equalTo(0));
assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards()));
assertThat(snapshotInfo.indices(), hasSize(1));

RepositoriesService service = internalCluster().getInstance(RepositoriesService.class, internalCluster().getMasterName());
Repository repository = service.repository("test-repo");

final RepositoryData repositoryData = getRepositoryData(repository);
final Map<String, IndexId> indexIds = repositoryData.getIndices();
assertThat(indexIds.size(), equalTo(1));

final IndexId corruptedIndex = indexIds.get(indexName);
final Path shardIndexFile = repo.resolve("indices")
.resolve(corruptedIndex.getId()).resolve("0")
.resolve("index-0");

logger.info("--> truncating shard index file [{}]", shardIndexFile);
try (SeekableByteChannel outChan = Files.newByteChannel(shardIndexFile, StandardOpenOption.WRITE)) {
outChan.truncate(randomInt(10));
}

logger.info("--> verifying snapshot state for [{}]", snapshot1);
List<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots("test-repo").get().getSnapshots();
assertThat(snapshotInfos.size(), equalTo(1));
assertThat(snapshotInfos.get(0).state(), equalTo(SnapshotState.SUCCESS));
assertThat(snapshotInfos.get(0).snapshotId().getName(), equalTo(snapshot1));

logger.info("--> deleting index [{}]", indexName);
assertAcked(client().admin().indices().prepareDelete(indexName));

logger.info("--> restoring snapshot [{}]", snapshot1);
client().admin().cluster().prepareRestoreSnapshot("test-repo", snapshot1)
.setRestoreGlobalState(randomBoolean())
.setWaitForCompletion(true)
.get();
ensureGreen();

assertHitCount(client().prepareSearch(indexName).setSize(0).get(), nDocs);

logger.info("--> indexing [{}] more documents into [{}]", nDocs, indexName);
for (int j = 0; j < nDocs; j++) {
documents[j] = client.prepareIndex(indexName, "_doc").setSource("foo2", "bar2");
}
indexRandom(true, documents);

final String snapshot2 = "test-snap-2";
logger.info("--> creating snapshot [{}]", snapshot2);
final SnapshotInfo snapshotInfo2 = client().admin().cluster().prepareCreateSnapshot("test-repo", snapshot2)
.setWaitForCompletion(true)
.get()
.getSnapshotInfo();
assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS));
assertThat(snapshotInfo2.failedShards(), equalTo(0));
assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo.totalShards()));
assertThat(snapshotInfo2.indices(), hasSize(1));

logger.info("--> deleting index [{}]", indexName);
assertAcked(client().admin().indices().prepareDelete(indexName));

logger.info("--> restoring snapshot [{}]", snapshot2);
client().admin().cluster().prepareRestoreSnapshot("test-repo", snapshot2)
.setRestoreGlobalState(randomBoolean())
.setWaitForCompletion(true)
.get();

ensureGreen();

assertHitCount(client().prepareSearch(indexName).setSize(0).get(), 2 * nDocs);
}

public void testCannotCreateSnapshotsWithSameName() throws Exception {
final String repositoryName = "test-repo";
final String snapshotName = "test-snap";
Expand Down