Skip to content

Commit 7915b5f

Browse files
committed
[Tests] Add debug information to CorruptedFileIT
This test failed, but the cause is not obvious. This commit adds more debug and trace logging so that, if the failure reproduces, we can gather more information. Related to #30577.
1 parent 8a89306 commit 7915b5f

File tree

2 files changed

+14
-7
lines changed

2 files changed

+14
-7
lines changed

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,7 @@ public void snapshot(final IndexCommit snapshotIndexCommit) {
11471147
// TODO apparently we don't use MetadataSnapshot#recoveryDiff(...) here, but we should
11481148
final Collection<String> fileNames;
11491149
try {
1150+
logger.trace("[{}] [{}] Loading store metadata using index commit [{}]", shardId, snapshotId, snapshotIndexCommit);
11501151
metadata = store.getMetadata(snapshotIndexCommit);
11511152
fileNames = snapshotIndexCommit.getFileNames();
11521153
} catch (IOException e) {
@@ -1242,9 +1243,6 @@ public void snapshot(final IndexCommit snapshotIndexCommit) {
12421243

12431244
/**
12441245
* Snapshot individual file
1245-
* <p>
1246-
* This is asynchronous method. Upon completion of the operation latch is getting counted down and any failures are
1247-
* added to the {@code failures} list
12481246
*
12491247
* @param fileInfo file to be snapshotted
12501248
*/

server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java

+13-4
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ protected void sendRequest(Connection connection, long requestId, String action,
470470
* TODO once checksum verification on snapshotting is implemented this test needs to be fixed or split into several
471471
* parts... We should also corrupt files on the actual snapshot and check that we don't restore the corrupted shard.
472472
*/
473-
@TestLogging("org.elasticsearch.monitor.fs:DEBUG")
473+
@TestLogging("org.elasticsearch.repositories:TRACE,org.elasticsearch.snapshots:TRACE")
474474
public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, InterruptedException, IOException {
475475
int numDocs = scaledRandomIntBetween(100, 1000);
476476
internalCluster().ensureAtLeastNumDataNodes(2);
@@ -494,6 +494,7 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
494494
assertHitCount(countResponse, numDocs);
495495

496496
ShardRouting shardRouting = corruptRandomPrimaryFile(false);
497+
logger.info("--> shard {} has a corrupted file", shardRouting);
497498
// we don't corrupt segments.gen since S/R doesn't snapshot this file
498499
// the other reason we can't corrupt segments.X files is that the snapshot flushes again before
499500
// it snapshots and that will write a new segments.X+1 file
@@ -504,9 +505,12 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
504505
.put("compress", randomBoolean())
505506
.put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
506507
logger.info("--> snapshot");
507-
CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).setIndices("test").get();
508-
assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL));
509-
logger.info("failed during snapshot -- maybe SI file got corrupted");
508+
final CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap")
509+
.setWaitForCompletion(true)
510+
.setIndices("test")
511+
.get();
512+
final SnapshotState snapshotState = createSnapshotResponse.getSnapshotInfo().state();
513+
logger.info("--> snapshot terminated with state " + snapshotState);
510514
final List<Path> files = listShardFiles(shardRouting);
511515
Path corruptedFile = null;
512516
for (Path file : files) {
@@ -515,6 +519,11 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
515519
break;
516520
}
517521
}
522+
if (snapshotState != SnapshotState.PARTIAL) {
523+
logger.info("--> listing shard files for investigation");
524+
files.forEach(f -> logger.info("path: {}", f.toAbsolutePath()));
525+
}
526+
assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL));
518527
assertThat(corruptedFile, notNullValue());
519528
}
520529

0 commit comments

Comments
 (0)