[Tests] Add debug information to CorruptedFileIT

tlrx · tlrx · commit 7915b5f7aaa1 · 2018-05-17T10:57:25.000+02:00
This test failed but the cause is not obvious. This commit adds more debug logging traces so that if it reproduces we could gather more information. Related #30577
diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java
@@ -1147,6 +1147,7 @@ public void snapshot(final IndexCommit snapshotIndexCommit) {
                 // TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should
                 final Collection<String> fileNames;
                 try {
+                    logger.trace("[{}] [{}] Loading store metadata using index commit [{}]", shardId, snapshotId, snapshotIndexCommit);
                     metadata = store.getMetadata(snapshotIndexCommit);
                     fileNames = snapshotIndexCommit.getFileNames();
                 } catch (IOException e) {
@@ -1242,9 +1243,6 @@ public void snapshot(final IndexCommit snapshotIndexCommit) {
 
         /**
          * Snapshot individual file
-         * <p>
-         * This is asynchronous method. Upon completion of the operation latch is getting counted down and any failures are
-         * added to the {@code failures} list
          *
          * @param fileInfo file to be snapshotted
          */
diff --git a/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java b/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java
@@ -470,7 +470,7 @@ protected void sendRequest(Connection connection, long requestId, String action,
      * TODO once checksum verification on snapshotting is implemented this test needs to be fixed or split into several
      * parts... We should also corrupt files on the actual snapshot and check that we don't restore the corrupted shard.
      */
-    @TestLogging("org.elasticsearch.monitor.fs:DEBUG")
+    @TestLogging("org.elasticsearch.repositories:TRACE,org.elasticsearch.snapshots:TRACE")
     public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, InterruptedException, IOException {
         int numDocs = scaledRandomIntBetween(100, 1000);
         internalCluster().ensureAtLeastNumDataNodes(2);
@@ -494,6 +494,7 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
         assertHitCount(countResponse, numDocs);
 
         ShardRouting shardRouting = corruptRandomPrimaryFile(false);
+        logger.info("--> shard {} has a corrupted file", shardRouting);
         // we don't corrupt segments.gen since S/R doesn't snapshot this file
         // the other problem here why we can't corrupt segments.X files is that the snapshot flushes again before
         // it snapshots and that will write a new segments.X+1 file
@@ -504,9 +505,12 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
                         .put("compress", randomBoolean())
                         .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
         logger.info("--> snapshot");
-        CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).setIndices("test").get();
-        assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL));
-        logger.info("failed during snapshot -- maybe SI file got corrupted");
+        final CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap")
+                                                                                        .setWaitForCompletion(true)
+                                                                                        .setIndices("test")
+                                                                                        .get();
+        final SnapshotState snapshotState = createSnapshotResponse.getSnapshotInfo().state();
+        logger.info("--> snapshot terminated with state " + snapshotState);
         final List<Path> files = listShardFiles(shardRouting);
         Path corruptedFile = null;
         for (Path file : files) {
@@ -515,6 +519,11 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I
                 break;
             }
         }
+        if (snapshotState != SnapshotState.PARTIAL) {
+            logger.info("--> listing shard files for investigation");
+            files.forEach(f -> logger.info("path: {}", f.toAbsolutePath()));
+        }
+        assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL));
         assertThat(corruptedFile, notNullValue());
     }