Improve BlobStoreFormatTests#randomCorruption (#73201)

DaveCTurner · DaveCTurner · commit b4ff4d098029 · 2021-05-18T17:13:09.000+01:00
Today this method corrupts bytes until the checksum changes, but (a)
it compares the checksum against one computed over the buffer before
the file has even been read into it, and (b) changing a single byte
will always invalidate a CRC-32 checksum, so the loop is unnecessary,
as is the checksum calculation. It also never tries truncating the
file, which is a realistic kind of corruption that we must be able to
detect.

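This commit addresses all of that: the method now either flips bits in
a single randomly chosen byte or truncates the file at a random
position, and the now-unused checksum helper is removed.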
diff --git a/server/src/test/java/org/elasticsearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/elasticsearch/snapshots/BlobStoreFormatTests.java
@@ -17,20 +17,19 @@
 import org.elasticsearch.common.blobstore.fs.FsBlobStore;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.io.Streams;
-import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.util.MockBigArrays;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.ToXContentFragment;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.index.translog.BufferedChecksumStreamOutput;
 import org.elasticsearch.repositories.blobstore.ChecksumBlobStoreFormat;
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Map;
+
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.greaterThan;
 
@@ -145,24 +144,24 @@ protected BlobStore createTestBlobStore() throws IOException {
     }
 
     protected void randomCorruption(BlobContainer blobContainer, String blobName) throws IOException {
-        byte[] buffer = new byte[(int) blobContainer.listBlobsByPrefix(blobName).get(blobName).length()];
-        long originalChecksum = checksum(buffer);
+        final byte[] buffer = new byte[(int) blobContainer.listBlobsByPrefix(blobName).get(blobName).length()];
         try (InputStream inputStream = blobContainer.readBlob(blobName)) {
             Streams.readFully(inputStream, buffer);
         }
-        do {
-            int location = randomIntBetween(0, buffer.length - 1);
-            buffer[location] = (byte) (buffer[location] ^ 42);
-        } while (originalChecksum == checksum(buffer));
-        blobContainer.writeBlob(blobName, new BytesArray(buffer), false);
-    }
-
-    private long checksum(byte[] buffer) throws IOException {
-        try (BytesStreamOutput streamOutput = new BytesStreamOutput()) {
-            try (BufferedChecksumStreamOutput checksumOutput = new BufferedChecksumStreamOutput(streamOutput)) {
-                checksumOutput.write(buffer);
-                return checksumOutput.getChecksum();
-            }
+        final BytesArray corruptedBytes;
+        final int location = randomIntBetween(0, buffer.length - 1);
+        if (randomBoolean()) {
+            // flipping bits in a single byte will always invalidate the file: CRC-32 certainly detects all eight-bit-burst errors; we don't
+            // checksum the last 8 bytes but we do verify that they contain the checksum preceded by 4 zero bytes so in any case this will
+            // be a detectable error:
+            buffer[location] = (byte) (buffer[location] ^ between(1, 255));
+            corruptedBytes = new BytesArray(buffer);
+        } else {
+            // truncation will invalidate the file: the last 12 bytes should start with 8 zero bytes but by construction we won't have
+            // another sequence of 8 zero bytes anywhere in the file, let alone such a sequence followed by a correct checksum.
+            corruptedBytes = new BytesArray(buffer, 0, location);
         }
+        blobContainer.writeBlob(blobName, corruptedBytes, false);
     }
+
 }