Skip to content

Commit 477391d

Browse files
Don't test corruption detection within CFS checksum (#33911)
Closes #33881
1 parent 17605bf commit 477391d

File tree

2 files changed

+112
-16
lines changed

2 files changed

+112
-16
lines changed

test/framework/src/main/java/org/elasticsearch/test/CorruptionUtils.java

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,19 @@ public static void corruptFile(Random random, Path... files) throws IOException
7878
checksumBeforeCorruption = CodecUtil.retrieveChecksum(input);
7979
}
8080
try (FileChannel raf = FileChannel.open(fileToCorrupt, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
81-
// read
82-
raf.position(random.nextInt((int) Math.min(Integer.MAX_VALUE, raf.size())));
83-
long filePointer = raf.position();
84-
ByteBuffer bb = ByteBuffer.wrap(new byte[1]);
85-
raf.read(bb);
86-
bb.flip();
81+
long maxPosition = raf.size();
8782

88-
// corrupt
89-
byte oldValue = bb.get(0);
90-
byte newValue = (byte) (oldValue + 1);
91-
bb.put(0, newValue);
92-
93-
// rewrite
94-
raf.position(filePointer);
95-
raf.write(bb);
96-
logger.info("Corrupting file -- flipping at position {} from {} to {} file: {}", filePointer,
97-
Integer.toHexString(oldValue), Integer.toHexString(newValue), fileToCorrupt.getFileName());
83+
if (fileToCorrupt.getFileName().toString().endsWith(".cfs") && maxPosition > 4) {
84+
// TODO: it is known that Lucene does not check the checksum of CFS file (CompoundFileS, like an archive)
85+
// see note at https://github.com/elastic/elasticsearch/pull/33911
86+
// so far, don't corrupt crc32 part of checksum (last 4 bytes) of cfs file
87+
// checksum is 8 bytes: first 4 bytes have to be zeros, while crc32 value is not verified
88+
maxPosition -= 4;
89+
}
90+
final int position = random.nextInt((int) Math.min(Integer.MAX_VALUE, maxPosition));
91+
corruptAt(fileToCorrupt, raf, position);
9892
}
93+
9994
long checksumAfterCorruption;
10095
long actualChecksumAfterCorruption;
10196
try (ChecksumIndexInput input = dir.openChecksumInput(fileToCorrupt.getFileName().toString(), IOContext.DEFAULT)) {
@@ -120,5 +115,25 @@ public static void corruptFile(Random random, Path... files) throws IOException
120115
}
121116
}
122117

118+
static void corruptAt(Path path, FileChannel channel, int position) throws IOException {
119+
// read
120+
channel.position(position);
121+
long filePointer = channel.position();
122+
ByteBuffer bb = ByteBuffer.wrap(new byte[1]);
123+
channel.read(bb);
124+
bb.flip();
125+
126+
// corrupt
127+
byte oldValue = bb.get(0);
128+
byte newValue = (byte) (oldValue + 1);
129+
bb.put(0, newValue);
130+
131+
// rewrite
132+
channel.position(filePointer);
133+
channel.write(bb);
134+
logger.info("Corrupting file -- flipping at position {} from {} to {} file: {}", filePointer,
135+
Integer.toHexString(oldValue), Integer.toHexString(newValue), path.getFileName());
136+
}
137+
123138

124139
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.test;
20+
21+
import org.apache.lucene.index.CheckIndex;
22+
import org.apache.lucene.store.SimpleFSDirectory;
23+
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
24+
import org.elasticsearch.index.shard.IndexShard;
25+
import org.elasticsearch.index.shard.IndexShardTestCase;
26+
import org.elasticsearch.index.shard.ShardPath;
27+
28+
import java.nio.channels.FileChannel;
29+
import java.nio.file.Files;
30+
import java.nio.file.Path;
31+
import java.nio.file.StandardOpenOption;
32+
import java.util.stream.Stream;
33+
34+
import static org.elasticsearch.test.CorruptionUtils.corruptAt;
35+
import static org.hamcrest.Matchers.equalTo;
36+
import static org.hamcrest.Matchers.lessThan;
37+
38+
public class CorruptionUtilsTests extends IndexShardTestCase {
39+
40+
/**
41+
* There is a dependency on Lucene bug fix
42+
* https://github.com/elastic/elasticsearch/pull/33911
43+
*/
44+
public void testLuceneCheckIndexIgnoresLast4Bytes() throws Exception {
45+
final IndexShard indexShard = newStartedShard(true);
46+
47+
final long numDocs = between(10, 100);
48+
for (long i = 0; i < numDocs; i++) {
49+
indexDoc(indexShard, "_doc", Long.toString(i), "{}");
50+
}
51+
indexShard.flush(new FlushRequest());
52+
closeShards(indexShard);
53+
54+
final ShardPath shardPath = indexShard.shardPath();
55+
56+
final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
57+
58+
final Path cfsFile;
59+
try (Stream<Path> paths = Files.walk(indexPath)) {
60+
cfsFile = paths.filter(p -> p.getFileName().toString().endsWith(".cfs")).findFirst()
61+
.orElseThrow(() -> new IllegalStateException("CFS file has to be there"));
62+
}
63+
64+
try (FileChannel raf = FileChannel.open(cfsFile, StandardOpenOption.READ, StandardOpenOption.WRITE)) {
65+
assertThat(raf.size(), lessThan(Integer.MAX_VALUE * 1L));
66+
final int maxPosition = (int) raf.size();
67+
// corrupt only last 4 bytes!
68+
final int position = randomIntBetween(maxPosition - 4, maxPosition - 1);
69+
corruptAt(cfsFile, raf, position);
70+
}
71+
72+
final CheckIndex.Status status;
73+
try (CheckIndex checkIndex = new CheckIndex(new SimpleFSDirectory(indexPath))) {
74+
status = checkIndex.checkIndex();
75+
}
76+
77+
assertThat("That's a good news! "
78+
+ "Lucene now validates CRC32 of CFS file: time to drop workaround at CorruptionUtils (and this test)",
79+
status.clean, equalTo(true));
80+
}
81+
}

0 commit comments

Comments
 (0)