Skip to content

Commit 1fdc3f0

Browse files
authored
Do not expose hard-deleted docs in Lucene history (elastic#32333)
Today when reading operation history in Lucene, we read all documents. However, if indexing a document is aborted, IndexWriter will hard-delete it; we, therefore, need to exclude that document from Lucene history. This commit makes sure that we exclude aborted documents by using the hard liveDocs of a SegmentReader if there are deletes. Closes elastic#32269
1 parent 2245812 commit 1fdc3f0

File tree

4 files changed

+119
-36
lines changed

4 files changed

+119
-36
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/suggest/30_context.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,8 @@ setup:
339339
---
340340
"Indexing and Querying without contexts is forbidden":
341341
- skip:
342-
version: "all"
343-
reason: AwaitsFix https://github.com/elastic/elasticsearch/issues/32269
342+
version: " - 6.99.99"
343+
reason: this feature was removed in 7.0
344344

345345
- do:
346346
index:

server/src/main/java/org/elasticsearch/common/lucene/Lucene.java

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,8 @@ public int length() {
837837
}
838838

839839
/**
840-
* Wraps a directory reader to include all live docs.
840+
* Wraps a directory reader to make all documents live except those were rolled back
841+
* or hard-deleted due to non-aborting exceptions during indexing.
841842
* The wrapped reader can be used to query all documents.
842843
*
843844
* @param in the input directory reader
@@ -848,17 +849,21 @@ public static DirectoryReader wrapAllDocsLive(DirectoryReader in) throws IOExcep
848849
}
849850

850851
private static final class DirectoryReaderWithAllLiveDocs extends FilterDirectoryReader {
851-
static final class SubReaderWithAllLiveDocs extends FilterLeafReader {
852-
SubReaderWithAllLiveDocs(LeafReader in) {
852+
static final class LeafReaderWithLiveDocs extends FilterLeafReader {
853+
final Bits liveDocs;
854+
final int numDocs;
855+
LeafReaderWithLiveDocs(LeafReader in, Bits liveDocs, int numDocs) {
853856
super(in);
857+
this.liveDocs = liveDocs;
858+
this.numDocs = numDocs;
854859
}
855860
@Override
856861
public Bits getLiveDocs() {
857-
return null;
862+
return liveDocs;
858863
}
859864
@Override
860865
public int numDocs() {
861-
return maxDoc();
866+
return numDocs;
862867
}
863868
@Override
864869
public CacheHelper getCoreCacheHelper() {
@@ -869,14 +874,28 @@ public CacheHelper getReaderCacheHelper() {
869874
return null; // Modifying liveDocs
870875
}
871876
}
877+
872878
DirectoryReaderWithAllLiveDocs(DirectoryReader in) throws IOException {
873-
super(in, new FilterDirectoryReader.SubReaderWrapper() {
879+
super(in, new SubReaderWrapper() {
874880
@Override
875881
public LeafReader wrap(LeafReader leaf) {
876-
return new SubReaderWithAllLiveDocs(leaf);
882+
SegmentReader segmentReader = segmentReader(leaf);
883+
Bits hardLiveDocs = segmentReader.getHardLiveDocs();
884+
if (hardLiveDocs == null) {
885+
return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
886+
}
887+
// TODO: Avoid recalculate numDocs everytime.
888+
int numDocs = 0;
889+
for (int i = 0; i < hardLiveDocs.length(); i++) {
890+
if (hardLiveDocs.get(i)) {
891+
numDocs++;
892+
}
893+
}
894+
return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
877895
}
878896
});
879897
}
898+
880899
@Override
881900
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
882901
return wrapAllDocsLive(in);

server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,23 @@
3333
import org.apache.lucene.index.NoMergePolicy;
3434
import org.apache.lucene.index.RandomIndexWriter;
3535
import org.apache.lucene.index.SegmentInfos;
36+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
3637
import org.apache.lucene.index.Term;
3738
import org.apache.lucene.search.IndexSearcher;
3839
import org.apache.lucene.search.MatchAllDocsQuery;
40+
import org.apache.lucene.search.ScoreDoc;
3941
import org.apache.lucene.search.TermQuery;
42+
import org.apache.lucene.search.TopDocs;
4043
import org.apache.lucene.search.Weight;
4144
import org.apache.lucene.store.Directory;
4245
import org.apache.lucene.store.MMapDirectory;
4346
import org.apache.lucene.store.MockDirectoryWrapper;
4447
import org.apache.lucene.util.Bits;
48+
import org.elasticsearch.core.internal.io.IOUtils;
4549
import org.elasticsearch.test.ESTestCase;
4650

4751
import java.io.IOException;
52+
import java.io.StringReader;
4853
import java.util.ArrayList;
4954
import java.util.Collections;
5055
import java.util.HashSet;
@@ -53,6 +58,8 @@
5358
import java.util.concurrent.CountDownLatch;
5459
import java.util.concurrent.atomic.AtomicBoolean;
5560

61+
import static org.hamcrest.Matchers.equalTo;
62+
5663
public class LuceneTests extends ESTestCase {
5764
public void testWaitForIndex() throws Exception {
5865
final MockDirectoryWrapper dir = newMockDirectory();
@@ -406,4 +413,88 @@ public void testMMapHackSupported() throws Exception {
406413
// add assume's here if needed for certain platforms, but we should know if it does not work.
407414
assertTrue("MMapDirectory does not support unmapping: " + MMapDirectory.UNMAP_NOT_SUPPORTED_REASON, MMapDirectory.UNMAP_SUPPORTED);
408415
}
416+
417+
public void testWrapAllDocsLive() throws Exception {
418+
Directory dir = newDirectory();
419+
IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETE_FIELD)
420+
.setMergePolicy(new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, MatchAllDocsQuery::new, newMergePolicy()));
421+
IndexWriter writer = new IndexWriter(dir, config);
422+
int numDocs = between(1, 10);
423+
Set<String> liveDocs = new HashSet<>();
424+
for (int i = 0; i < numDocs; i++) {
425+
String id = Integer.toString(i);
426+
Document doc = new Document();
427+
doc.add(new StringField("id", id, Store.YES));
428+
writer.addDocument(doc);
429+
liveDocs.add(id);
430+
}
431+
for (int i = 0; i < numDocs; i++) {
432+
if (randomBoolean()) {
433+
String id = Integer.toString(i);
434+
Document doc = new Document();
435+
doc.add(new StringField("id", "v2-" + id, Store.YES));
436+
if (randomBoolean()) {
437+
doc.add(Lucene.newSoftDeleteField());
438+
}
439+
writer.softUpdateDocument(new Term("id", id), doc, Lucene.newSoftDeleteField());
440+
liveDocs.add("v2-" + id);
441+
}
442+
}
443+
try (DirectoryReader unwrapped = DirectoryReader.open(writer)) {
444+
DirectoryReader reader = Lucene.wrapAllDocsLive(unwrapped);
445+
assertThat(reader.numDocs(), equalTo(liveDocs.size()));
446+
IndexSearcher searcher = new IndexSearcher(reader);
447+
Set<String> actualDocs = new HashSet<>();
448+
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);
449+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
450+
actualDocs.add(reader.document(scoreDoc.doc).get("id"));
451+
}
452+
assertThat(actualDocs, equalTo(liveDocs));
453+
}
454+
IOUtils.close(writer, dir);
455+
}
456+
457+
public void testWrapLiveDocsNotExposeAbortedDocuments() throws Exception {
458+
Directory dir = newDirectory();
459+
IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETE_FIELD)
460+
.setMergePolicy(new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, MatchAllDocsQuery::new, newMergePolicy()));
461+
IndexWriter writer = new IndexWriter(dir, config);
462+
int numDocs = between(1, 10);
463+
List<String> liveDocs = new ArrayList<>();
464+
for (int i = 0; i < numDocs; i++) {
465+
String id = Integer.toString(i);
466+
Document doc = new Document();
467+
doc.add(new StringField("id", id, Store.YES));
468+
if (randomBoolean()) {
469+
doc.add(Lucene.newSoftDeleteField());
470+
}
471+
writer.addDocument(doc);
472+
liveDocs.add(id);
473+
}
474+
int abortedDocs = between(1, 10);
475+
for (int i = 0; i < abortedDocs; i++) {
476+
try {
477+
Document doc = new Document();
478+
doc.add(new StringField("id", "aborted-" + i, Store.YES));
479+
StringReader reader = new StringReader("");
480+
doc.add(new TextField("other", reader));
481+
reader.close(); // mark the indexing hit non-aborting error
482+
writer.addDocument(doc);
483+
fail("index should have failed");
484+
} catch (Exception ignored) { }
485+
}
486+
try (DirectoryReader unwrapped = DirectoryReader.open(writer)) {
487+
DirectoryReader reader = Lucene.wrapAllDocsLive(unwrapped);
488+
assertThat(reader.maxDoc(), equalTo(numDocs + abortedDocs));
489+
assertThat(reader.numDocs(), equalTo(liveDocs.size()));
490+
IndexSearcher searcher = new IndexSearcher(reader);
491+
List<String> actualDocs = new ArrayList<>();
492+
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);
493+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
494+
actualDocs.add(reader.document(scoreDoc.doc).get("id"));
495+
}
496+
assertThat(actualDocs, equalTo(liveDocs));
497+
}
498+
IOUtils.close(writer, dir);
499+
}
409500
}

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@
6464
import org.elasticsearch.common.io.stream.StreamInput;
6565
import org.elasticsearch.common.lease.Releasable;
6666
import org.elasticsearch.common.lease.Releasables;
67-
import org.elasticsearch.common.lucene.Lucene;
6867
import org.elasticsearch.common.settings.IndexScopedSettings;
6968
import org.elasticsearch.common.settings.Settings;
7069
import org.elasticsearch.common.unit.TimeValue;
@@ -3228,30 +3227,4 @@ public void testSupplyTombstoneDoc() throws Exception {
32283227

32293228
closeShards(shard);
32303229
}
3231-
3232-
public void testSearcherIncludesSoftDeletes() throws Exception {
3233-
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
3234-
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
3235-
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
3236-
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
3237-
.build();
3238-
IndexMetaData metaData = IndexMetaData.builder("test")
3239-
.putMapping("test", "{ \"properties\": { \"foo\": { \"type\": \"text\"}}}")
3240-
.settings(settings)
3241-
.primaryTerm(0, 1).build();
3242-
IndexShard shard = newShard(new ShardId(metaData.getIndex(), 0), true, "n1", metaData, null);
3243-
recoverShardFromStore(shard);
3244-
indexDoc(shard, "test", "0", "{\"foo\" : \"bar\"}");
3245-
indexDoc(shard, "test", "1", "{\"foo\" : \"baz\"}");
3246-
deleteDoc(shard, "test", "0");
3247-
shard.refresh("test");
3248-
try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
3249-
IndexSearcher searchWithSoftDeletes = new IndexSearcher(Lucene.wrapAllDocsLive(searcher.getDirectoryReader()));
3250-
assertThat(searcher.searcher().search(new TermQuery(new Term("foo", "bar")), 10).totalHits, equalTo(0L));
3251-
assertThat(searchWithSoftDeletes.search(new TermQuery(new Term("foo", "bar")), 10).totalHits, equalTo(1L));
3252-
assertThat(searcher.searcher().search(new TermQuery(new Term("foo", "baz")), 10).totalHits, equalTo(1L));
3253-
assertThat(searchWithSoftDeletes.search(new TermQuery(new Term("foo", "baz")), 10).totalHits, equalTo(1L));
3254-
}
3255-
closeShards(shard);
3256-
}
32573230
}

0 commit comments

Comments
 (0)