Commit 6dd0aa5

Integrates soft-deletes into Elasticsearch (#33222)
This PR integrates Lucene soft-deletes (LUCENE-8200) into Elasticsearch. Highlights of the work in this PR include:

- Replace hard-deletes with soft-deletes in InternalEngine
- Use _recovery_source if _source is disabled or modified (#31106)
- Soft-deletes retention policy based on the global checkpoint (#30335)
- Read operation history from Lucene instead of the translog (#30120)
- Use Lucene history in peer-recovery (#30522)

Relates #30086
Closes #29530

---

This work was done by the whole team; however, the following individuals (in lexical order) made significant contributions in coding and reviewing:

Co-authored-by: Adrien Grand [email protected]
Co-authored-by: Boaz Leskes [email protected]
Co-authored-by: Jason Tedor [email protected]
Co-authored-by: Martijn van Groningen [email protected]
Co-authored-by: Nhat Nguyen [email protected]
Co-authored-by: Simon Willnauer [email protected]
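For context, a minimal sketch of the Lucene primitives this commit builds on (LUCENE-8200). The field name `__soft_deletes` matches the `Lucene.SOFT_DELETES_FIELD` constant introduced below; the match-all retention query is only a placeholder for the global-checkpoint-based retention policy (#30335):

    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.TieredMergePolicy;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class SoftDeletesSketch {
        public static void main(String[] args) throws IOException {
            try (Directory dir = new RAMDirectory()) {
                IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer())
                    // deletes/updates mark documents via this doc-values field instead of removing them
                    .setSoftDeletesField("__soft_deletes")
                    // keep soft-deleted documents matching the retention query alive across merges;
                    // Elasticsearch's real retention query is derived from the global checkpoint
                    .setMergePolicy(new SoftDeletesRetentionMergePolicy(
                        "__soft_deletes", MatchAllDocsQuery::new, new TieredMergePolicy()));
                try (IndexWriter writer = new IndexWriter(dir, config)) {
                    Document doc = new Document();
                    doc.add(new StringField("_id", "1", Field.Store.NO));
                    writer.addDocument(doc);
                    // a soft "delete": the old document stays in the segment, marked deleted
                    writer.softUpdateDocument(new Term("_id", "1"), new Document(),
                        new NumericDocValuesField("__soft_deletes", 1));
                    try (DirectoryReader reader = DirectoryReader.open(writer)) {
                        // soft-deleted docs are masked from normal readers but remain on disk
                        System.out.println(reader.numDocs() + " live of " + reader.maxDoc() + " total");
                    }
                }
            }
        }
    }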
1 parent 86feb77 commit 6dd0aa5

File tree

63 files changed: +3432 −499 lines


modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java

+7 −1

@@ -77,6 +77,7 @@
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.CheckedFunction;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesArray;
@@ -87,6 +88,7 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.IndexService;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperService;
@@ -1109,7 +1111,11 @@ private void duelRun(PercolateQuery.QueryStore queryStore, MemoryIndex memoryInd
     }

     private void addQuery(Query query, List<ParseContext.Document> docs) {
-        ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
+        IndexMetaData build = IndexMetaData.builder("")
+            .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
+            .numberOfShards(1).numberOfReplicas(0).build();
+        IndexSettings settings = new IndexSettings(build, Settings.EMPTY);
+        ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(settings,
             mapperService.documentMapperParser(), documentMapper, null, null);
         fieldMapper.processQuery(query, parseContext);
         ParseContext.Document queryDocument = parseContext.doc();
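The change above (and the same pattern repeated throughout PercolatorFieldMapperTests below) is mechanical: ParseContext.InternalParseContext now takes an IndexSettings rather than a bare Settings, so each call site builds a minimal single-shard index. A hypothetical test helper, not part of this commit, capturing the repeated construction:

    // Hypothetical helper (not in this commit): the IndexSettings construction
    // that the percolator tests now repeat inline at each call site.
    private static IndexSettings newTestIndexSettings() {
        IndexMetaData meta = IndexMetaData.builder("")
            .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
            .numberOfShards(1)
            .numberOfReplicas(0)
            .build();
        return new IndexSettings(meta, Settings.EMPTY);
    }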

modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorFieldMapperTests.java

+24 −6

@@ -42,6 +42,7 @@
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.support.PlainActionFuture;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
@@ -58,6 +59,7 @@
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.index.IndexService;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.DocumentMapperParser;
 import org.elasticsearch.index.mapper.MapperParsingException;
@@ -182,7 +184,11 @@ public void testExtractTerms() throws Exception {

     DocumentMapper documentMapper = mapperService.documentMapper("doc");
     PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName);
-    ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
+    IndexMetaData build = IndexMetaData.builder("")
+        .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
+        .numberOfShards(1).numberOfReplicas(0).build();
+    IndexSettings settings = new IndexSettings(build, Settings.EMPTY);
+    ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(settings,
         mapperService.documentMapperParser(), documentMapper, null, null);
     fieldMapper.processQuery(bq.build(), parseContext);
     ParseContext.Document document = parseContext.doc();
@@ -204,7 +210,7 @@
     bq.add(termQuery1, Occur.MUST);
     bq.add(termQuery2, Occur.MUST);

-    parseContext = new ParseContext.InternalParseContext(Settings.EMPTY, mapperService.documentMapperParser(),
+    parseContext = new ParseContext.InternalParseContext(settings, mapperService.documentMapperParser(),
         documentMapper, null, null);
     fieldMapper.processQuery(bq.build(), parseContext);
     document = parseContext.doc();
@@ -232,8 +238,12 @@ public void testExtractRanges() throws Exception {
     bq.add(rangeQuery2, Occur.MUST);

     DocumentMapper documentMapper = mapperService.documentMapper("doc");
+    IndexMetaData build = IndexMetaData.builder("")
+        .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
+        .numberOfShards(1).numberOfReplicas(0).build();
+    IndexSettings settings = new IndexSettings(build, Settings.EMPTY);
     PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName);
-    ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
+    ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(settings,
         mapperService.documentMapperParser(), documentMapper, null, null);
     fieldMapper.processQuery(bq.build(), parseContext);
     ParseContext.Document document = parseContext.doc();
@@ -259,7 +269,7 @@
         .rangeQuery(15, 20, true, true, null, null, null, null);
     bq.add(rangeQuery2, Occur.MUST);

-    parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
+    parseContext = new ParseContext.InternalParseContext(settings,
         mapperService.documentMapperParser(), documentMapper, null, null);
     fieldMapper.processQuery(bq.build(), parseContext);
     document = parseContext.doc();
@@ -283,7 +293,11 @@ public void testExtractTermsAndRanges_failed() throws Exception {
     TermRangeQuery query = new TermRangeQuery("field1", new BytesRef("a"), new BytesRef("z"), true, true);
     DocumentMapper documentMapper = mapperService.documentMapper("doc");
     PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName);
-    ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
+    IndexMetaData build = IndexMetaData.builder("")
+        .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
+        .numberOfShards(1).numberOfReplicas(0).build();
+    IndexSettings settings = new IndexSettings(build, Settings.EMPTY);
+    ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(settings,
         mapperService.documentMapperParser(), documentMapper, null, null);
     fieldMapper.processQuery(query, parseContext);
     ParseContext.Document document = parseContext.doc();
@@ -298,7 +312,11 @@ public void testExtractTermsAndRanges_partial() throws Exception {
     PhraseQuery phraseQuery = new PhraseQuery("field", "term");
     DocumentMapper documentMapper = mapperService.documentMapper("doc");
     PercolatorFieldMapper fieldMapper = (PercolatorFieldMapper) documentMapper.mappers().getMapper(fieldName);
-    ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(Settings.EMPTY,
+    IndexMetaData build = IndexMetaData.builder("")
+        .settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT))
+        .numberOfShards(1).numberOfReplicas(0).build();
+    IndexSettings settings = new IndexSettings(build, Settings.EMPTY);
+    ParseContext.InternalParseContext parseContext = new ParseContext.InternalParseContext(settings,
         mapperService.documentMapperParser(), documentMapper, null, null);
     fieldMapper.processQuery(phraseQuery, parseContext);
     ParseContext.Document document = parseContext.doc();

server/src/main/java/org/elasticsearch/common/lucene/Lucene.java

+85 −1

@@ -27,8 +27,10 @@
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.document.LatLonDocValuesField;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterDirectoryReader;
 import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexFileNames;
@@ -96,6 +98,8 @@ public class Lucene {
         assert annotation == null : "DocValuesFormat " + LATEST_DOC_VALUES_FORMAT + " is deprecated";
     }

+    public static final String SOFT_DELETES_FIELD = "__soft_deletes";
+
     public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer());
     public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer());

@@ -140,7 +144,7 @@ public static Iterable<String> files(SegmentInfos infos) throws IOException {
     public static int getNumDocs(SegmentInfos info) {
         int numDocs = 0;
         for (SegmentCommitInfo si : info) {
-            numDocs += si.info.maxDoc() - si.getDelCount();
+            numDocs += si.info.maxDoc() - si.getDelCount() - si.getSoftDelCount();
         }
         return numDocs;
     }
@@ -197,6 +201,7 @@ public static SegmentInfos pruneUnreferencedFiles(String segmentsFileName, Direc
         }
         final CommitPoint cp = new CommitPoint(si, directory);
         try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
+                .setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
                 .setIndexCommit(cp)
                 .setCommitOnClose(false)
                 .setMergePolicy(NoMergePolicy.INSTANCE)
@@ -220,6 +225,7 @@ public static void cleanLuceneIndex(Directory directory) throws IOException {
             }
         }
         try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
+                .setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
                 .setMergePolicy(NoMergePolicy.INSTANCE) // no merges
                 .setCommitOnClose(false) // no commits
                 .setOpenMode(IndexWriterConfig.OpenMode.CREATE))) // force creation - don't append...
@@ -829,4 +835,82 @@ public int length() {
         }
         };
     }
+
+    /**
+     * Wraps a directory reader to make all documents live except those that were rolled back
+     * or hard-deleted due to non-aborting exceptions during indexing.
+     * The wrapped reader can be used to query all documents.
+     *
+     * @param in the input directory reader
+     * @return the wrapped reader
+     */
+    public static DirectoryReader wrapAllDocsLive(DirectoryReader in) throws IOException {
+        return new DirectoryReaderWithAllLiveDocs(in);
+    }
+
+    private static final class DirectoryReaderWithAllLiveDocs extends FilterDirectoryReader {
+        static final class LeafReaderWithLiveDocs extends FilterLeafReader {
+            final Bits liveDocs;
+            final int numDocs;
+            LeafReaderWithLiveDocs(LeafReader in, Bits liveDocs, int numDocs) {
+                super(in);
+                this.liveDocs = liveDocs;
+                this.numDocs = numDocs;
+            }
+            @Override
+            public Bits getLiveDocs() {
+                return liveDocs;
+            }
+            @Override
+            public int numDocs() {
+                return numDocs;
+            }
+            @Override
+            public CacheHelper getCoreCacheHelper() {
+                return in.getCoreCacheHelper();
+            }
+            @Override
+            public CacheHelper getReaderCacheHelper() {
+                return null; // Modifying liveDocs
+            }
+        }
+
+        DirectoryReaderWithAllLiveDocs(DirectoryReader in) throws IOException {
+            super(in, new SubReaderWrapper() {
+                @Override
+                public LeafReader wrap(LeafReader leaf) {
+                    SegmentReader segmentReader = segmentReader(leaf);
+                    Bits hardLiveDocs = segmentReader.getHardLiveDocs();
+                    if (hardLiveDocs == null) {
+                        return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
+                    }
+                    // TODO: Can we avoid calculating numDocs by using SegmentReader#getSegmentInfo with LUCENE-8458?
+                    int numDocs = 0;
+                    for (int i = 0; i < hardLiveDocs.length(); i++) {
+                        if (hardLiveDocs.get(i)) {
+                            numDocs++;
+                        }
+                    }
+                    return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
+                }
+            });
+        }
+
+        @Override
+        protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
+            return wrapAllDocsLive(in);
+        }
+
+        @Override
+        public CacheHelper getReaderCacheHelper() {
+            return null; // Modifying liveDocs
+        }
+    }
+
+    /**
+     * Returns a numeric docvalues field which can be used to soft-delete documents.
+     */
+    public static NumericDocValuesField newSoftDeletesField() {
+        return new NumericDocValuesField(SOFT_DELETES_FIELD, 1);
+    }
 }
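A sketch of how the two new public helpers compose, assuming a writer configured with Lucene.SOFT_DELETES_FIELD (as pruneUnreferencedFiles and cleanLuceneIndex now do). The empty tombstone document is a stand-in for the engine's real delete tombstones:

    import java.io.IOException;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.elasticsearch.common.lucene.Lucene;

    final class SoftDeleteHelpersSketch {
        /** Soft-delete a document: replace it with a tombstone marked by the soft-deletes field. */
        static void softDelete(IndexWriter writer, String uid) throws IOException {
            writer.softUpdateDocument(new Term("_id", uid), new Document(), Lucene.newSoftDeletesField());
        }

        /** Open a history view in which soft-deleted documents are live again;
         *  only rolled-back or hard-deleted documents remain hidden. */
        static DirectoryReader openHistoryView(DirectoryReader reader) throws IOException {
            return Lucene.wrapAllDocsLive(reader);
        }
    }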

server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java

+15 −6

@@ -28,6 +28,7 @@
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndSeqNo;
 import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion;
 import org.elasticsearch.index.mapper.SeqNoFieldMapper;
@@ -66,15 +67,22 @@ final class PerThreadIDVersionAndSeqNoLookup {
      */
     PerThreadIDVersionAndSeqNoLookup(LeafReader reader, String uidField) throws IOException {
         this.uidField = uidField;
-        Terms terms = reader.terms(uidField);
+        final Terms terms = reader.terms(uidField);
         if (terms == null) {
-            throw new IllegalArgumentException("reader misses the [" + uidField + "] field");
+            // If a segment contains only no-ops, it does not have _uid but has both _soft_deletes and _tombstone fields.
+            final NumericDocValues softDeletesDV = reader.getNumericDocValues(Lucene.SOFT_DELETES_FIELD);
+            final NumericDocValues tombstoneDV = reader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME);
+            if (softDeletesDV == null || tombstoneDV == null) {
+                throw new IllegalArgumentException("reader does not have _uid terms but not a no-op segment; " +
+                    "_soft_deletes [" + softDeletesDV + "], _tombstone [" + tombstoneDV + "]");
+            }
+            termsEnum = null;
+        } else {
+            termsEnum = terms.iterator();
         }
-        termsEnum = terms.iterator();
         if (reader.getNumericDocValues(VersionFieldMapper.NAME) == null) {
-            throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field");
+            throw new IllegalArgumentException("reader misses the [" + VersionFieldMapper.NAME + "] field; _uid terms [" + terms + "]");
        }
-
        Object readerKey = null;
        assert (readerKey = reader.getCoreCacheHelper().getKey()) != null;
        this.readerKey = readerKey;
@@ -111,7 +119,8 @@ public DocIdAndVersion lookupVersion(BytesRef id, LeafReaderContext context)
      * {@link DocIdSetIterator#NO_MORE_DOCS} is returned if not found
      * */
     private int getDocID(BytesRef id, Bits liveDocs) throws IOException {
-        if (termsEnum.seekExact(id)) {
+        // termsEnum can possibly be null here if this leaf contains only no-ops.
+        if (termsEnum != null && termsEnum.seekExact(id)) {
             int docID = DocIdSetIterator.NO_MORE_DOCS;
             // there may be more than one matching docID, in the case of nested docs, so we want the last one:
             docsEnum = termsEnum.postings(docsEnum, 0);
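A hedged illustration of the no-op documents the new branch accounts for: a no-op/tombstone carries doc-values fields only, so a segment containing nothing else has no _uid postings for the lookup to iterate. Field names follow the error message above; the exact tombstone layout in InternalEngine may differ.

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericDocValuesField;

    final class NoOpTombstoneSketch {
        static Document newNoOpTombstone(long seqNo, long primaryTerm) {
            Document doc = new Document();
            doc.add(new NumericDocValuesField("_seq_no", seqNo));
            doc.add(new NumericDocValuesField("_primary_term", primaryTerm));
            doc.add(new NumericDocValuesField("_tombstone", 1));      // SeqNoFieldMapper.TOMBSTONE_NAME
            doc.add(new NumericDocValuesField("__soft_deletes", 1));  // Lucene.SOFT_DELETES_FIELD
            // No _id/_uid indexed field: reader.terms(uidField) returns null for
            // segments containing only such documents, hence termsEnum == null.
            return doc;
        }
    }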

server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

+2 −0

@@ -129,6 +129,8 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
     IndexSettings.MAX_REGEX_LENGTH_SETTING,
     ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING,
     IndexSettings.INDEX_GC_DELETES_SETTING,
+    IndexSettings.INDEX_SOFT_DELETES_SETTING,
+    IndexSettings.INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING,
     IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING,
     UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING,
     EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING,
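A usage sketch for the two newly registered index-scoped settings. The string keys are assumptions about the constants' backing keys in IndexSettings ("index.soft_deletes.enabled" and "index.soft_deletes.retention.operations"); soft-deletes are opt-in at index creation in this change:

    import org.elasticsearch.common.settings.Settings;

    final class SoftDeletesSettingsSketch {
        static Settings softDeletesEnabled() {
            return Settings.builder()
                .put("index.soft_deletes.enabled", true)              // assumed key: turn soft-deletes on for a new index
                .put("index.soft_deletes.retention.operations", 1024) // assumed key: soft-deleted ops retained for history
                .build();
        }
    }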
