Skip to content

Commit bbc29bb

Browse files
authored
Rebuild version map when opening internal engine (#43202)
With this change, we rebuild the live version map and local checkpoint using documents (including soft-deleted ones) from the safe commit when opening an internal engine. This allows us to safely prune away the _id of all soft-deleted documents, as the version map is always in sync with the Lucene index. Relates #40741. Supersedes #42979.
1 parent 551353d commit bbc29bb

File tree

9 files changed

+348
-162
lines changed

9 files changed

+348
-162
lines changed

server/src/main/java/org/elasticsearch/common/lucene/Lucene.java

-38
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import org.apache.lucene.codecs.DocValuesFormat;
2828
import org.apache.lucene.codecs.PostingsFormat;
2929
import org.apache.lucene.document.LatLonDocValuesField;
30-
import org.apache.lucene.document.LongPoint;
3130
import org.apache.lucene.document.NumericDocValuesField;
3231
import org.apache.lucene.index.BinaryDocValues;
3332
import org.apache.lucene.index.CorruptIndexException;
@@ -95,7 +94,6 @@
9594
import org.elasticsearch.index.analysis.AnalyzerScope;
9695
import org.elasticsearch.index.analysis.NamedAnalyzer;
9796
import org.elasticsearch.index.fielddata.IndexFieldData;
98-
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
9997

10098
import java.io.IOException;
10199
import java.text.ParseException;
@@ -105,7 +103,6 @@
105103
import java.util.Collections;
106104
import java.util.List;
107105
import java.util.Map;
108-
import java.util.function.LongConsumer;
109106

110107
public class Lucene {
111108
public static final String LATEST_DOC_VALUES_FORMAT = "Lucene70";
@@ -1050,39 +1047,4 @@ public CacheHelper getReaderCacheHelper() {
10501047
}
10511048
};
10521049
}
1053-
1054-
/**
1055-
* Scans sequence numbers (i.e., {@link SeqNoFieldMapper#NAME}) between {@code fromSeqNo}(inclusive) and {@code toSeqNo}(inclusive)
1056-
* in the provided directory reader. This method invokes the callback {@code onNewSeqNo} whenever a sequence number value is found.
1057-
*
1058-
* @param directoryReader the directory reader to scan
1059-
* @param fromSeqNo the lower bound of a range of seq_no to scan (inclusive)
1060-
* @param toSeqNo the upper bound of a range of seq_no to scan (inclusive)
1061-
* @param onNewSeqNo the callback to be called whenever a new valid sequence number is found
1062-
*/
1063-
public static void scanSeqNosInReader(DirectoryReader directoryReader, long fromSeqNo, long toSeqNo,
1064-
LongConsumer onNewSeqNo) throws IOException {
1065-
final DirectoryReader reader = Lucene.wrapAllDocsLive(directoryReader);
1066-
final IndexSearcher searcher = new IndexSearcher(reader);
1067-
searcher.setQueryCache(null);
1068-
final Query query = LongPoint.newRangeQuery(SeqNoFieldMapper.NAME, fromSeqNo, toSeqNo);
1069-
final Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
1070-
for (LeafReaderContext leaf : reader.leaves()) {
1071-
final Scorer scorer = weight.scorer(leaf);
1072-
if (scorer == null) {
1073-
continue;
1074-
}
1075-
final DocIdSetIterator docIdSetIterator = scorer.iterator();
1076-
final NumericDocValues seqNoDocValues = leaf.reader().getNumericDocValues(SeqNoFieldMapper.NAME);
1077-
int docId;
1078-
while ((docId = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
1079-
if (seqNoDocValues == null || seqNoDocValues.advanceExact(docId) == false) {
1080-
throw new IllegalStateException("seq_no doc_values not found for doc_id=" + docId);
1081-
}
1082-
final long seqNo = seqNoDocValues.longValue();
1083-
assert fromSeqNo <= seqNo && seqNo <= toSeqNo : "from_seq_no=" + fromSeqNo + " seq_no=" + seqNo + " to_seq_no=" + toSeqNo;
1084-
onNewSeqNo.accept(seqNo);
1085-
}
1086-
}
1087-
}
10881050
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.engine;
21+
22+
import org.apache.lucene.index.LeafReader;
23+
import org.apache.lucene.index.NumericDocValues;
24+
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
25+
import org.elasticsearch.index.mapper.SourceFieldMapper;
26+
import org.elasticsearch.index.mapper.VersionFieldMapper;
27+
28+
import java.io.IOException;
29+
import java.util.Objects;
30+
31+
final class CombinedDocValues {
32+
private final NumericDocValues versionDV;
33+
private final NumericDocValues seqNoDV;
34+
private final NumericDocValues primaryTermDV;
35+
private final NumericDocValues tombstoneDV;
36+
private final NumericDocValues recoverySource;
37+
38+
CombinedDocValues(LeafReader leafReader) throws IOException {
39+
this.versionDV = Objects.requireNonNull(leafReader.getNumericDocValues(VersionFieldMapper.NAME), "VersionDV is missing");
40+
this.seqNoDV = Objects.requireNonNull(leafReader.getNumericDocValues(SeqNoFieldMapper.NAME), "SeqNoDV is missing");
41+
this.primaryTermDV = Objects.requireNonNull(
42+
leafReader.getNumericDocValues(SeqNoFieldMapper.PRIMARY_TERM_NAME), "PrimaryTermDV is missing");
43+
this.tombstoneDV = leafReader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME);
44+
this.recoverySource = leafReader.getNumericDocValues(SourceFieldMapper.RECOVERY_SOURCE_NAME);
45+
}
46+
47+
long docVersion(int segmentDocId) throws IOException {
48+
assert versionDV.docID() < segmentDocId;
49+
if (versionDV.advanceExact(segmentDocId) == false) {
50+
throw new IllegalStateException("DocValues for field [" + VersionFieldMapper.NAME + "] is not found");
51+
}
52+
return versionDV.longValue();
53+
}
54+
55+
long docSeqNo(int segmentDocId) throws IOException {
56+
assert seqNoDV.docID() < segmentDocId;
57+
if (seqNoDV.advanceExact(segmentDocId) == false) {
58+
throw new IllegalStateException("DocValues for field [" + SeqNoFieldMapper.NAME + "] is not found");
59+
}
60+
return seqNoDV.longValue();
61+
}
62+
63+
long docPrimaryTerm(int segmentDocId) throws IOException {
64+
if (primaryTermDV == null) {
65+
return -1L;
66+
}
67+
assert primaryTermDV.docID() < segmentDocId;
68+
// Use -1 for docs which don't have primary term. The caller considers those docs as nested docs.
69+
if (primaryTermDV.advanceExact(segmentDocId) == false) {
70+
return -1;
71+
}
72+
return primaryTermDV.longValue();
73+
}
74+
75+
boolean isTombstone(int segmentDocId) throws IOException {
76+
if (tombstoneDV == null) {
77+
return false;
78+
}
79+
assert tombstoneDV.docID() < segmentDocId;
80+
return tombstoneDV.advanceExact(segmentDocId) && tombstoneDV.longValue() > 0;
81+
}
82+
83+
boolean hasRecoverySource(int segmentDocId) throws IOException {
84+
if (recoverySource == null) {
85+
return false;
86+
}
87+
assert recoverySource.docID() < segmentDocId;
88+
return recoverySource.advanceExact(segmentDocId);
89+
}
90+
}

0 commit comments

Comments
 (0)