Skip to content

Commit 52f3084

Browse files
committed
Actually use OrdinalMap when merging sorted and sorted set dv
Fixed sorted set dv; added unit test with index sorting
1 parent 9bd2907 commit 52f3084

File tree

3 files changed

+406
-79
lines changed

3 files changed

+406
-79
lines changed

Diff for: server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesConsumerUtil.java

+275-6
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,98 @@
99

1010
package org.elasticsearch.index.codec.tsdb;
1111

12+
import org.apache.lucene.codecs.DocValuesProducer;
13+
import org.apache.lucene.index.BaseTermsEnum;
1214
import org.apache.lucene.index.DocIDMerger;
15+
import org.apache.lucene.index.DocValuesSkipIndexType;
16+
import org.apache.lucene.index.EmptyDocValuesProducer;
17+
import org.apache.lucene.index.FieldInfo;
18+
import org.apache.lucene.index.ImpactsEnum;
1319
import org.apache.lucene.index.MergeState;
1420
import org.apache.lucene.index.NumericDocValues;
21+
import org.apache.lucene.index.OrdinalMap;
22+
import org.apache.lucene.index.PostingsEnum;
1523
import org.apache.lucene.index.SortedDocValues;
1624
import org.apache.lucene.index.SortedNumericDocValues;
25+
import org.apache.lucene.index.SortedSetDocValues;
26+
import org.apache.lucene.index.TermState;
27+
import org.apache.lucene.index.TermsEnum;
28+
import org.apache.lucene.util.AttributeSource;
1729
import org.apache.lucene.util.BytesRef;
30+
import org.apache.lucene.util.LongValues;
1831

1932
import java.io.IOException;
2033
import java.util.List;
34+
import java.util.function.BiFunction;
2135

36+
/**
37+
* Contains mainly forked code from {@link org.apache.lucene.codecs.DocValuesConsumer}.
38+
*/
2239
class DocValuesConsumerUtil {
2340

41+
/**
 * Shared result returned by {@link #compatibleWithOptimizedMerge} whenever the optimized merge cannot be used:
 * {@code supported} is {@code false} and both counters are set to {@code -1}.
 */
static final MergeStats UNSUPPORTED = new MergeStats(false, -1, -1);
42+
43+
abstract static class TsdbDocValuesProducer extends EmptyDocValuesProducer {
44+
45+
final MergeStats mergeStats;
46+
47+
TsdbDocValuesProducer(MergeStats mergeStats) {
48+
this.mergeStats = mergeStats;
49+
}
50+
51+
}
52+
53+
/**
 * Outcome of {@link #compatibleWithOptimizedMerge}.
 *
 * @param supported           whether the optimized merge can be used
 * @param sumNumValues        sum of {@code numValues} over all producers taking part in the merge, or -1 if unsupported
 * @param sumNumDocsWithField sum of {@code numDocsWithField} over all producers taking part in the merge, or -1 if unsupported
 */
record MergeStats(boolean supported, long sumNumValues, int sumNumDocsWithField) {}
54+
55+
/**
 * Per-field statistics of a single producer, as supplied by the lookup function passed to
 * {@link #compatibleWithOptimizedMerge}.
 *
 * @param docsWithFieldOffset offset value of the field; any value other than -1 makes the optimized merge unsupported
 *                            (NOTE(review): presumably -1 means every document has a value - confirm against the format)
 * @param numValues           number of values of the field in the producer
 * @param numDocsWithField    number of documents that have the field in the producer
 */
record FieldEntry(long docsWithFieldOffset, long numValues, int numDocsWithField) {}
56+
57+
static MergeStats compatibleWithOptimizedMerge(
58+
boolean optimizedMergeEnabled,
59+
FieldInfo mergeFieldInfo,
60+
MergeState mergeState,
61+
BiFunction<ES87TSDBDocValuesProducer, String, FieldEntry> function
62+
) {
63+
if (optimizedMergeEnabled == false
64+
|| mergeState.needsIndexSort == false
65+
|| mergeFieldInfo.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
66+
return UNSUPPORTED;
67+
}
68+
69+
long sumNumValues = 0;
70+
int sumNumDocsWithField = 0;
71+
72+
for (DocValuesProducer docValuesProducer : mergeState.docValuesProducers) {
73+
if (docValuesProducer instanceof ES87TSDBDocValuesProducer tsdbProducer) {
74+
if (tsdbProducer.version != ES87TSDBDocValuesFormat.VERSION_CURRENT) {
75+
return UNSUPPORTED;
76+
}
77+
78+
var entry = function.apply(tsdbProducer, mergeFieldInfo.name);
79+
assert entry != null;
80+
// TODO: support also fields with offsets
81+
if (entry.docsWithFieldOffset != -1) {
82+
return UNSUPPORTED;
83+
}
84+
sumNumValues += entry.numValues;
85+
sumNumDocsWithField += entry.numDocsWithField;
86+
} else {
87+
return UNSUPPORTED;
88+
}
89+
}
90+
91+
if (Math.toIntExact(sumNumValues) != sumNumDocsWithField) {
92+
return UNSUPPORTED;
93+
}
94+
// Documents marked as deleted should be rare. Maybe in the case of noop operation?
95+
for (int i = 0; i < mergeState.liveDocs.length; i++) {
96+
if (mergeState.liveDocs[i] != null) {
97+
return UNSUPPORTED;
98+
}
99+
}
100+
101+
return new MergeStats(true, sumNumValues, sumNumDocsWithField);
102+
}
103+
24104
static SortedNumericDocValues mergeSortedNumericValues(List<SortedNumericDocValuesSub> subs, boolean indexIsSorted) throws IOException {
25105
long cost = 0;
26106
for (SortedNumericDocValuesSub sub : subs) {
@@ -164,7 +244,7 @@ public int nextDoc() throws IOException {
164244
}
165245
}
166246

167-
static SortedDocValues mergeSortedValues(List<SortedDocValuesSub> subs, boolean indexIsSorted) throws IOException {
247+
static SortedDocValues mergeSortedValues(List<SortedDocValuesSub> subs, boolean indexIsSorted, OrdinalMap map) throws IOException {
168248
long cost = 0;
169249
for (SortedDocValuesSub sub : subs) {
170250
cost += sub.values.cost();
@@ -210,25 +290,38 @@ public long cost() {
210290

211291
@Override
212292
public int ordValue() throws IOException {
213-
return current.values.ordValue();
293+
int subOrd = current.values.ordValue();
294+
assert subOrd != -1;
295+
return (int) current.map.get(subOrd);
214296
}
215297

216298
@Override
217299
public BytesRef lookupOrd(int ord) throws IOException {
218-
return current.values.lookupOrd(ord);
300+
int segmentNumber = map.getFirstSegmentNumber(ord);
301+
int segmentOrd = (int) map.getFirstSegmentOrd(ord);
302+
return subs.get(segmentNumber).values.lookupOrd(segmentOrd);
219303
}
220304

221305
@Override
222306
public int getValueCount() {
223-
return current.values.getValueCount();
307+
return (int) map.getValueCount();
308+
}
309+
310+
@Override
311+
public TermsEnum termsEnum() throws IOException {
312+
TermsEnum[] termsEnurmSubs = new TermsEnum[subs.size()];
313+
for (int sub = 0; sub < termsEnurmSubs.length; ++sub) {
314+
termsEnurmSubs[sub] = subs.get(sub).values.termsEnum();
315+
}
316+
return new MergedTermsEnum(map, termsEnurmSubs);
224317
}
225318
};
226319
}
227320

228321
static class SortedDocValuesSub extends DocIDMerger.Sub {
229322

323+
LongValues map;
230324
final SortedDocValues values;
231-
int docID = -1;
232325

233326
SortedDocValuesSub(MergeState.DocMap docMap, SortedDocValues values) {
234327
super(docMap);
@@ -238,7 +331,183 @@ static class SortedDocValuesSub extends DocIDMerger.Sub {
238331

239332
@Override
240333
public int nextDoc() throws IOException {
241-
return docID = values.nextDoc();
334+
return values.nextDoc();
335+
}
336+
}
337+
338+
static SortedSetDocValues mergeSortedSetValues(List<SortedSetDocValuesSub> subs, boolean indexIsSorted, OrdinalMap map)
339+
throws IOException {
340+
long cost = 0;
341+
for (SortedSetDocValuesSub sub : subs) {
342+
cost += sub.values.cost();
343+
}
344+
final long finalCost = cost;
345+
346+
final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, indexIsSorted);
347+
348+
return new SortedSetDocValues() {
349+
private int docID = -1;
350+
private SortedSetDocValuesSub current;
351+
352+
@Override
353+
public int docID() {
354+
return docID;
355+
}
356+
357+
@Override
358+
public int nextDoc() throws IOException {
359+
current = docIDMerger.next();
360+
if (current == null) {
361+
docID = NO_MORE_DOCS;
362+
} else {
363+
docID = current.mappedDocID;
364+
}
365+
return docID;
366+
}
367+
368+
@Override
369+
public int advance(int target) throws IOException {
370+
throw new UnsupportedOperationException();
371+
}
372+
373+
@Override
374+
public boolean advanceExact(int target) throws IOException {
375+
throw new UnsupportedOperationException();
376+
}
377+
378+
@Override
379+
public long cost() {
380+
return finalCost;
381+
}
382+
383+
@Override
384+
public long nextOrd() throws IOException {
385+
long subOrd = current.values.nextOrd();
386+
return current.map.get(subOrd);
387+
}
388+
389+
@Override
390+
public int docValueCount() {
391+
return current.values.docValueCount();
392+
}
393+
394+
@Override
395+
public BytesRef lookupOrd(long ord) throws IOException {
396+
int segmentNumber = map.getFirstSegmentNumber(ord);
397+
int segmentOrd = (int) map.getFirstSegmentOrd(ord);
398+
return subs.get(segmentNumber).values.lookupOrd(segmentOrd);
399+
}
400+
401+
@Override
402+
public long getValueCount() {
403+
return map.getValueCount();
404+
}
405+
406+
@Override
407+
public TermsEnum termsEnum() throws IOException {
408+
TermsEnum[] termsEnurmSubs = new TermsEnum[subs.size()];
409+
for (int sub = 0; sub < termsEnurmSubs.length; ++sub) {
410+
termsEnurmSubs[sub] = subs.get(sub).values.termsEnum();
411+
}
412+
return new MergedTermsEnum(map, termsEnurmSubs);
413+
}
414+
};
415+
}
416+
417+
static class SortedSetDocValuesSub extends DocIDMerger.Sub {
418+
419+
LongValues map;
420+
final SortedSetDocValues values;
421+
422+
SortedSetDocValuesSub(MergeState.DocMap docMap, SortedSetDocValues values) {
423+
super(docMap);
424+
this.values = values;
425+
assert values.docID() == -1;
426+
}
427+
428+
@Override
429+
public int nextDoc() throws IOException {
430+
return values.nextDoc();
431+
}
432+
}
433+
434+
static class MergedTermsEnum extends BaseTermsEnum {
435+
436+
private final TermsEnum[] subs;
437+
private final OrdinalMap ordinalMap;
438+
private final long valueCount;
439+
private long ord = -1;
440+
private BytesRef term;
441+
442+
MergedTermsEnum(OrdinalMap ordinalMap, TermsEnum[] subs) {
443+
this.ordinalMap = ordinalMap;
444+
this.subs = subs;
445+
this.valueCount = ordinalMap.getValueCount();
446+
}
447+
448+
@Override
449+
public BytesRef term() throws IOException {
450+
return term;
451+
}
452+
453+
@Override
454+
public long ord() throws IOException {
455+
return ord;
456+
}
457+
458+
@Override
459+
public BytesRef next() throws IOException {
460+
if (++ord >= valueCount) {
461+
return null;
462+
}
463+
final int subNum = ordinalMap.getFirstSegmentNumber(ord);
464+
final TermsEnum sub = subs[subNum];
465+
final long subOrd = ordinalMap.getFirstSegmentOrd(ord);
466+
do {
467+
term = sub.next();
468+
} while (sub.ord() < subOrd);
469+
assert sub.ord() == subOrd;
470+
return term;
471+
}
472+
473+
@Override
474+
public AttributeSource attributes() {
475+
throw new UnsupportedOperationException();
476+
}
477+
478+
@Override
479+
public SeekStatus seekCeil(BytesRef text) throws IOException {
480+
throw new UnsupportedOperationException();
481+
}
482+
483+
@Override
484+
public void seekExact(long ord) throws IOException {
485+
throw new UnsupportedOperationException();
486+
}
487+
488+
@Override
489+
public int docFreq() throws IOException {
490+
throw new UnsupportedOperationException();
491+
}
492+
493+
@Override
494+
public long totalTermFreq() throws IOException {
495+
throw new UnsupportedOperationException();
496+
}
497+
498+
@Override
499+
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
500+
throw new UnsupportedOperationException();
501+
}
502+
503+
@Override
504+
public ImpactsEnum impacts(int flags) throws IOException {
505+
throw new UnsupportedOperationException();
506+
}
507+
508+
@Override
509+
public TermState termState() throws IOException {
510+
throw new UnsupportedOperationException();
242511
}
243512
}
244513

0 commit comments

Comments
 (0)