9
9
10
10
package org .elasticsearch .index .codec .tsdb ;
11
11
12
+ import org .apache .lucene .codecs .DocValuesProducer ;
13
+ import org .apache .lucene .index .BaseTermsEnum ;
12
14
import org .apache .lucene .index .DocIDMerger ;
15
+ import org .apache .lucene .index .DocValuesSkipIndexType ;
16
+ import org .apache .lucene .index .EmptyDocValuesProducer ;
17
+ import org .apache .lucene .index .FieldInfo ;
18
+ import org .apache .lucene .index .ImpactsEnum ;
13
19
import org .apache .lucene .index .MergeState ;
14
20
import org .apache .lucene .index .NumericDocValues ;
21
+ import org .apache .lucene .index .OrdinalMap ;
22
+ import org .apache .lucene .index .PostingsEnum ;
15
23
import org .apache .lucene .index .SortedDocValues ;
16
24
import org .apache .lucene .index .SortedNumericDocValues ;
25
+ import org .apache .lucene .index .SortedSetDocValues ;
26
+ import org .apache .lucene .index .TermState ;
27
+ import org .apache .lucene .index .TermsEnum ;
28
+ import org .apache .lucene .util .AttributeSource ;
17
29
import org .apache .lucene .util .BytesRef ;
30
+ import org .apache .lucene .util .LongValues ;
18
31
19
32
import java .io .IOException ;
20
33
import java .util .List ;
34
+ import java .util .function .BiFunction ;
21
35
36
+ /**
37
+ * Contains mainly forked code from {@link org.apache.lucene.codecs.DocValuesConsumer}.
38
+ */
22
39
class DocValuesConsumerUtil {
23
40
41
+ static final MergeStats UNSUPPORTED = new MergeStats (false , -1 , -1 );
42
+
43
+ abstract static class TsdbDocValuesProducer extends EmptyDocValuesProducer {
44
+
45
+ final MergeStats mergeStats ;
46
+
47
+ TsdbDocValuesProducer (MergeStats mergeStats ) {
48
+ this .mergeStats = mergeStats ;
49
+ }
50
+
51
+ }
52
+
53
/**
 * Outcome of checking whether a field can use the optimized merge, plus the totals gathered while checking.
 *
 * @param supported           whether the optimized merge can be used
 * @param sumNumValues        number of values summed over all merged segments ({@code -1} in {@code UNSUPPORTED})
 * @param sumNumDocsWithField number of documents with the field summed over all merged segments
 *                            ({@code -1} in {@code UNSUPPORTED})
 */
record MergeStats(
    boolean supported,
    long sumNumValues,
    int sumNumDocsWithField
) {}
54
+
55
/**
 * Per-segment, per-field metadata consulted by {@code compatibleWithOptimizedMerge}.
 *
 * @param docsWithFieldOffset offset value recorded for the field; the optimized merge only accepts {@code -1}
 * @param numValues           number of values the field has in the segment
 * @param numDocsWithField    number of documents in the segment that have the field
 */
record FieldEntry(
    long docsWithFieldOffset,
    long numValues,
    int numDocsWithField
) {}
56
+
57
+ static MergeStats compatibleWithOptimizedMerge (
58
+ boolean optimizedMergeEnabled ,
59
+ FieldInfo mergeFieldInfo ,
60
+ MergeState mergeState ,
61
+ BiFunction <ES87TSDBDocValuesProducer , String , FieldEntry > function
62
+ ) {
63
+ if (optimizedMergeEnabled == false
64
+ || mergeState .needsIndexSort == false
65
+ || mergeFieldInfo .docValuesSkipIndexType () != DocValuesSkipIndexType .NONE ) {
66
+ return UNSUPPORTED ;
67
+ }
68
+
69
+ long sumNumValues = 0 ;
70
+ int sumNumDocsWithField = 0 ;
71
+
72
+ for (DocValuesProducer docValuesProducer : mergeState .docValuesProducers ) {
73
+ if (docValuesProducer instanceof ES87TSDBDocValuesProducer tsdbProducer ) {
74
+ if (tsdbProducer .version != ES87TSDBDocValuesFormat .VERSION_CURRENT ) {
75
+ return UNSUPPORTED ;
76
+ }
77
+
78
+ var entry = function .apply (tsdbProducer , mergeFieldInfo .name );
79
+ assert entry != null ;
80
+ // TODO: support also fields with offsets
81
+ if (entry .docsWithFieldOffset != -1 ) {
82
+ return UNSUPPORTED ;
83
+ }
84
+ sumNumValues += entry .numValues ;
85
+ sumNumDocsWithField += entry .numDocsWithField ;
86
+ } else {
87
+ return UNSUPPORTED ;
88
+ }
89
+ }
90
+
91
+ if (Math .toIntExact (sumNumValues ) != sumNumDocsWithField ) {
92
+ return UNSUPPORTED ;
93
+ }
94
+ // Documents marked as deleted should be rare. Maybe in the case of noop operation?
95
+ for (int i = 0 ; i < mergeState .liveDocs .length ; i ++) {
96
+ if (mergeState .liveDocs [i ] != null ) {
97
+ return UNSUPPORTED ;
98
+ }
99
+ }
100
+
101
+ return new MergeStats (true , sumNumValues , sumNumDocsWithField );
102
+ }
103
+
24
104
static SortedNumericDocValues mergeSortedNumericValues (List <SortedNumericDocValuesSub > subs , boolean indexIsSorted ) throws IOException {
25
105
long cost = 0 ;
26
106
for (SortedNumericDocValuesSub sub : subs ) {
@@ -164,7 +244,7 @@ public int nextDoc() throws IOException {
164
244
}
165
245
}
166
246
167
- static SortedDocValues mergeSortedValues (List <SortedDocValuesSub > subs , boolean indexIsSorted ) throws IOException {
247
+ static SortedDocValues mergeSortedValues (List <SortedDocValuesSub > subs , boolean indexIsSorted , OrdinalMap map ) throws IOException {
168
248
long cost = 0 ;
169
249
for (SortedDocValuesSub sub : subs ) {
170
250
cost += sub .values .cost ();
@@ -210,25 +290,38 @@ public long cost() {
210
290
211
291
@ Override
212
292
public int ordValue () throws IOException {
213
- return current .values .ordValue ();
293
+ int subOrd = current .values .ordValue ();
294
+ assert subOrd != -1 ;
295
+ return (int ) current .map .get (subOrd );
214
296
}
215
297
216
298
@ Override
217
299
public BytesRef lookupOrd (int ord ) throws IOException {
218
- return current .values .lookupOrd (ord );
300
+ int segmentNumber = map .getFirstSegmentNumber (ord );
301
+ int segmentOrd = (int ) map .getFirstSegmentOrd (ord );
302
+ return subs .get (segmentNumber ).values .lookupOrd (segmentOrd );
219
303
}
220
304
221
305
@ Override
222
306
public int getValueCount () {
223
- return current .values .getValueCount ();
307
+ return (int ) map .getValueCount ();
308
+ }
309
+
310
+ @ Override
311
+ public TermsEnum termsEnum () throws IOException {
312
+ TermsEnum [] termsEnurmSubs = new TermsEnum [subs .size ()];
313
+ for (int sub = 0 ; sub < termsEnurmSubs .length ; ++sub ) {
314
+ termsEnurmSubs [sub ] = subs .get (sub ).values .termsEnum ();
315
+ }
316
+ return new MergedTermsEnum (map , termsEnurmSubs );
224
317
}
225
318
};
226
319
}
227
320
228
321
static class SortedDocValuesSub extends DocIDMerger .Sub {
229
322
323
+ LongValues map ;
230
324
final SortedDocValues values ;
231
- int docID = -1 ;
232
325
233
326
SortedDocValuesSub (MergeState .DocMap docMap , SortedDocValues values ) {
234
327
super (docMap );
@@ -238,7 +331,183 @@ static class SortedDocValuesSub extends DocIDMerger.Sub {
238
331
239
332
@ Override
240
333
public int nextDoc () throws IOException {
241
- return docID = values .nextDoc ();
334
+ return values .nextDoc ();
335
+ }
336
+ }
337
+
338
+ static SortedSetDocValues mergeSortedSetValues (List <SortedSetDocValuesSub > subs , boolean indexIsSorted , OrdinalMap map )
339
+ throws IOException {
340
+ long cost = 0 ;
341
+ for (SortedSetDocValuesSub sub : subs ) {
342
+ cost += sub .values .cost ();
343
+ }
344
+ final long finalCost = cost ;
345
+
346
+ final DocIDMerger <SortedSetDocValuesSub > docIDMerger = DocIDMerger .of (subs , indexIsSorted );
347
+
348
+ return new SortedSetDocValues () {
349
+ private int docID = -1 ;
350
+ private SortedSetDocValuesSub current ;
351
+
352
+ @ Override
353
+ public int docID () {
354
+ return docID ;
355
+ }
356
+
357
+ @ Override
358
+ public int nextDoc () throws IOException {
359
+ current = docIDMerger .next ();
360
+ if (current == null ) {
361
+ docID = NO_MORE_DOCS ;
362
+ } else {
363
+ docID = current .mappedDocID ;
364
+ }
365
+ return docID ;
366
+ }
367
+
368
+ @ Override
369
+ public int advance (int target ) throws IOException {
370
+ throw new UnsupportedOperationException ();
371
+ }
372
+
373
+ @ Override
374
+ public boolean advanceExact (int target ) throws IOException {
375
+ throw new UnsupportedOperationException ();
376
+ }
377
+
378
+ @ Override
379
+ public long cost () {
380
+ return finalCost ;
381
+ }
382
+
383
+ @ Override
384
+ public long nextOrd () throws IOException {
385
+ long subOrd = current .values .nextOrd ();
386
+ return current .map .get (subOrd );
387
+ }
388
+
389
+ @ Override
390
+ public int docValueCount () {
391
+ return current .values .docValueCount ();
392
+ }
393
+
394
+ @ Override
395
+ public BytesRef lookupOrd (long ord ) throws IOException {
396
+ int segmentNumber = map .getFirstSegmentNumber (ord );
397
+ int segmentOrd = (int ) map .getFirstSegmentOrd (ord );
398
+ return subs .get (segmentNumber ).values .lookupOrd (segmentOrd );
399
+ }
400
+
401
+ @ Override
402
+ public long getValueCount () {
403
+ return map .getValueCount ();
404
+ }
405
+
406
+ @ Override
407
+ public TermsEnum termsEnum () throws IOException {
408
+ TermsEnum [] termsEnurmSubs = new TermsEnum [subs .size ()];
409
+ for (int sub = 0 ; sub < termsEnurmSubs .length ; ++sub ) {
410
+ termsEnurmSubs [sub ] = subs .get (sub ).values .termsEnum ();
411
+ }
412
+ return new MergedTermsEnum (map , termsEnurmSubs );
413
+ }
414
+ };
415
+ }
416
+
417
+ static class SortedSetDocValuesSub extends DocIDMerger .Sub {
418
+
419
+ LongValues map ;
420
+ final SortedSetDocValues values ;
421
+
422
+ SortedSetDocValuesSub (MergeState .DocMap docMap , SortedSetDocValues values ) {
423
+ super (docMap );
424
+ this .values = values ;
425
+ assert values .docID () == -1 ;
426
+ }
427
+
428
+ @ Override
429
+ public int nextDoc () throws IOException {
430
+ return values .nextDoc ();
431
+ }
432
+ }
433
+
434
+ static class MergedTermsEnum extends BaseTermsEnum {
435
+
436
+ private final TermsEnum [] subs ;
437
+ private final OrdinalMap ordinalMap ;
438
+ private final long valueCount ;
439
+ private long ord = -1 ;
440
+ private BytesRef term ;
441
+
442
+ MergedTermsEnum (OrdinalMap ordinalMap , TermsEnum [] subs ) {
443
+ this .ordinalMap = ordinalMap ;
444
+ this .subs = subs ;
445
+ this .valueCount = ordinalMap .getValueCount ();
446
+ }
447
+
448
+ @ Override
449
+ public BytesRef term () throws IOException {
450
+ return term ;
451
+ }
452
+
453
+ @ Override
454
+ public long ord () throws IOException {
455
+ return ord ;
456
+ }
457
+
458
+ @ Override
459
+ public BytesRef next () throws IOException {
460
+ if (++ord >= valueCount ) {
461
+ return null ;
462
+ }
463
+ final int subNum = ordinalMap .getFirstSegmentNumber (ord );
464
+ final TermsEnum sub = subs [subNum ];
465
+ final long subOrd = ordinalMap .getFirstSegmentOrd (ord );
466
+ do {
467
+ term = sub .next ();
468
+ } while (sub .ord () < subOrd );
469
+ assert sub .ord () == subOrd ;
470
+ return term ;
471
+ }
472
+
473
+ @ Override
474
+ public AttributeSource attributes () {
475
+ throw new UnsupportedOperationException ();
476
+ }
477
+
478
+ @ Override
479
+ public SeekStatus seekCeil (BytesRef text ) throws IOException {
480
+ throw new UnsupportedOperationException ();
481
+ }
482
+
483
+ @ Override
484
+ public void seekExact (long ord ) throws IOException {
485
+ throw new UnsupportedOperationException ();
486
+ }
487
+
488
+ @ Override
489
+ public int docFreq () throws IOException {
490
+ throw new UnsupportedOperationException ();
491
+ }
492
+
493
+ @ Override
494
+ public long totalTermFreq () throws IOException {
495
+ throw new UnsupportedOperationException ();
496
+ }
497
+
498
+ @ Override
499
+ public PostingsEnum postings (PostingsEnum reuse , int flags ) throws IOException {
500
+ throw new UnsupportedOperationException ();
501
+ }
502
+
503
+ @ Override
504
+ public ImpactsEnum impacts (int flags ) throws IOException {
505
+ throw new UnsupportedOperationException ();
506
+ }
507
+
508
+ @ Override
509
+ public TermState termState () throws IOException {
510
+ throw new UnsupportedOperationException ();
242
511
}
243
512
}
244
513
0 commit comments