99
1010package org .elasticsearch .index .codec .tsdb ;
1111
12+ import org .apache .lucene .codecs .DocValuesProducer ;
13+ import org .apache .lucene .index .BaseTermsEnum ;
1214import org .apache .lucene .index .DocIDMerger ;
15+ import org .apache .lucene .index .DocValuesSkipIndexType ;
16+ import org .apache .lucene .index .EmptyDocValuesProducer ;
17+ import org .apache .lucene .index .FieldInfo ;
18+ import org .apache .lucene .index .ImpactsEnum ;
1319import org .apache .lucene .index .MergeState ;
1420import org .apache .lucene .index .NumericDocValues ;
21+ import org .apache .lucene .index .OrdinalMap ;
22+ import org .apache .lucene .index .PostingsEnum ;
1523import org .apache .lucene .index .SortedDocValues ;
1624import org .apache .lucene .index .SortedNumericDocValues ;
25+ import org .apache .lucene .index .SortedSetDocValues ;
26+ import org .apache .lucene .index .TermState ;
27+ import org .apache .lucene .index .TermsEnum ;
28+ import org .apache .lucene .util .AttributeSource ;
1729import org .apache .lucene .util .BytesRef ;
30+ import org .apache .lucene .util .LongValues ;
1831
1932import java .io .IOException ;
2033import java .util .List ;
34+ import java .util .function .BiFunction ;
2135
36+ /**
37+ * Contains mainly forked code from {@link org.apache.lucene.codecs.DocValuesConsumer}.
38+ */
2239class DocValuesConsumerUtil {
2340
41+ static final MergeStats UNSUPPORTED = new MergeStats (false , -1 , -1 );
42+
43+ abstract static class TsdbDocValuesProducer extends EmptyDocValuesProducer {
44+
45+ final MergeStats mergeStats ;
46+
47+ TsdbDocValuesProducer (MergeStats mergeStats ) {
48+ this .mergeStats = mergeStats ;
49+ }
50+
51+ }
52+
/**
 * Outcome of the optimized-merge compatibility check for one field.
 *
 * @param supported           whether the optimized merge may be used
 * @param sumNumValues        total number of values summed over all producers taking part in the merge
 *                            ({@code -1} in {@link #UNSUPPORTED})
 * @param sumNumDocsWithField total number of documents with the field summed over all producers
 *                            ({@code -1} in {@link #UNSUPPORTED})
 */
record MergeStats(
    boolean supported,
    long sumNumValues,
    int sumNumDocsWithField
) {}
54+
/**
 * Per-field metadata of a single producer, as consumed by {@link #compatibleWithOptimizedMerge}.
 *
 * @param docsWithFieldOffset offset value of the field; the compatibility check only accepts {@code -1}
 *                            (presumably "every document has a value" - confirm against the producer)
 * @param numValues           number of values stored for the field
 * @param numDocsWithField    number of documents that have the field
 */
record FieldEntry(
    long docsWithFieldOffset,
    long numValues,
    int numDocsWithField
) {}
56+
57+ static MergeStats compatibleWithOptimizedMerge (
58+ boolean optimizedMergeEnabled ,
59+ FieldInfo mergeFieldInfo ,
60+ MergeState mergeState ,
61+ BiFunction <ES87TSDBDocValuesProducer , String , FieldEntry > function
62+ ) {
63+ if (optimizedMergeEnabled == false
64+ || mergeState .needsIndexSort == false
65+ || mergeFieldInfo .docValuesSkipIndexType () != DocValuesSkipIndexType .NONE ) {
66+ return UNSUPPORTED ;
67+ }
68+
69+ long sumNumValues = 0 ;
70+ int sumNumDocsWithField = 0 ;
71+
72+ for (DocValuesProducer docValuesProducer : mergeState .docValuesProducers ) {
73+ if (docValuesProducer instanceof ES87TSDBDocValuesProducer tsdbProducer ) {
74+ if (tsdbProducer .version != ES87TSDBDocValuesFormat .VERSION_CURRENT ) {
75+ return UNSUPPORTED ;
76+ }
77+
78+ var entry = function .apply (tsdbProducer , mergeFieldInfo .name );
79+ assert entry != null ;
80+ // TODO: support also fields with offsets
81+ if (entry .docsWithFieldOffset != -1 ) {
82+ return UNSUPPORTED ;
83+ }
84+ sumNumValues += entry .numValues ;
85+ sumNumDocsWithField += entry .numDocsWithField ;
86+ } else {
87+ return UNSUPPORTED ;
88+ }
89+ }
90+
91+ if (Math .toIntExact (sumNumValues ) != sumNumDocsWithField ) {
92+ return UNSUPPORTED ;
93+ }
94+ // Documents marked as deleted should be rare. Maybe in the case of noop operation?
95+ for (int i = 0 ; i < mergeState .liveDocs .length ; i ++) {
96+ if (mergeState .liveDocs [i ] != null ) {
97+ return UNSUPPORTED ;
98+ }
99+ }
100+
101+ return new MergeStats (true , sumNumValues , sumNumDocsWithField );
102+ }
103+
24104 static SortedNumericDocValues mergeSortedNumericValues (List <SortedNumericDocValuesSub > subs , boolean indexIsSorted ) throws IOException {
25105 long cost = 0 ;
26106 for (SortedNumericDocValuesSub sub : subs ) {
@@ -164,7 +244,7 @@ public int nextDoc() throws IOException {
164244 }
165245 }
166246
167- static SortedDocValues mergeSortedValues (List <SortedDocValuesSub > subs , boolean indexIsSorted ) throws IOException {
247+ static SortedDocValues mergeSortedValues (List <SortedDocValuesSub > subs , boolean indexIsSorted , OrdinalMap map ) throws IOException {
168248 long cost = 0 ;
169249 for (SortedDocValuesSub sub : subs ) {
170250 cost += sub .values .cost ();
@@ -210,25 +290,38 @@ public long cost() {
210290
211291 @ Override
212292 public int ordValue () throws IOException {
213- return current .values .ordValue ();
293+ int subOrd = current .values .ordValue ();
294+ assert subOrd != -1 ;
295+ return (int ) current .map .get (subOrd );
214296 }
215297
216298 @ Override
217299 public BytesRef lookupOrd (int ord ) throws IOException {
218- return current .values .lookupOrd (ord );
300+ int segmentNumber = map .getFirstSegmentNumber (ord );
301+ int segmentOrd = (int ) map .getFirstSegmentOrd (ord );
302+ return subs .get (segmentNumber ).values .lookupOrd (segmentOrd );
219303 }
220304
221305 @ Override
222306 public int getValueCount () {
223- return current .values .getValueCount ();
307+ return (int ) map .getValueCount ();
308+ }
309+
310+ @ Override
311+ public TermsEnum termsEnum () throws IOException {
312+ TermsEnum [] termsEnurmSubs = new TermsEnum [subs .size ()];
313+ for (int sub = 0 ; sub < termsEnurmSubs .length ; ++sub ) {
314+ termsEnurmSubs [sub ] = subs .get (sub ).values .termsEnum ();
315+ }
316+ return new MergedTermsEnum (map , termsEnurmSubs );
224317 }
225318 };
226319 }
227320
228321 static class SortedDocValuesSub extends DocIDMerger .Sub {
229322
323+ LongValues map ;
230324 final SortedDocValues values ;
231- int docID = -1 ;
232325
233326 SortedDocValuesSub (MergeState .DocMap docMap , SortedDocValues values ) {
234327 super (docMap );
@@ -238,7 +331,183 @@ static class SortedDocValuesSub extends DocIDMerger.Sub {
238331
239332 @ Override
240333 public int nextDoc () throws IOException {
241- return docID = values .nextDoc ();
334+ return values .nextDoc ();
335+ }
336+ }
337+
338+ static SortedSetDocValues mergeSortedSetValues (List <SortedSetDocValuesSub > subs , boolean indexIsSorted , OrdinalMap map )
339+ throws IOException {
340+ long cost = 0 ;
341+ for (SortedSetDocValuesSub sub : subs ) {
342+ cost += sub .values .cost ();
343+ }
344+ final long finalCost = cost ;
345+
346+ final DocIDMerger <SortedSetDocValuesSub > docIDMerger = DocIDMerger .of (subs , indexIsSorted );
347+
348+ return new SortedSetDocValues () {
349+ private int docID = -1 ;
350+ private SortedSetDocValuesSub current ;
351+
352+ @ Override
353+ public int docID () {
354+ return docID ;
355+ }
356+
357+ @ Override
358+ public int nextDoc () throws IOException {
359+ current = docIDMerger .next ();
360+ if (current == null ) {
361+ docID = NO_MORE_DOCS ;
362+ } else {
363+ docID = current .mappedDocID ;
364+ }
365+ return docID ;
366+ }
367+
368+ @ Override
369+ public int advance (int target ) throws IOException {
370+ throw new UnsupportedOperationException ();
371+ }
372+
373+ @ Override
374+ public boolean advanceExact (int target ) throws IOException {
375+ throw new UnsupportedOperationException ();
376+ }
377+
378+ @ Override
379+ public long cost () {
380+ return finalCost ;
381+ }
382+
383+ @ Override
384+ public long nextOrd () throws IOException {
385+ long subOrd = current .values .nextOrd ();
386+ return current .map .get (subOrd );
387+ }
388+
389+ @ Override
390+ public int docValueCount () {
391+ return current .values .docValueCount ();
392+ }
393+
394+ @ Override
395+ public BytesRef lookupOrd (long ord ) throws IOException {
396+ int segmentNumber = map .getFirstSegmentNumber (ord );
397+ int segmentOrd = (int ) map .getFirstSegmentOrd (ord );
398+ return subs .get (segmentNumber ).values .lookupOrd (segmentOrd );
399+ }
400+
401+ @ Override
402+ public long getValueCount () {
403+ return map .getValueCount ();
404+ }
405+
406+ @ Override
407+ public TermsEnum termsEnum () throws IOException {
408+ TermsEnum [] termsEnurmSubs = new TermsEnum [subs .size ()];
409+ for (int sub = 0 ; sub < termsEnurmSubs .length ; ++sub ) {
410+ termsEnurmSubs [sub ] = subs .get (sub ).values .termsEnum ();
411+ }
412+ return new MergedTermsEnum (map , termsEnurmSubs );
413+ }
414+ };
415+ }
416+
417+ static class SortedSetDocValuesSub extends DocIDMerger .Sub {
418+
419+ LongValues map ;
420+ final SortedSetDocValues values ;
421+
422+ SortedSetDocValuesSub (MergeState .DocMap docMap , SortedSetDocValues values ) {
423+ super (docMap );
424+ this .values = values ;
425+ assert values .docID () == -1 ;
426+ }
427+
428+ @ Override
429+ public int nextDoc () throws IOException {
430+ return values .nextDoc ();
431+ }
432+ }
433+
434+ static class MergedTermsEnum extends BaseTermsEnum {
435+
436+ private final TermsEnum [] subs ;
437+ private final OrdinalMap ordinalMap ;
438+ private final long valueCount ;
439+ private long ord = -1 ;
440+ private BytesRef term ;
441+
442+ MergedTermsEnum (OrdinalMap ordinalMap , TermsEnum [] subs ) {
443+ this .ordinalMap = ordinalMap ;
444+ this .subs = subs ;
445+ this .valueCount = ordinalMap .getValueCount ();
446+ }
447+
448+ @ Override
449+ public BytesRef term () throws IOException {
450+ return term ;
451+ }
452+
453+ @ Override
454+ public long ord () throws IOException {
455+ return ord ;
456+ }
457+
458+ @ Override
459+ public BytesRef next () throws IOException {
460+ if (++ord >= valueCount ) {
461+ return null ;
462+ }
463+ final int subNum = ordinalMap .getFirstSegmentNumber (ord );
464+ final TermsEnum sub = subs [subNum ];
465+ final long subOrd = ordinalMap .getFirstSegmentOrd (ord );
466+ do {
467+ term = sub .next ();
468+ } while (sub .ord () < subOrd );
469+ assert sub .ord () == subOrd ;
470+ return term ;
471+ }
472+
473+ @ Override
474+ public AttributeSource attributes () {
475+ throw new UnsupportedOperationException ();
476+ }
477+
478+ @ Override
479+ public SeekStatus seekCeil (BytesRef text ) throws IOException {
480+ throw new UnsupportedOperationException ();
481+ }
482+
483+ @ Override
484+ public void seekExact (long ord ) throws IOException {
485+ throw new UnsupportedOperationException ();
486+ }
487+
488+ @ Override
489+ public int docFreq () throws IOException {
490+ throw new UnsupportedOperationException ();
491+ }
492+
493+ @ Override
494+ public long totalTermFreq () throws IOException {
495+ throw new UnsupportedOperationException ();
496+ }
497+
498+ @ Override
499+ public PostingsEnum postings (PostingsEnum reuse , int flags ) throws IOException {
500+ throw new UnsupportedOperationException ();
501+ }
502+
503+ @ Override
504+ public ImpactsEnum impacts (int flags ) throws IOException {
505+ throw new UnsupportedOperationException ();
506+ }
507+
508+ @ Override
509+ public TermState termState () throws IOException {
510+ throw new UnsupportedOperationException ();
242511 }
243512 }
244513
0 commit comments