|
31 | 31 | import org.apache.lucene.document.TextField;
|
32 | 32 | import org.apache.lucene.index.DirectoryReader;
|
33 | 33 | import org.apache.lucene.index.IndexReader;
|
| 34 | +import org.apache.lucene.index.IndexWriter; |
34 | 35 | import org.apache.lucene.index.IndexWriterConfig;
|
35 | 36 | import org.apache.lucene.index.LeafReaderContext;
|
36 | 37 | import org.apache.lucene.index.NoMergePolicy;
|
|
65 | 66 | import org.apache.lucene.search.spans.SpanNearQuery;
|
66 | 67 | import org.apache.lucene.search.spans.SpanTermQuery;
|
67 | 68 | import org.apache.lucene.store.Directory;
|
| 69 | +import org.apache.lucene.store.IOContext; |
| 70 | +import org.apache.lucene.store.IndexInput; |
| 71 | +import org.apache.lucene.store.IndexOutput; |
68 | 72 | import org.apache.lucene.util.BytesRef;
|
69 | 73 | import org.apache.lucene.util.FixedBitSet;
|
| 74 | +import org.apache.lucene.util.bkd.BKDReader; |
| 75 | +import org.apache.lucene.util.bkd.BKDWriter; |
70 | 76 | import org.elasticsearch.action.search.SearchTask;
|
71 | 77 | import org.elasticsearch.common.settings.Settings;
|
72 | 78 | import org.elasticsearch.index.mapper.DateFieldMapper;
|
|
88 | 94 | import java.util.Collections;
|
89 | 95 | import java.util.List;
|
90 | 96 |
|
| 97 | +import static org.elasticsearch.search.query.QueryPhase.estimateMedianValue; |
| 98 | +import static org.elasticsearch.search.query.QueryPhase.estimatePointCount; |
91 | 99 | import static org.hamcrest.Matchers.anyOf;
|
92 | 100 | import static org.hamcrest.Matchers.equalTo;
|
93 | 101 | import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
94 | 102 | import static org.hamcrest.Matchers.instanceOf;
|
| 103 | +import static org.hamcrest.Matchers.lessThan; |
| 104 | +import static org.hamcrest.Matchers.lessThanOrEqualTo; |
95 | 105 | import static org.mockito.Mockito.mock;
|
96 | 106 | import static org.mockito.Mockito.when;
|
97 | 107 | import static org.mockito.Mockito.spy;
|
@@ -652,9 +662,9 @@ public void testNumericLongOrDateSortOptimization() throws Exception {
|
652 | 662 | TestSearchContext searchContext = spy(new TestSearchContext(null, indexShard));
|
653 | 663 | when(searchContext.mapperService()).thenReturn(mapperService);
|
654 | 664 |
|
655 |
| - final int numDocs = scaledRandomIntBetween(50, 100); |
| 665 | + final int numDocs = 4000; |
656 | 666 | Directory dir = newDirectory();
|
657 |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| 667 | + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(null)); |
658 | 668 | for (int i = 0; i < numDocs; ++i) {
|
659 | 669 | Document doc = new Document();
|
660 | 670 | long longValue = randomLongBetween(-10000000L, 10000000L);
|
@@ -708,6 +718,68 @@ public void testNumericLongOrDateSortOptimization() throws Exception {
|
708 | 718 | dir.close();
|
709 | 719 | }
|
710 | 720 |
|
| 721 | + public void testIndexHasDuplicateData() throws IOException { |
| 722 | + int valuesCount = 5000; |
| 723 | + int maxPointsInLeafNode = 40; |
| 724 | + long expectedMedianCount = (long)(valuesCount * 0.6); |
| 725 | + long expectedMedianValue = randomLongBetween(-10000000L, 10000000L); |
| 726 | + |
| 727 | + try (Directory dir = newDirectory()) { |
| 728 | + BKDWriter w = new BKDWriter(valuesCount, dir, "tmp", 1, 1, 8, maxPointsInLeafNode, 1, valuesCount); |
| 729 | + byte[] longBytes = new byte[8]; |
| 730 | + for (int docId = 0; docId < valuesCount; docId++) { |
| 731 | + long value = docId < expectedMedianCount ? expectedMedianValue : randomLongBetween(-10000000L, 10000000L); |
| 732 | + LongPoint.encodeDimension(value, longBytes, 0); |
| 733 | + w.add(longBytes, docId); |
| 734 | + } |
| 735 | + long indexFP; |
| 736 | + try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) { |
| 737 | + indexFP = w.finish(out); |
| 738 | + } |
| 739 | + try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) { |
| 740 | + in.seek(indexFP); |
| 741 | + BKDReader r = new BKDReader(in); |
| 742 | + long medianValue = estimateMedianValue(r); |
| 743 | + long medianCount = estimatePointCount(r, medianValue, medianValue); |
| 744 | + |
| 745 | + assertEquals(expectedMedianValue, medianValue); |
| 746 | + assertThat(medianCount, greaterThanOrEqualTo((long) (valuesCount/2))); //assert that Index has duplicate data |
| 747 | + assertThat(medianCount, greaterThanOrEqualTo((long) (0.75 * expectedMedianCount))); |
| 748 | + assertThat(medianCount, lessThanOrEqualTo((long) (1.25 * expectedMedianCount))); |
| 749 | + } |
| 750 | + } |
| 751 | + } |
| 752 | + |
| 753 | + public void testIndexHasNotDuplicateData() throws IOException { |
| 754 | + int valuesCount = 5000; |
| 755 | + int maxPointsInLeafNode = 40; |
| 756 | + long expectedMedianCount = (long)(valuesCount * 0.35); |
| 757 | + long expectedMedianValue = randomLongBetween(-10000000L, 10000000L); |
| 758 | + |
| 759 | + try (Directory dir = newDirectory()) { |
| 760 | + BKDWriter w = new BKDWriter(valuesCount, dir, "tmp", 1, 1, 8, maxPointsInLeafNode, 1, valuesCount); |
| 761 | + byte[] longBytes = new byte[8]; |
| 762 | + for (int docId = 0; docId < valuesCount; docId++) { |
| 763 | + long value = docId < expectedMedianCount ? expectedMedianValue : randomLongBetween(-10000000L, 10000000L); |
| 764 | + LongPoint.encodeDimension(value, longBytes, 0); |
| 765 | + w.add(longBytes, docId); |
| 766 | + } |
| 767 | + long indexFP; |
| 768 | + try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) { |
| 769 | + indexFP = w.finish(out); |
| 770 | + } |
| 771 | + try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) { |
| 772 | + in.seek(indexFP); |
| 773 | + BKDReader r = new BKDReader(in); |
| 774 | + long medianValue = estimateMedianValue(r); |
| 775 | + long medianCount = estimatePointCount(r, medianValue, medianValue); |
| 776 | + |
| 777 | + // can't make any assertion about the values of medianValue and medianCount |
| 778 | + // as BKDReader::estimatePointCount can be really off for non-duplicate data |
| 779 | + assertThat(medianCount, lessThan((long) (valuesCount/2))); //assert that Index does NOT have duplicate data |
| 780 | + } |
| 781 | + } |
| 782 | + } |
711 | 783 |
|
712 | 784 | public void testMaxScoreQueryVisitor() {
|
713 | 785 | BitSetProducer producer = context -> new FixedBitSet(1);
|
@@ -760,42 +832,6 @@ public void testMaxScoreQueryVisitor() {
|
760 | 832 | }
|
761 | 833 | }
|
762 | 834 |
|
763 |
| - public void testNumericLongSortOptimizationDocsHaveTheSameValue() throws Exception { |
764 |
| - final String fieldNameLong = "long-field"; |
765 |
| - MappedFieldType fieldTypeLong = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG); |
766 |
| - MapperService mapperService = mock(MapperService.class); |
767 |
| - when(mapperService.fullName(fieldNameLong)).thenReturn(fieldTypeLong); |
768 |
| - TestSearchContext searchContext = spy(new TestSearchContext(null, indexShard)); |
769 |
| - when(searchContext.mapperService()).thenReturn(mapperService); |
770 |
| - |
771 |
| - final int numDocs = scaledRandomIntBetween(5, 10); |
772 |
| - long longValue = randomLongBetween(-10000000L, 10000000L); // all docs have the same value |
773 |
| - Directory dir = newDirectory(); |
774 |
| - RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
775 |
| - for (int i = 0; i < numDocs; ++i) { |
776 |
| - Document doc = new Document(); |
777 |
| - doc.add(new LongPoint(fieldNameLong, longValue)); |
778 |
| - doc.add(new NumericDocValuesField(fieldNameLong, longValue)); |
779 |
| - writer.addDocument(doc); |
780 |
| - } |
781 |
| - writer.close(); |
782 |
| - final IndexReader reader = DirectoryReader.open(dir); |
783 |
| - IndexSearcher searcher = getAssertingSortOptimizedSearcher(reader, 1); |
784 |
| - |
785 |
| - final SortField sortFieldLong = new SortField(fieldNameLong, SortField.Type.LONG); |
786 |
| - sortFieldLong.setMissingValue(Long.MAX_VALUE); |
787 |
| - final Sort longSort = new Sort(sortFieldLong); |
788 |
| - SortAndFormats sortAndFormats = new SortAndFormats(longSort, new DocValueFormat[]{DocValueFormat.RAW}); |
789 |
| - searchContext.sort(sortAndFormats); |
790 |
| - searchContext.parsedQuery(new ParsedQuery(new MatchAllDocsQuery())); |
791 |
| - searchContext.setTask(new SearchTask(123L, "", "", "", null, Collections.emptyMap())); |
792 |
| - searchContext.setSize(10); |
793 |
| - QueryPhase.execute(searchContext, searcher, checkCancelled -> {}); |
794 |
| - assertSortResults(searchContext.queryResult().topDocs().topDocs, (long) numDocs, false); |
795 |
| - reader.close(); |
796 |
| - dir.close(); |
797 |
| - } |
798 |
| - |
799 | 835 | // used to check that numeric long or date sort optimization was run
|
800 | 836 | private static IndexSearcher getAssertingSortOptimizedSearcher(IndexReader reader, int queryType) {
|
801 | 837 | return new IndexSearcher(reader) {
|
|
0 commit comments