From a40e583318a0e4cb506d67248919d141341ab941 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 28 Feb 2019 11:41:45 +0100 Subject: [PATCH 1/4] WIP --- .../search/query/TopDocsCollectorContext.java | 21 ++++++++++++++ .../search/query/QueryPhaseTests.java | 28 +++++++++++++------ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java index 1ccc8f4cb92db..17b367a4f3376 100644 --- a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java +++ b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java @@ -19,12 +19,18 @@ package org.elasticsearch.search.query; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollector; @@ -125,6 +131,7 @@ private EmptyTopDocsCollectorContext(IndexReader reader, Query query, } } + @Override Collector create(Collector in) { assert in == null; return collector; @@ -357,6 +364,20 @@ static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOExcep count += context.reader().docFreq(term); } return count; + } else if (query.getClass() == DocValuesFieldExistsQuery.class && reader.hasDeletions() == false) { + final String field = ((DocValuesFieldExistsQuery) 
query).getField(); + int count = 0; + for (LeafReaderContext context : reader.leaves()) { + FieldInfos fieldInfos = context.reader().getFieldInfos(); + FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + if (fieldInfo.getPointIndexDimensionCount() > 0) { + count += PointValues.getDocCount(context.reader(), field); + } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { + Terms terms = context.reader().terms(field); + count += terms.getDocCount(); + } + } + return count; } else { return -1; } diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java index a321ff9c1a80a..52fa09208d15d 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java @@ -22,6 +22,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -35,6 +36,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FilterCollector; @@ -50,6 +52,7 @@ import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.action.search.SearchTask; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.query.ParsedQuery; @@ -92,18 +95,20 @@ public void tearDown() throws 
Exception { closeShards(indexShard); } - private void countTestCase(Query query, IndexReader reader, boolean shouldCollect) throws Exception { + private void countTestCase(Query query, IndexReader reader, boolean shouldCollectSearch, boolean shouldCollectCount) throws Exception { TestSearchContext context = new TestSearchContext(null, indexShard); context.parsedQuery(new ParsedQuery(query)); context.setSize(0); context.setTask(new SearchTask(123L, "", "", "", null, Collections.emptyMap())); - final IndexSearcher searcher = shouldCollect ? new IndexSearcher(reader) : + final IndexSearcher searcher = shouldCollectSearch ? new IndexSearcher(reader) : getAssertingEarlyTerminationSearcher(reader, 0); final boolean rescore = QueryPhase.execute(context, searcher, checkCancelled -> {}); assertFalse(rescore); - assertEquals(searcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value); + IndexSearcher countSearcher = shouldCollectCount ? new IndexSearcher(reader) : + getAssertingEarlyTerminationSearcher(reader, 0); + assertEquals(countSearcher.count(query), context.queryResult().topDocs().topDocs.totalHits.value); } private void countTestCase(boolean withDeletions) throws Exception { @@ -115,9 +120,11 @@ private void countTestCase(boolean withDeletions) throws Exception { Document doc = new Document(); if (randomBoolean()) { doc.add(new StringField("foo", "bar", Store.NO)); + doc.add(new SortedSetDocValuesField("foo", new BytesRef("bar"))); } if (randomBoolean()) { - doc.add(new StringField("foo", "baz", Store.NO)); + doc.add(new StringField("foo", "bar", Store.NO)); + doc.add(new SortedSetDocValuesField("foo", new BytesRef("bar"))); } if (withDeletions && (rarely() || i == 0)) { doc.add(new StringField("delete", "yes", Store.NO)); @@ -132,16 +139,18 @@ private void countTestCase(boolean withDeletions) throws Exception { Query matchAllCsq = new ConstantScoreQuery(matchAll); Query tq = new TermQuery(new Term("foo", "bar")); Query tCsq = new 
ConstantScoreQuery(tq); + Query dvfeq = new DocValuesFieldExistsQuery("foo"); BooleanQuery bq = new BooleanQuery.Builder() .add(matchAll, Occur.SHOULD) .add(tq, Occur.MUST) .build(); - countTestCase(matchAll, reader, false); - countTestCase(matchAllCsq, reader, false); - countTestCase(tq, reader, withDeletions); - countTestCase(tCsq, reader, withDeletions); - countTestCase(bq, reader, true); + countTestCase(matchAll, reader, false, false); + countTestCase(matchAllCsq, reader, false, false); + countTestCase(tq, reader, withDeletions, withDeletions); + countTestCase(tCsq, reader, withDeletions, withDeletions); + countTestCase(dvfeq, reader, withDeletions, true); + countTestCase(bq, reader, true, true); reader.close(); w.close(); dir.close(); @@ -541,6 +550,7 @@ public void testIndexSortScrollOptimization() throws Exception { private static IndexSearcher getAssertingEarlyTerminationSearcher(IndexReader reader, int size) { return new IndexSearcher(reader) { + @Override protected void search(List leaves, Weight weight, Collector collector) throws IOException { final Collector in = new AssertingEarlyTerminationFilterCollector(collector, size); super.search(leaves, weight, in); From efd7457566e02db6d22c339b14a9c4714cc4d009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 4 Mar 2019 10:39:34 +0100 Subject: [PATCH 2/4] Handle fields that have doc values but are not indexed --- .../search/query/TopDocsCollectorContext.java | 2 ++ .../search/query/QueryPhaseTests.java | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java index 17b367a4f3376..60e73ed474b72 100644 --- a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java +++ b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java @@ -375,6 
+375,8 @@ static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOExcep } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { Terms terms = context.reader().terms(field); count += terms.getDocCount(); + } else { + return -1; // no shortcut possible for fields that are not indexed } } return count; diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java index 52fa09208d15d..16b18efe62322 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java @@ -21,6 +21,8 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.LatLonDocValuesField; +import org.apache.lucene.document.LatLonPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; @@ -121,10 +123,13 @@ private void countTestCase(boolean withDeletions) throws Exception { if (randomBoolean()) { doc.add(new StringField("foo", "bar", Store.NO)); doc.add(new SortedSetDocValuesField("foo", new BytesRef("bar"))); + doc.add(new SortedSetDocValuesField("docValuesOnlyField", new BytesRef("bar"))); + doc.add(new LatLonDocValuesField("latLonDVField", 1.0, 1.0)); + doc.add(new LatLonPoint("latLonDVField", 1.0, 1.0)); } if (randomBoolean()) { - doc.add(new StringField("foo", "bar", Store.NO)); - doc.add(new SortedSetDocValuesField("foo", new BytesRef("bar"))); + doc.add(new StringField("foo", "baz", Store.NO)); + doc.add(new SortedSetDocValuesField("foo", new BytesRef("baz"))); } if (withDeletions && (rarely() || i == 0)) { doc.add(new StringField("delete", "yes", Store.NO)); @@ -140,6 +145,10 @@ private void countTestCase(boolean withDeletions) throws Exception { Query tq = new TermQuery(new Term("foo", 
"bar")); Query tCsq = new ConstantScoreQuery(tq); Query dvfeq = new DocValuesFieldExistsQuery("foo"); + Query dvfeq_points = new DocValuesFieldExistsQuery("latLonDVField"); + Query dvfeqCsq = new ConstantScoreQuery(dvfeq); + // field with doc-values but not indexed will need to collect + Query dvOnlyfeq = new DocValuesFieldExistsQuery("docValuesOnlyField"); BooleanQuery bq = new BooleanQuery.Builder() .add(matchAll, Occur.SHOULD) .add(tq, Occur.MUST) @@ -150,6 +159,9 @@ private void countTestCase(boolean withDeletions) throws Exception { countTestCase(tq, reader, withDeletions, withDeletions); countTestCase(tCsq, reader, withDeletions, withDeletions); countTestCase(dvfeq, reader, withDeletions, true); + countTestCase(dvfeq_points, reader, withDeletions, true); + countTestCase(dvfeqCsq, reader, withDeletions, true); + countTestCase(dvOnlyfeq, reader, true, true); countTestCase(bq, reader, true, true); reader.close(); w.close(); From 4d2cf3c66f2df75c08c42ea0b51ad34b4a29fd33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 4 Mar 2019 12:46:10 +0100 Subject: [PATCH 3/4] Adding null checks --- .../search/query/TopDocsCollectorContext.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java index 60e73ed474b72..6ba8bffb06472 100644 --- a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java +++ b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java @@ -370,13 +370,17 @@ static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOExcep for (LeafReaderContext context : reader.leaves()) { FieldInfos fieldInfos = context.reader().getFieldInfos(); FieldInfo fieldInfo = fieldInfos.fieldInfo(field); - if (fieldInfo.getPointIndexDimensionCount() > 0) { - count += 
PointValues.getDocCount(context.reader(), field); - } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { - Terms terms = context.reader().terms(field); - count += terms.getDocCount(); - } else { - return -1; // no shortcut possible for fields that are not indexed + if (fieldInfo != null) { + if (fieldInfo.getPointIndexDimensionCount() > 0) { + count += PointValues.getDocCount(context.reader(), field); + } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { + Terms terms = context.reader().terms(field); + if (terms != null) { + count += terms.getDocCount(); + } + } else { + return -1; // no shortcut possible for fields that are not indexed + } } } return count; From 745dd0756d348cfbbe58c48feb59a0c8d695f6a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 4 Mar 2019 15:57:21 +0100 Subject: [PATCH 4/4] iter --- .../elasticsearch/search/query/TopDocsCollectorContext.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java index 6ba8bffb06472..1e2cd7541f944 100644 --- a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java +++ b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java @@ -372,7 +372,10 @@ static int shortcutTotalHitCount(IndexReader reader, Query query) throws IOExcep FieldInfo fieldInfo = fieldInfos.fieldInfo(field); if (fieldInfo != null) { if (fieldInfo.getPointIndexDimensionCount() > 0) { - count += PointValues.getDocCount(context.reader(), field); + PointValues points = context.reader().getPointValues(field); + if (points != null) { + count += points.getDocCount(); + } } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) { Terms terms = context.reader().terms(field); if (terms != null) {