From dd98abd882fcb9a69dd00859dfd47f6b6e491eeb Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Mon, 30 Nov 2020 18:30:43 +0200 Subject: [PATCH 1/6] initial commit for doc_count field --- .../index/mapper/CustomTermFreqField.java | 107 ++++++++++++++++++ .../index/mapper/DocCountFieldMapper.java | 24 ++-- .../bucket/BucketsAggregator.java | 2 +- .../aggregations/bucket/DocCountProvider.java | 23 ++-- .../bucket/composite/CompositeAggregator.java | 2 +- .../bucket/composite/SortedDocsProducer.java | 2 +- .../GlobalOrdinalsStringTermsAggregator.java | 4 +- .../mapper/DocCountFieldMapperTests.java | 6 +- .../index/mapper/DocCountFieldTypeTests.java | 12 +- .../bucket/DocCountProviderTests.java | 10 +- 10 files changed, 149 insertions(+), 43 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java b/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java new file mode 100644 index 0000000000000..28961c1258921 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java @@ -0,0 +1,107 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; + +/** + * Custom field that allow storing an integer value as a term frequency in lucene. + */ +public final class CustomTermFreqField extends Field { + + private static final FieldType FIELD_TYPE = new FieldType(); + static { + FIELD_TYPE.setTokenized(false); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + } + + private int fieldValue; + + public CustomTermFreqField(String fieldName, CharSequence term, int fieldValue) { + super(fieldName, term, FIELD_TYPE); + this.fieldValue = fieldValue; + } + + public CustomTermFreqField(String fieldName, int fieldValue) { + this(fieldName, fieldName, fieldValue); + } + + public void setFieldValue(int fieldValue) { + this.fieldValue = fieldValue; + } + + @Override + public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { + CustomTermFreqTokenStream stream; + if (reuse instanceof CustomTermFreqTokenStream) { + stream = (CustomTermFreqTokenStream) reuse; + } else { + stream = new CustomTermFreqTokenStream(); + } + stream.setValues((String) fieldsData, fieldValue); + return stream; + } + + private static final class CustomTermFreqTokenStream extends TokenStream { + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class); + private boolean used = true; + private String value = null; + private int freq = 0; + + private CustomTermFreqTokenStream() { + } + + /** Sets the values */ + void setValues(String value, int freq) { + this.value = value; + this.freq = freq; + } + + @Override + public boolean incrementToken() { + if (used) { + return false; + } + clearAttributes(); + termAttribute.append(value); + freqAttribute.setTermFrequency(freq); + used = true; + return true; + } + + @Override + public void reset() { + used = false; + } + + @Override + public void close() { + value = null; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 2a7f8dc93299f..3bb7bb5e813bb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -18,9 +18,6 @@ */ package org.elasticsearch.index.mapper; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParserUtils; @@ -42,10 +39,10 @@ public static final class DocCountFieldType extends MappedFieldType { public static final DocCountFieldType INSTANCE = new DocCountFieldType(); - private static final Long defaultValue = 1L; + private static final int defaultValue = 1; public DocCountFieldType() { - super(NAME, false, false, true, TextSearchInfo.NONE, Collections.emptyMap()); + super(NAME, false, false, false, TextSearchInfo.NONE, Collections.emptyMap()); } @Override @@ -55,12 +52,12 @@ public String typeName() { @Override public String familyTypeName() { - return NumberFieldMapper.NumberType.LONG.typeName(); + return NumberFieldMapper.NumberType.INTEGER.typeName(); } @Override public Query existsQuery(QueryShardContext context) { - return new DocValuesFieldExistsQuery(NAME); + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support exists queries"); } @Override @@ -80,7 +77,7 @@ protected Object parseSourceValue(Object value) { if ("".equals(value)) { return defaultValue; } else { - return NumberFieldMapper.NumberType.objectToLong(value, false); + return NumberFieldMapper.NumberType.INTEGER.parse(value, false); } } }; @@ -96,17 +93,14 @@ protected void parseCreateField(ParseContext context) throws IOException { XContentParser parser = context.parser(); XContentParserUtils.ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser); - long value = parser.longValue(false); + int value = parser.intValue(false); if (value <= 0) { - throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer."); + throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer. Value [" + + value + "] is not allowed."); } - final Field docCount = new NumericDocValuesField(NAME, value); - context.doc().add(docCount); + context.doc().add(new CustomTermFreqField(NAME, value)); } - @Override - public void preParse(ParseContext context) { } - @Override public DocCountFieldType fieldType() { return (DocCountFieldType) super.fieldType(); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 80db5685001e1..70b3e2f3c0a5b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -87,7 +87,7 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long * Same as {@link #collectBucket(LeafBucketCollector, int, long)}, but doesn't check if the docCounts needs to be re-sized. */ public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException { - long docCount = docCountProvider.getDocCount(doc); + int docCount = docCountProvider.getDocCount(doc); if (docCounts.increment(bucketOrd, docCount) == docCount) { // We calculate the final number of buckets only during the reduce phase. But we still need to // trigger bucket consumer from time to time in order to give it a chance to check available memory and break diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java index 9cf25e098cb0f..0db6edd58e73f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -19,9 +19,9 @@ package org.elasticsearch.search.aggregations.bucket; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; import org.elasticsearch.index.mapper.DocCountFieldMapper; import java.io.IOException; @@ -33,17 +33,24 @@ */ public class DocCountProvider { - private NumericDocValues docCountValues; + public static final int DEFAULT_DOC_COUNT = 1; + private PostingsEnum docCountPostings; - public long getDocCount(int doc) throws IOException { - if (docCountValues != null && docCountValues.advanceExact(doc)) { - return docCountValues.longValue(); + public int getDocCount(int doc) throws IOException { + if (docCountPostings == null) { + return DEFAULT_DOC_COUNT; + } + if (docCountPostings.docID() < doc) { + docCountPostings.advance(doc); + } + if (docCountPostings.docID() == doc) { + return docCountPostings.freq(); } else { - return 1L; + return DEFAULT_DOC_COUNT; } } public void setLeafReaderContext(LeafReaderContext ctx) throws IOException { - docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); + docCountPostings = ctx.reader().postings(new Term(DocCountFieldMapper.NAME, DocCountFieldMapper.NAME)); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java index 9d33c4b499f74..64bea01683496 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -430,7 +430,7 @@ private LeafBucketCollector getFirstPassCollector(RoaringDocIdSet.Builder builde @Override public void collect(int doc, long bucket) throws IOException { try { - long docCount = docCountProvider.getDocCount(doc); + int docCount = docCountProvider.getDocCount(doc); if (queue.addIfCompetitive(indexSortPrefix, docCount)) { if (builder != null && lastDoc != doc) { builder.add(doc); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java index 7a5cef87b6731..1c06e307dd0c2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java @@ -69,7 +69,7 @@ protected boolean processBucket(CompositeValuesCollectorQueue queue, LeafReaderC @Override public void collect(int doc, long bucket) throws IOException { hasCollected[0] = true; - long docCount = docCountProvider.getDocCount(doc); + int docCount = docCountProvider.getDocCount(doc); if (queue.addIfCompetitive(docCount)) { topCompositeCollected[0]++; if (adder != null && doc != lastDoc) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index e49959248e43f..f8018767d6615 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -315,7 +315,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } int ord = singleValues.ordValue(); - long docCount = docCountProvider.getDocCount(doc); + int docCount = docCountProvider.getDocCount(doc); segmentDocCounts.increment(ord + 1, docCount); } }); @@ -329,7 +329,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) { - long docCount = docCountProvider.getDocCount(doc); + int docCount = docCountProvider.getDocCount(doc); segmentDocCounts.increment(segmentOrd + 1, docCount); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index 25cf8c064a847..ce1b44a247e6b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.index.mapper; -import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; import static org.hamcrest.Matchers.containsString; @@ -39,8 +38,7 @@ public void testParseValue() throws Exception { )); IndexableField field = doc.rootDoc().getField(DOC_COUNT_FIELD); - assertEquals(100L, field.numericValue()); - assertEquals(DocValuesType.NUMERIC, field.fieldType().docValuesType()); + assertEquals(DOC_COUNT_FIELD, field.stringValue()); assertEquals(1, doc.rootDoc().getFields(DOC_COUNT_FIELD).length); } @@ -66,6 +64,6 @@ public void testInvalidDocument_NonNumericDocCount() throws Exception { public void testInvalidDocument_FractionalDocCount() throws Exception { DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23)))); - assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Long without data loss")); + assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Integer without data loss")); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java index f8f5d3c2e810c..039e84f5fdf0f 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.index.mapper; -import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.elasticsearch.index.query.QueryShardException; import java.io.IOException; @@ -42,14 +41,15 @@ public void testRangeQuery() { public void testExistsQuery() { MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType(); - assertTrue(ft.existsQuery(randomMockShardContext()) instanceof DocValuesFieldExistsQuery); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.existsQuery(randomMockShardContext())); + assertEquals("Field [_doc_count] of type [_doc_count] does not support exists queries", e.getMessage()); } public void testFetchSourceValue() throws IOException { MappedFieldType fieldType = new DocCountFieldMapper.DocCountFieldType(); - assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, 14)); - assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, "14")); - assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, "")); - assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, null)); + assertEquals(Arrays.asList(14), fetchSourceValue(fieldType, 14)); + assertEquals(Arrays.asList(14), fetchSourceValue(fieldType, "14")); + assertEquals(Arrays.asList(1), fetchSourceValue(fieldType, "")); + assertEquals(Arrays.asList(1), fetchSourceValue(fieldType, null)); } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java index f7ba8db8a66f2..6455123fa615d 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java @@ -20,12 +20,12 @@ package org.elasticsearch.search.aggregations.bucket; import org.apache.lucene.document.IntPoint; -import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.index.mapper.CustomTermFreqField; import org.elasticsearch.index.mapper.DocCountFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; @@ -48,11 +48,11 @@ public class DocCountProviderTests extends AggregatorTestCase { public void testDocsWithDocCount() throws IOException { testAggregation(new MatchAllDocsQuery(), iw -> { iw.addDocument(List.of( - new NumericDocValuesField(DOC_COUNT_FIELD, 4), + new CustomTermFreqField(DOC_COUNT_FIELD, 4), new SortedNumericDocValuesField(NUMBER_FIELD, 1) )); iw.addDocument(List.of( - new NumericDocValuesField(DOC_COUNT_FIELD, 5), + new CustomTermFreqField(DOC_COUNT_FIELD, 5), new SortedNumericDocValuesField(NUMBER_FIELD, 7) )); iw.addDocument(List.of( @@ -77,11 +77,11 @@ public void testDocsWithoutDocCount() throws IOException { public void testQueryFiltering() throws IOException { testAggregation(IntPoint.newRangeQuery(NUMBER_FIELD, 4, 5), iw -> { iw.addDocument(List.of( - new NumericDocValuesField(DOC_COUNT_FIELD, 4), + new CustomTermFreqField(DOC_COUNT_FIELD, 4), new IntPoint(NUMBER_FIELD, 6) )); iw.addDocument(List.of( - new NumericDocValuesField(DOC_COUNT_FIELD, 2), + new CustomTermFreqField(DOC_COUNT_FIELD, 2), new IntPoint(NUMBER_FIELD, 5) )); iw.addDocument(List.of( From 9c42ab9bd000b5ae3ec3d94ae94ed8dadd253e05 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Wed, 2 Dec 2020 22:36:13 +0200 Subject: [PATCH 2/6] Minor change --- .../elasticsearch/index/mapper/DocCountFieldMapper.java | 2 +- .../search/aggregations/bucket/DocCountProvider.java | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 3bb7bb5e813bb..89f9467bf0f02 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -39,7 +39,7 @@ public static final class DocCountFieldType extends MappedFieldType { public static final DocCountFieldType INSTANCE = new DocCountFieldType(); - private static final int defaultValue = 1; + public static final int defaultValue = 1; public DocCountFieldType() { super(NAME, false, false, false, TextSearchInfo.NONE, Collections.emptyMap()); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java index 0db6edd58e73f..837e5f2d67c0a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -33,12 +33,13 @@ */ public class DocCountProvider { - public static final int DEFAULT_DOC_COUNT = 1; + public static final int defaultValue = DocCountFieldMapper.DocCountFieldType.defaultValue; + private PostingsEnum docCountPostings; public int getDocCount(int doc) throws IOException { if (docCountPostings == null) { - return DEFAULT_DOC_COUNT; + return defaultValue; } if (docCountPostings.docID() < doc) { docCountPostings.advance(doc); @@ -46,7 +47,7 @@ public int getDocCount(int doc) throws IOException { if (docCountPostings.docID() == doc) { return docCountPostings.freq(); } else { - return DEFAULT_DOC_COUNT; + return defaultValue; } } From 3c37312fafe5f716413a0b32ca071bc6579c2736 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 3 Dec 2020 01:28:40 +0200 Subject: [PATCH 3/6] Minor changes to variable names --- .../org/elasticsearch/index/mapper/DocCountFieldMapper.java | 6 +++--- .../search/aggregations/bucket/DocCountProvider.java | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 89f9467bf0f02..8cd501627ae2a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -39,7 +39,7 @@ public static final class DocCountFieldType extends MappedFieldType { public static final DocCountFieldType INSTANCE = new DocCountFieldType(); - public static final int defaultValue = 1; + public static final int DEFAULT_VALUE = 1; public DocCountFieldType() { super(NAME, false, false, false, TextSearchInfo.NONE, Collections.emptyMap()); @@ -71,11 +71,11 @@ public ValueFetcher valueFetcher(QueryShardContext context, String format) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); } - return new SourceValueFetcher(name(), context, defaultValue) { + return new SourceValueFetcher(name(), context, DEFAULT_VALUE) { @Override protected Object parseSourceValue(Object value) { if ("".equals(value)) { - return defaultValue; + return DEFAULT_VALUE; } else { return NumberFieldMapper.NumberType.INTEGER.parse(value, false); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java index 837e5f2d67c0a..5b29c82a405b6 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -33,13 +33,13 @@ */ public class DocCountProvider { - public static final int defaultValue = DocCountFieldMapper.DocCountFieldType.defaultValue; + public static final int DEFAULT_VALUE = DocCountFieldMapper.DocCountFieldType.DEFAULT_VALUE; private PostingsEnum docCountPostings; public int getDocCount(int doc) throws IOException { if (docCountPostings == null) { - return defaultValue; + return DEFAULT_VALUE; } if (docCountPostings.docID() < doc) { docCountPostings.advance(doc); @@ -47,7 +47,7 @@ public int getDocCount(int doc) throws IOException { if (docCountPostings.docID() == doc) { return docCountPostings.freq(); } else { - return defaultValue; + return DEFAULT_VALUE; } } From 300e8bc01a7d42801a9ae0ed025997ae36975c73 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 3 Dec 2020 12:09:30 +0200 Subject: [PATCH 4/6] Address reviewer comments --- .../org/elasticsearch/index/mapper/DocCountFieldMapper.java | 2 +- .../index/mapper/DocCountFieldMapperTests.java | 6 ++++++ .../elasticsearch/index/mapper/DocCountFieldTypeTests.java | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 8cd501627ae2a..9736a7b73fc86 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -57,7 +57,7 @@ public String familyTypeName() { @Override public Query existsQuery(QueryShardContext context) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support exists queries"); + throw new QueryShardException(context, "Field [" + name() + "] of type [" + typeName() + "] does not support exists queries"); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index ce1b44a247e6b..01a220f45ac2c 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -66,4 +66,10 @@ public void testInvalidDocument_FractionalDocCount() throws Exception { Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23)))); assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Integer without data loss")); } + + public void testInvalidDocument_ArrayDocCount() throws Exception { + DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); + Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.array(CONTENT_TYPE, 10, 20, 30)))); + assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Integer without data loss")); + } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java index 039e84f5fdf0f..b08d6d47477f8 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java @@ -41,7 +41,7 @@ public void testRangeQuery() { public void testExistsQuery() { MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType(); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.existsQuery(randomMockShardContext())); + QueryShardException e = expectThrows(QueryShardException.class, () -> ft.existsQuery(randomMockShardContext())); assertEquals("Field [_doc_count] of type [_doc_count] does not support exists queries", e.getMessage()); } From 4d77def5b625392cfbb5574f726cffd390cf21f4 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 3 Dec 2020 13:04:59 +0200 Subject: [PATCH 5/6] Added check to forbid arrays as values of _doc_count field --- .../elasticsearch/index/mapper/DocCountFieldMapper.java | 5 +++++ .../index/mapper/DocCountFieldMapperTests.java | 8 +++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 9736a7b73fc86..5e4f1bcf465b0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -93,6 +93,11 @@ protected void parseCreateField(ParseContext context) throws IOException { XContentParser parser = context.parser(); XContentParserUtils.ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser); + // Check that _doc_count is a single value and not an array + if (context.doc().getField(NAME) != null) { + throw new IllegalArgumentException("Arrays are not allowed for field [" + fieldType().name() + "]."); + } + int value = parser.intValue(false); if (value <= 0) { throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer. Value [" diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java index 01a220f45ac2c..ae00f0a3cd0a1 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java @@ -27,9 +27,6 @@ public class DocCountFieldMapperTests extends MapperServiceTestCase { private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE; private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME; - /** - * Test parsing field mapping and adding simple field - */ public void testParseValue() throws Exception { DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); ParsedDocument doc = mapper.parse(source(b -> @@ -69,7 +66,8 @@ public void testInvalidDocument_FractionalDocCount() throws Exception { public void testInvalidDocument_ArrayDocCount() throws Exception { DocumentMapper mapper = createDocumentMapper(mapping(b -> {})); - Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.array(CONTENT_TYPE, 10, 20, 30)))); - assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Integer without data loss")); + Exception e = expectThrows(MapperParsingException.class, + () -> mapper.parse(source(b -> b.array(CONTENT_TYPE, 10, 20, 30)))); + assertThat(e.getCause().getMessage(), containsString("Arrays are not allowed for field [_doc_count].")); } } From ffbcea48871c63de4ee1e5a80fdfd3152ff5d876 Mon Sep 17 00:00:00 2001 From: Christos Soulios Date: Thu, 3 Dec 2020 15:45:54 +0200 Subject: [PATCH 6/6] Addressed reviewer comments --- .../elasticsearch/index/mapper/CustomTermFreqField.java | 6 +----- .../elasticsearch/index/mapper/DocCountFieldMapper.java | 4 ++-- .../search/aggregations/bucket/DocCountProviderTests.java | 8 ++++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java b/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java index 28961c1258921..b057f60015e13 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java @@ -28,7 +28,7 @@ import org.apache.lucene.index.IndexOptions; /** - * Custom field that allow storing an integer value as a term frequency in lucene. + * Custom field that allows storing an integer value as a term frequency in lucene. */ public final class CustomTermFreqField extends Field { @@ -46,10 +46,6 @@ public CustomTermFreqField(String fieldName, CharSequence term, int fieldValue) this.fieldValue = fieldValue; } - public CustomTermFreqField(String fieldName, int fieldValue) { - this(fieldName, fieldName, fieldValue); - } - public void setFieldValue(int fieldValue) { this.fieldValue = fieldValue; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java index 5e4f1bcf465b0..b811b10397729 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java @@ -94,7 +94,7 @@ protected void parseCreateField(ParseContext context) throws IOException { XContentParserUtils.ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser); // Check that _doc_count is a single value and not an array - if (context.doc().getField(NAME) != null) { + if (context.doc().getByKey(NAME) != null) { throw new IllegalArgumentException("Arrays are not allowed for field [" + fieldType().name() + "]."); } @@ -103,7 +103,7 @@ protected void parseCreateField(ParseContext context) throws IOException { throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer. Value [" + value + "] is not allowed."); } - context.doc().add(new CustomTermFreqField(NAME, value)); + context.doc().addWithKey(NAME, new CustomTermFreqField(NAME, NAME, value)); } @Override diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java index 6455123fa615d..72a08f34c9386 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java @@ -48,11 +48,11 @@ public class DocCountProviderTests extends AggregatorTestCase { public void testDocsWithDocCount() throws IOException { testAggregation(new MatchAllDocsQuery(), iw -> { iw.addDocument(List.of( - new CustomTermFreqField(DOC_COUNT_FIELD, 4), + new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 4), new SortedNumericDocValuesField(NUMBER_FIELD, 1) )); iw.addDocument(List.of( - new CustomTermFreqField(DOC_COUNT_FIELD, 5), + new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 5), new SortedNumericDocValuesField(NUMBER_FIELD, 7) )); iw.addDocument(List.of( @@ -77,11 +77,11 @@ public void testDocsWithoutDocCount() throws IOException { public void testQueryFiltering() throws IOException { testAggregation(IntPoint.newRangeQuery(NUMBER_FIELD, 4, 5), iw -> { iw.addDocument(List.of( - new CustomTermFreqField(DOC_COUNT_FIELD, 4), + new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 4), new IntPoint(NUMBER_FIELD, 6) )); iw.addDocument(List.of( - new CustomTermFreqField(DOC_COUNT_FIELD, 2), + new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 2), new IntPoint(NUMBER_FIELD, 5) )); iw.addDocument(List.of(