diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java b/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java
new file mode 100644
index 0000000000000..b057f60015e13
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/mapper/CustomTermFreqField.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexOptions;
+
+/**
+ * Custom field that allows storing an integer value as a term frequency in lucene.
+ */
+public final class CustomTermFreqField extends Field {
+
+    private static final FieldType FIELD_TYPE = new FieldType();
+    static {
+        FIELD_TYPE.setTokenized(false);
+        FIELD_TYPE.setOmitNorms(true);
+        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+    }
+
+    private int fieldValue;
+
+    public CustomTermFreqField(String fieldName, CharSequence term, int fieldValue) {
+        super(fieldName, term, FIELD_TYPE);
+        this.fieldValue = fieldValue;
+    }
+
+    public void setFieldValue(int fieldValue) {
+        this.fieldValue = fieldValue;
+    }
+
+    @Override
+    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
+        CustomTermFreqTokenStream stream;
+        if (reuse instanceof CustomTermFreqTokenStream) {
+            stream = (CustomTermFreqTokenStream) reuse;
+        } else {
+            stream = new CustomTermFreqTokenStream();
+        }
+        stream.setValues((String) fieldsData, fieldValue);
+        return stream;
+    }
+
+    private static final class CustomTermFreqTokenStream extends TokenStream {
+        private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+        private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);
+        private boolean used = true;
+        private String value = null;
+        private int freq = 0;
+
+        private CustomTermFreqTokenStream() {
+        }
+
+        /** Sets the values */
+        void setValues(String value, int freq) {
+            this.value = value;
+            this.freq = freq;
+        }
+
+        @Override
+        public boolean incrementToken() {
+            if (used) {
+                return false;
+            }
+            clearAttributes();
+            termAttribute.append(value);
+            freqAttribute.setTermFrequency(freq);
+            used = true;
+            return true;
+        }
+
+        @Override
+        public void reset() {
+            used = false;
+        }
+
+        @Override
+        public void close() {
+            value = null;
+        }
+    }
+}
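Reviewer note: the mechanism above relies on Lucene letting a token stream report an arbitrary term frequency via `TermFrequencyAttribute`, so an integer can be round-tripped through the postings of a single constant term. A minimal sketch of that round trip, assuming `lucene-core` and the new `CustomTermFreqField` are on the classpath; the demo class name, `ByteBuffersDirectory`, `StandardAnalyzer` and the value 42 are illustrative choices, not part of this change:

```java
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.elasticsearch.index.mapper.CustomTermFreqField;

public class TermFreqRoundTripDemo {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // Index the integer 42 as the frequency of the constant term "_doc_count".
            doc.add(new CustomTermFreqField("_doc_count", "_doc_count", 42));
            writer.addDocument(doc);
            writer.commit();

            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                // Read the value back through the postings API, the same way DocCountProvider does below.
                PostingsEnum postings = reader.leaves().get(0).reader()
                    .postings(new Term("_doc_count", "_doc_count"), PostingsEnum.FREQS);
                postings.nextDoc();                   // position on the only document
                System.out.println(postings.freq());  // prints 42
            }
        }
    }
}
```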
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java
index 2a7f8dc93299f..b811b10397729 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/DocCountFieldMapper.java
@@ -18,9 +18,6 @@
  */
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentParserUtils;
@@ -42,10 +39,10 @@ public static final class DocCountFieldType extends MappedFieldType {
 
     public static final DocCountFieldType INSTANCE = new DocCountFieldType();
 
-    private static final Long defaultValue = 1L;
+    public static final int DEFAULT_VALUE = 1;
 
     public DocCountFieldType() {
-        super(NAME, false, false, true, TextSearchInfo.NONE, Collections.emptyMap());
+        super(NAME, false, false, false, TextSearchInfo.NONE, Collections.emptyMap());
     }
 
     @Override
@@ -55,12 +52,12 @@ public String typeName() {
 
     @Override
     public String familyTypeName() {
-        return NumberFieldMapper.NumberType.LONG.typeName();
+        return NumberFieldMapper.NumberType.INTEGER.typeName();
     }
 
     @Override
     public Query existsQuery(QueryShardContext context) {
-        return new DocValuesFieldExistsQuery(NAME);
+        throw new QueryShardException(context, "Field [" + name() + "] of type [" + typeName() + "] does not support exists queries");
     }
 
     @Override
@@ -74,13 +71,13 @@ public ValueFetcher valueFetcher(QueryShardContext context, String format) {
             throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats.");
         }
 
-        return new SourceValueFetcher(name(), context, defaultValue) {
+        return new SourceValueFetcher(name(), context, DEFAULT_VALUE) {
             @Override
             protected Object parseSourceValue(Object value) {
                 if ("".equals(value)) {
-                    return defaultValue;
+                    return DEFAULT_VALUE;
                 } else {
-                    return NumberFieldMapper.NumberType.objectToLong(value, false);
+                    return NumberFieldMapper.NumberType.INTEGER.parse(value, false);
                 }
             }
         };
@@ -96,17 +93,19 @@ protected void parseCreateField(ParseContext context) throws IOException {
         XContentParser parser = context.parser();
         XContentParserUtils.ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser);
 
-        long value = parser.longValue(false);
+        // Check that _doc_count is a single value and not an array
+        if (context.doc().getByKey(NAME) != null) {
+            throw new IllegalArgumentException("Arrays are not allowed for field [" + fieldType().name() + "].");
+        }
+
+        int value = parser.intValue(false);
         if (value <= 0) {
-            throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer.");
+            throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer. Value ["
+                + value + "] is not allowed.");
         }
-        final Field docCount = new NumericDocValuesField(NAME, value);
-        context.doc().add(docCount);
+        context.doc().addWithKey(NAME, new CustomTermFreqField(NAME, NAME, value));
     }
 
-    @Override
-    public void preParse(ParseContext context) { }
-
     @Override
     public DocCountFieldType fieldType() {
         return (DocCountFieldType) super.fieldType();
Value [" + + value + "] is not allowed."); } - final Field docCount = new NumericDocValuesField(NAME, value); - context.doc().add(docCount); + context.doc().addWithKey(NAME, new CustomTermFreqField(NAME, NAME, value)); } - @Override - public void preParse(ParseContext context) { } - @Override public DocCountFieldType fieldType() { return (DocCountFieldType) super.fieldType(); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 80db5685001e1..70b3e2f3c0a5b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -87,7 +87,7 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long * Same as {@link #collectBucket(LeafBucketCollector, int, long)}, but doesn't check if the docCounts needs to be re-sized. */ public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException { - long docCount = docCountProvider.getDocCount(doc); + int docCount = docCountProvider.getDocCount(doc); if (docCounts.increment(bucketOrd, docCount) == docCount) { // We calculate the final number of buckets only during the reduce phase. But we still need to // trigger bucket consumer from time to time in order to give it a chance to check available memory and break diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java index 9cf25e098cb0f..5b29c82a405b6 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DocCountProvider.java @@ -19,9 +19,9 @@ package org.elasticsearch.search.aggregations.bucket; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Term; import org.elasticsearch.index.mapper.DocCountFieldMapper; import java.io.IOException; @@ -33,17 +33,25 @@ */ public class DocCountProvider { - private NumericDocValues docCountValues; + public static final int DEFAULT_VALUE = DocCountFieldMapper.DocCountFieldType.DEFAULT_VALUE; - public long getDocCount(int doc) throws IOException { - if (docCountValues != null && docCountValues.advanceExact(doc)) { - return docCountValues.longValue(); + private PostingsEnum docCountPostings; + + public int getDocCount(int doc) throws IOException { + if (docCountPostings == null) { + return DEFAULT_VALUE; + } + if (docCountPostings.docID() < doc) { + docCountPostings.advance(doc); + } + if (docCountPostings.docID() == doc) { + return docCountPostings.freq(); } else { - return 1L; + return DEFAULT_VALUE; } } public void setLeafReaderContext(LeafReaderContext ctx) throws IOException { - docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); + docCountPostings = ctx.reader().postings(new Term(DocCountFieldMapper.NAME, DocCountFieldMapper.NAME)); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java index 
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java
index 9d33c4b499f74..64bea01683496 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java
@@ -430,7 +430,7 @@ private LeafBucketCollector getFirstPassCollector(RoaringDocIdSet.Builder builde
             @Override
             public void collect(int doc, long bucket) throws IOException {
                 try {
-                    long docCount = docCountProvider.getDocCount(doc);
+                    int docCount = docCountProvider.getDocCount(doc);
                     if (queue.addIfCompetitive(indexSortPrefix, docCount)) {
                         if (builder != null && lastDoc != doc) {
                             builder.add(doc);
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java
index 7a5cef87b6731..1c06e307dd0c2 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java
@@ -69,7 +69,7 @@ protected boolean processBucket(CompositeValuesCollectorQueue queue, LeafReaderC
             @Override
             public void collect(int doc, long bucket) throws IOException {
                 hasCollected[0] = true;
-                long docCount = docCountProvider.getDocCount(doc);
+                int docCount = docCountProvider.getDocCount(doc);
                 if (queue.addIfCompetitive(docCount)) {
                     topCompositeCollected[0]++;
                     if (adder != null && doc != lastDoc) {
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
index e49959248e43f..f8018767d6615 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
@@ -315,7 +315,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
                     return;
                 }
                 int ord = singleValues.ordValue();
-                long docCount = docCountProvider.getDocCount(doc);
+                int docCount = docCountProvider.getDocCount(doc);
                 segmentDocCounts.increment(ord + 1, docCount);
             }
         });
@@ -329,7 +329,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
                     return;
                 }
                 for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
-                    long docCount = docCountProvider.getDocCount(doc);
+                    int docCount = docCountProvider.getDocCount(doc);
                     segmentDocCounts.increment(segmentOrd + 1, docCount);
                 }
             }
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java
index 25cf8c064a847..ae00f0a3cd0a1 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldMapperTests.java
@@ -18,7 +18,6 @@
  */
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexableField;
 
 import static org.hamcrest.Matchers.containsString;
@@ -28,9 +27,6 @@ public class DocCountFieldMapperTests extends MapperServiceTestCase {
     private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE;
     private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME;
 
-    /**
-     * Test parsing field mapping and adding simple field
-     */
     public void testParseValue() throws Exception {
         DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
         ParsedDocument doc = mapper.parse(source(b ->
@@ -39,8 +35,7 @@ public void testParseValue() throws Exception {
         ));
 
         IndexableField field = doc.rootDoc().getField(DOC_COUNT_FIELD);
-        assertEquals(100L, field.numericValue());
-        assertEquals(DocValuesType.NUMERIC, field.fieldType().docValuesType());
+        assertEquals(DOC_COUNT_FIELD, field.stringValue());
         assertEquals(1, doc.rootDoc().getFields(DOC_COUNT_FIELD).length);
     }
 
@@ -66,6 +61,13 @@ public void testInvalidDocument_NonNumericDocCount() throws Exception {
     public void testInvalidDocument_FractionalDocCount() throws Exception {
         DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
         Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23))));
-        assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Long without data loss"));
+        assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Integer without data loss"));
     }
+
+    public void testInvalidDocument_ArrayDocCount() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
+        Exception e = expectThrows(MapperParsingException.class,
+            () -> mapper.parse(source(b -> b.array(CONTENT_TYPE, 10, 20, 30))));
+        assertThat(e.getCause().getMessage(), containsString("Arrays are not allowed for field [_doc_count]."));
+    }
 }
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java
index f8f5d3c2e810c..b08d6d47477f8 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/DocCountFieldTypeTests.java
@@ -18,7 +18,6 @@
  */
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.elasticsearch.index.query.QueryShardException;
 
 import java.io.IOException;
@@ -42,14 +41,15 @@ public void testRangeQuery() {
 
     public void testExistsQuery() {
         MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType();
-        assertTrue(ft.existsQuery(randomMockShardContext()) instanceof DocValuesFieldExistsQuery);
+        QueryShardException e = expectThrows(QueryShardException.class, () -> ft.existsQuery(randomMockShardContext()));
+        assertEquals("Field [_doc_count] of type [_doc_count] does not support exists queries", e.getMessage());
     }
 
     public void testFetchSourceValue() throws IOException {
         MappedFieldType fieldType = new DocCountFieldMapper.DocCountFieldType();
-        assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, 14));
-        assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, "14"));
-        assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, ""));
-        assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, null));
+        assertEquals(Arrays.asList(14), fetchSourceValue(fieldType, 14));
+        assertEquals(Arrays.asList(14), fetchSourceValue(fieldType, "14"));
+        assertEquals(Arrays.asList(1), fetchSourceValue(fieldType, ""));
+        assertEquals(Arrays.asList(1), fetchSourceValue(fieldType, null));
     }
 }
diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java
index f7ba8db8a66f2..72a08f34c9386 100644
--- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java
+++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java
@@ -20,12 +20,12 @@
 package org.elasticsearch.search.aggregations.bucket;
 
 import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.CheckedConsumer;
+import org.elasticsearch.index.mapper.CustomTermFreqField;
 import org.elasticsearch.index.mapper.DocCountFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
@@ -48,11 +48,11 @@ public class DocCountProviderTests extends AggregatorTestCase {
     public void testDocsWithDocCount() throws IOException {
         testAggregation(new MatchAllDocsQuery(), iw -> {
             iw.addDocument(List.of(
-                new NumericDocValuesField(DOC_COUNT_FIELD, 4),
+                new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 4),
                 new SortedNumericDocValuesField(NUMBER_FIELD, 1)
             ));
             iw.addDocument(List.of(
-                new NumericDocValuesField(DOC_COUNT_FIELD, 5),
+                new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 5),
                 new SortedNumericDocValuesField(NUMBER_FIELD, 7)
             ));
             iw.addDocument(List.of(
@@ -77,11 +77,11 @@ public void testDocsWithoutDocCount() throws IOException {
     public void testQueryFiltering() throws IOException {
         testAggregation(IntPoint.newRangeQuery(NUMBER_FIELD, 4, 5), iw -> {
             iw.addDocument(List.of(
-                new NumericDocValuesField(DOC_COUNT_FIELD, 4),
+                new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 4),
                 new IntPoint(NUMBER_FIELD, 6)
             ));
             iw.addDocument(List.of(
-                new NumericDocValuesField(DOC_COUNT_FIELD, 2),
+                new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 2),
                 new IntPoint(NUMBER_FIELD, 5)
            ));
            iw.addDocument(List.of(