Store _doc_count field as custom term frequency #65776

Merged: 6 commits, Dec 3, 2020
org/elasticsearch/index/mapper/CustomTermFreqField.java (new file)
@@ -0,0 +1,103 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;

/**
 * Custom field that allows storing an integer value as a term frequency in lucene.
 */
public final class CustomTermFreqField extends Field {

    private static final FieldType FIELD_TYPE = new FieldType();
    static {
        FIELD_TYPE.setTokenized(false);
        FIELD_TYPE.setOmitNorms(true);
        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    }

    private int fieldValue;

    public CustomTermFreqField(String fieldName, CharSequence term, int fieldValue) {
        super(fieldName, term, FIELD_TYPE);
        this.fieldValue = fieldValue;
    }

    public void setFieldValue(int fieldValue) {
        this.fieldValue = fieldValue;
    }

    @Override
    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
        CustomTermFreqTokenStream stream;
        if (reuse instanceof CustomTermFreqTokenStream) {
            stream = (CustomTermFreqTokenStream) reuse;
        } else {
            stream = new CustomTermFreqTokenStream();
        }
        stream.setValues((String) fieldsData, fieldValue);
        return stream;
    }

    private static final class CustomTermFreqTokenStream extends TokenStream {
        private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
        private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);
        private boolean used = true;
        private String value = null;
        private int freq = 0;

        private CustomTermFreqTokenStream() {
        }

        /** Sets the values */
        void setValues(String value, int freq) {
            this.value = value;
            this.freq = freq;
        }

        @Override
        public boolean incrementToken() {
            if (used) {
                return false;
            }
            clearAttributes();
            termAttribute.append(value);
            freqAttribute.setTermFrequency(freq);
            used = true;
            return true;
        }

        @Override
        public void reset() {
            used = false;
        }

        @Override
        public void close() {
            value = null;
        }
    }
}
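
For reference, a minimal standalone sketch of the trick this field implements; it is not part of the PR, and the demo class, in-memory directory, and analyzer choice are illustrative assumptions. It indexes one document with a CustomTermFreqField and reads the integer back as the term's frequency through PostingsEnum, which is the same read path DocCountProvider uses further down:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.elasticsearch.index.mapper.CustomTermFreqField;

public class CustomTermFreqDemo {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            // Store the integer 42 as the term frequency of the single term "_doc_count".
            // The analyzer is irrelevant: CustomTermFreqField supplies its own token stream.
            Document doc = new Document();
            doc.add(new CustomTermFreqField("_doc_count", "_doc_count", 42));
            writer.addDocument(doc);
            writer.commit();

            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                LeafReader leaf = reader.leaves().get(0).reader();
                // Read the value back as the term's frequency; no doc values involved.
                PostingsEnum postings = leaf.postings(new Term("_doc_count", "_doc_count"), PostingsEnum.FREQS);
                postings.advance(0);
                System.out.println(postings.freq()); // prints 42
            }
        }
    }
}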
org/elasticsearch/index/mapper/DocCountFieldMapper.java

@@ -18,9 +18,6 @@
  */
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentParserUtils;
@@ -42,10 +39,10 @@ public static final class DocCountFieldType extends MappedFieldType {
 
         public static final DocCountFieldType INSTANCE = new DocCountFieldType();
 
-        private static final Long defaultValue = 1L;
+        public static final int DEFAULT_VALUE = 1;
 
         public DocCountFieldType() {
-            super(NAME, false, false, true, TextSearchInfo.NONE, Collections.emptyMap());
+            super(NAME, false, false, false, TextSearchInfo.NONE, Collections.emptyMap());
         }
 
         @Override
@@ -55,12 +52,12 @@ public String typeName() {
 
         @Override
         public String familyTypeName() {
-            return NumberFieldMapper.NumberType.LONG.typeName();
+            return NumberFieldMapper.NumberType.INTEGER.typeName();
         }
 
         @Override
         public Query existsQuery(QueryShardContext context) {
-            return new DocValuesFieldExistsQuery(NAME);
+            throw new QueryShardException(context, "Field [" + name() + "] of type [" + typeName() + "] does not support exists queries");
         }
 
         @Override
@@ -74,13 +71,13 @@ public ValueFetcher valueFetcher(QueryShardContext context, String format) {
                 throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats.");
             }
 
-            return new SourceValueFetcher(name(), context, defaultValue) {
+            return new SourceValueFetcher(name(), context, DEFAULT_VALUE) {
                 @Override
                 protected Object parseSourceValue(Object value) {
                     if ("".equals(value)) {
-                        return defaultValue;
+                        return DEFAULT_VALUE;
                     } else {
-                        return NumberFieldMapper.NumberType.objectToLong(value, false);
+                        return NumberFieldMapper.NumberType.INTEGER.parse(value, false);
                     }
                 }
             };
@@ -96,17 +93,19 @@ protected void parseCreateField(ParseContext context) throws IOException {
         XContentParser parser = context.parser();
         XContentParserUtils.ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, parser.currentToken(), parser);
 
-        long value = parser.longValue(false);
+        // Check that _doc_count is a single value and not an array
+        if (context.doc().getByKey(NAME) != null) {
+            throw new IllegalArgumentException("Arrays are not allowed for field [" + fieldType().name() + "].");
+        }
+
+        int value = parser.intValue(false);
         if (value <= 0) {
-            throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer.");
+            throw new IllegalArgumentException("Field [" + fieldType().name() + "] must be a positive integer. Value ["
+                + value + "] is not allowed.");
         }
-        final Field docCount = new NumericDocValuesField(NAME, value);
-        context.doc().add(docCount);
+        context.doc().addWithKey(NAME, new CustomTermFreqField(NAME, NAME, value));
     }
 
     @Override
     public void preParse(ParseContext context) { }
 
     @Override
     public DocCountFieldType fieldType() {
         return (DocCountFieldType) super.fieldType();
org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java

@@ -87,7 +87,7 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long
      * Same as {@link #collectBucket(LeafBucketCollector, int, long)}, but doesn't check if the docCounts needs to be re-sized.
      */
     public final void collectExistingBucket(LeafBucketCollector subCollector, int doc, long bucketOrd) throws IOException {
-        long docCount = docCountProvider.getDocCount(doc);
+        int docCount = docCountProvider.getDocCount(doc);
         if (docCounts.increment(bucketOrd, docCount) == docCount) {
             // We calculate the final number of buckets only during the reduce phase. But we still need to
             // trigger bucket consumer from time to time in order to give it a chance to check available memory and break
org/elasticsearch/search/aggregations/bucket/DocCountProvider.java

@@ -19,9 +19,9 @@
 
 package org.elasticsearch.search.aggregations.bucket;
 
-import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
 import org.elasticsearch.index.mapper.DocCountFieldMapper;
 
 import java.io.IOException;
@@ -33,17 +33,25 @@
  */
 public class DocCountProvider {
 
-    private NumericDocValues docCountValues;
+    public static final int DEFAULT_VALUE = DocCountFieldMapper.DocCountFieldType.DEFAULT_VALUE;
 
-    public long getDocCount(int doc) throws IOException {
-        if (docCountValues != null && docCountValues.advanceExact(doc)) {
-            return docCountValues.longValue();
+    private PostingsEnum docCountPostings;
+
+    public int getDocCount(int doc) throws IOException {
+        if (docCountPostings == null) {
+            return DEFAULT_VALUE;
+        }
+        if (docCountPostings.docID() < doc) {
+            docCountPostings.advance(doc);
+        }
+        if (docCountPostings.docID() == doc) {
+            return docCountPostings.freq();
         } else {
-            return 1L;
+            return DEFAULT_VALUE;
         }
     }
 
     public void setLeafReaderContext(LeafReaderContext ctx) throws IOException {
-        docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME);
+        docCountPostings = ctx.reader().postings(new Term(DocCountFieldMapper.NAME, DocCountFieldMapper.NAME));
     }
 }
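
As a usage illustration of the provider above, here is a hedged sketch; the helper class and method are assumptions, not PR code. It sums the effective doc count of every document in one segment. Note that getDocCount must be called with ascending doc ids, since the underlying PostingsEnum only moves forward:

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.search.aggregations.bucket.DocCountProvider;

public final class DocCountSums {
    /** Hypothetical helper: total pre-aggregated docs represented by one segment (deletions ignored for brevity). */
    public static long sumDocCounts(LeafReaderContext ctx) throws IOException {
        DocCountProvider provider = new DocCountProvider();
        provider.setLeafReaderContext(ctx); // positions a PostingsEnum on the _doc_count term
        long sum = 0;
        for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) { // ascending doc ids only
            sum += provider.getDocCount(doc); // freq() of the _doc_count term, or DEFAULT_VALUE (1)
        }
        return sum;
    }
}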
org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java

@@ -430,7 +430,7 @@ private LeafBucketCollector getFirstPassCollector(RoaringDocIdSet.Builder builde
             @Override
             public void collect(int doc, long bucket) throws IOException {
                 try {
-                    long docCount = docCountProvider.getDocCount(doc);
+                    int docCount = docCountProvider.getDocCount(doc);
                     if (queue.addIfCompetitive(indexSortPrefix, docCount)) {
                         if (builder != null && lastDoc != doc) {
                             builder.add(doc);
org/elasticsearch/search/aggregations/bucket/composite/SortedDocsProducer.java

@@ -69,7 +69,7 @@ protected boolean processBucket(CompositeValuesCollectorQueue queue, LeafReaderC
             @Override
             public void collect(int doc, long bucket) throws IOException {
                 hasCollected[0] = true;
-                long docCount = docCountProvider.getDocCount(doc);
+                int docCount = docCountProvider.getDocCount(doc);
                 if (queue.addIfCompetitive(docCount)) {
                     topCompositeCollected[0]++;
                     if (adder != null && doc != lastDoc) {
org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

@@ -315,7 +315,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
                     return;
                 }
                 int ord = singleValues.ordValue();
-                long docCount = docCountProvider.getDocCount(doc);
+                int docCount = docCountProvider.getDocCount(doc);
                 segmentDocCounts.increment(ord + 1, docCount);
             }
         });
@@ -329,7 +329,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
                     return;
                 }
                 for (long segmentOrd = segmentOrds.nextOrd(); segmentOrd != NO_MORE_ORDS; segmentOrd = segmentOrds.nextOrd()) {
-                    long docCount = docCountProvider.getDocCount(doc);
+                    int docCount = docCountProvider.getDocCount(doc);
                     segmentDocCounts.increment(segmentOrd + 1, docCount);
                 }
             }
org/elasticsearch/index/mapper/DocCountFieldMapperTests.java

@@ -18,7 +18,6 @@
  */
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexableField;
 
 import static org.hamcrest.Matchers.containsString;
@@ -28,9 +27,6 @@ public class DocCountFieldMapperTests extends MapperServiceTestCase {
     private static final String CONTENT_TYPE = DocCountFieldMapper.CONTENT_TYPE;
     private static final String DOC_COUNT_FIELD = DocCountFieldMapper.NAME;
 
-    /**
-     * Test parsing field mapping and adding simple field
-     */
     public void testParseValue() throws Exception {
         DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
         ParsedDocument doc = mapper.parse(source(b ->
@@ -39,8 +35,7 @@ public void testParseValue() throws Exception {
         ));
 
         IndexableField field = doc.rootDoc().getField(DOC_COUNT_FIELD);
-        assertEquals(100L, field.numericValue());
-        assertEquals(DocValuesType.NUMERIC, field.fieldType().docValuesType());
+        assertEquals(DOC_COUNT_FIELD, field.stringValue());
         assertEquals(1, doc.rootDoc().getFields(DOC_COUNT_FIELD).length);
     }
 
@@ -66,6 +61,13 @@ public void testInvalidDocument_NonNumericDocCount() throws Exception {
     public void testInvalidDocument_FractionalDocCount() throws Exception {
         DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
         Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> b.field(CONTENT_TYPE, 100.23))));
-        assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Long without data loss"));
+        assertThat(e.getCause().getMessage(), containsString("100.23 cannot be converted to Integer without data loss"));
     }
+
+    public void testInvalidDocument_ArrayDocCount() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));
+        Exception e = expectThrows(MapperParsingException.class,
+            () -> mapper.parse(source(b -> b.array(CONTENT_TYPE, 10, 20, 30))));
+        assertThat(e.getCause().getMessage(), containsString("Arrays are not allowed for field [_doc_count]."));
+    }
 }
org/elasticsearch/index/mapper/DocCountFieldTypeTests.java

@@ -18,7 +18,6 @@
  */
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.elasticsearch.index.query.QueryShardException;
 
 import java.io.IOException;
@@ -42,14 +41,15 @@ public void testRangeQuery() {
 
     public void testExistsQuery() {
         MappedFieldType ft = new DocCountFieldMapper.DocCountFieldType();
-        assertTrue(ft.existsQuery(randomMockShardContext()) instanceof DocValuesFieldExistsQuery);
+        QueryShardException e = expectThrows(QueryShardException.class, () -> ft.existsQuery(randomMockShardContext()));
+        assertEquals("Field [_doc_count] of type [_doc_count] does not support exists queries", e.getMessage());
     }
 
     public void testFetchSourceValue() throws IOException {
         MappedFieldType fieldType = new DocCountFieldMapper.DocCountFieldType();
-        assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, 14));
-        assertEquals(Arrays.asList(14L), fetchSourceValue(fieldType, "14"));
-        assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, ""));
-        assertEquals(Arrays.asList(1L), fetchSourceValue(fieldType, null));
+        assertEquals(Arrays.asList(14), fetchSourceValue(fieldType, 14));
+        assertEquals(Arrays.asList(14), fetchSourceValue(fieldType, "14"));
+        assertEquals(Arrays.asList(1), fetchSourceValue(fieldType, ""));
+        assertEquals(Arrays.asList(1), fetchSourceValue(fieldType, null));
     }
 }
org/elasticsearch/search/aggregations/bucket/DocCountProviderTests.java

@@ -20,12 +20,12 @@
 package org.elasticsearch.search.aggregations.bucket;
 
 import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.common.CheckedConsumer;
+import org.elasticsearch.index.mapper.CustomTermFreqField;
 import org.elasticsearch.index.mapper.DocCountFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
@@ -48,11 +48,11 @@ public class DocCountProviderTests extends AggregatorTestCase {
     public void testDocsWithDocCount() throws IOException {
         testAggregation(new MatchAllDocsQuery(), iw -> {
             iw.addDocument(List.of(
-                new NumericDocValuesField(DOC_COUNT_FIELD, 4),
+                new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 4),
                 new SortedNumericDocValuesField(NUMBER_FIELD, 1)
             ));
             iw.addDocument(List.of(
-                new NumericDocValuesField(DOC_COUNT_FIELD, 5),
+                new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 5),
                 new SortedNumericDocValuesField(NUMBER_FIELD, 7)
             ));
             iw.addDocument(List.of(
@@ -77,11 +77,11 @@ public void testDocsWithoutDocCount() throws IOException {
     public void testQueryFiltering() throws IOException {
         testAggregation(IntPoint.newRangeQuery(NUMBER_FIELD, 4, 5), iw -> {
             iw.addDocument(List.of(
-                new NumericDocValuesField(DOC_COUNT_FIELD, 4),
+                new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 4),
                 new IntPoint(NUMBER_FIELD, 6)
             ));
             iw.addDocument(List.of(
-                new NumericDocValuesField(DOC_COUNT_FIELD, 2),
+                new CustomTermFreqField(DOC_COUNT_FIELD, DOC_COUNT_FIELD, 2),
                 new IntPoint(NUMBER_FIELD, 5)
            ));
            iw.addDocument(List.of(