Add match_only_text, a space-efficient variant of text. #66172

Merged · 28 commits · Apr 22, 2021
28 commits:

0bfd387 — Add `match_only_text`, a space-efficient variant of `text`. (jpountz, Dec 7, 2020)
6b0cb21 — iter (jpountz, Dec 10, 2020)
7525e4f — Merge branch 'master' into feature/source_phrase_queries (jpountz, Dec 16, 2020)
e57699e — Use source lookup from the shard context. (jpountz, Dec 16, 2020)
9ec31c6 — Update release version. (jpountz, Dec 16, 2020)
7a03a0f — Consolidate docs with `text`. (jpountz, Dec 16, 2020)
5774bc9 — Fail phrase queries when _source is disabled. (jpountz, Dec 17, 2020)
c0be502 — Remove support for `store`. (jpountz, Dec 17, 2020)
feaf2f8 — Add tests for span and intervals queries. (jpountz, Dec 17, 2020)
d51db6c — Test for fuzzy query. (jpountz, Dec 17, 2020)
71adb75 — More tests. (jpountz, Dec 17, 2020)
4f33106 — Merge branch 'master' into feature/source_phrase_queries (jpountz, Feb 1, 2021)
24b345e — Fix compilation. (jpountz, Feb 1, 2021)
34743ef — iter (jpountz, Feb 9, 2021)
efdb3ba — Merge branch 'master' into feature/source_phrase_queries (jpountz, Mar 30, 2021)
7114fdc — iter (jpountz, Mar 30, 2021)
2030545 — iter (jpountz, Apr 1, 2021)
96f668b — Merge branch 'master' into feature/source_phrase_queries (jpountz, Apr 1, 2021)
3a85af4 — iter (jpountz, Apr 1, 2021)
448eb28 — iter (jpountz, Apr 1, 2021)
f3e77f8 — Fix compilation. (jpountz, Apr 1, 2021)
c5f4f04 — Analysis is no longer configurable. (jpountz, Apr 2, 2021)
4818edc — iter (jpountz, Apr 7, 2021)
339c8dc — Merge branch 'master' into feature/source_phrase_queries (jpountz, Apr 21, 2021)
e652aa4 — Intervals unit tests. (jpountz, Apr 21, 2021)
31a5bba — Fix docs now that `match_only_text` supports all interval queries. (jpountz, Apr 21, 2021)
3783f18 — Undo testing hack. (jpountz, Apr 21, 2021)
edaa5b0 — Merge branch 'master' into feature/source_phrase_queries (jpountz, Apr 21, 2021)
3 changes: 2 additions & 1 deletion docs/reference/mapping/types.asciidoc
@@ -69,7 +69,8 @@ values.
[[text-search-types]]
==== Text search types

<<text,`text`>>:: Analyzed, unstructured text.
<<text,`text` fields>>:: The text family, including `text` and `match_only_text`.
Analyzed, unstructured text.
{plugins}/mapper-annotated-text.html[`annotated-text`]:: Text containing special
markup. Used for identifying named entities.
<<completion-suggester,`completion`>>:: Used for auto-complete suggestions.
73 changes: 73 additions & 0 deletions docs/reference/mapping/types/match-only-text.asciidoc
@@ -0,0 +1,73 @@
[discrete]
[[match-only-text-field-type]]
=== Match-only text field type

A variant of <<text-field-type,`text`>> that trades scoring and
positional-query efficiency for space efficiency. This field effectively
stores data the same way as a `text` field that only indexes documents
(`index_options: docs`) and disables norms (`norms: false`). Term queries
perform at least as fast as on `text` fields, but queries that need positions,
such as the <<query-dsl-match-query-phrase,`match_phrase` query>>, are slower
because they need to look at the `_source` document to verify whether a phrase
matches. All queries return constant scores that are equal to 1.0.
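
For illustration, the storage behavior described above roughly corresponds to the following `text` mapping (a sketch with a hypothetical index name; it does not reproduce the `_source`-based verification of positional queries):

[source,console]
--------------------------------
PUT logs-baseline
{
  "mappings": {
    "properties": {
      "message": {
        "type": "text",
        "index_options": "docs",
        "norms": false
      }
    }
  }
}
--------------------------------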

<<span-queries,Span queries>>, as well as the `wildcard` and `fuzzy` rules of
<<query-dsl-intervals-query,interval queries>>, are not supported by this
field. Use the <<text-field-type,`text`>> field type if you need them.

Otherwise, `match_only_text` supports the same queries as `text`, and like
`text`, it doesn't support sorting or aggregations.

[source,console]
--------------------------------
PUT logs
{
  "mappings": {
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "message": {
        "type": "match_only_text"
      }
    }
  }
}
--------------------------------
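
Term-level and positional queries can then be issued as usual; for example, a hypothetical phrase search against the mapping above (the phrase is verified against `_source`, and the score is a constant 1.0):

[source,console]
--------------------------------
GET logs/_search
{
  "query": {
    "match_phrase": {
      "message": "the quick brown fox"
    }
  }
}
--------------------------------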

[discrete]
[[match-only-text-params]]
==== Parameters for match-only text fields

The following mapping parameters are accepted:

[horizontal]

<<analyzer,`analyzer`>>::
Review comment (Contributor):

One thought I had since reviewing: if this is just targeted at log lines, would it make sense to cut down on the analysis config options? For example, not allowing a different search analyzer or search quote analyzer, or even removing the option to configure the analyzer and just using a default that targets log lines. This could make it simpler to maintain long backwards compatibility (BWC) for the field type. (This is a rough idea, and I am not sure it makes sense... maybe many users do in fact tweak analyzers for log lines.)

Reply (Contributor Author):

Actually I think it's a good call; as far as I know, ECS doesn't configure analyzers, for example. It would be much easier to add it in the future if it proves needed than to remove it later when we want to ease backward compatibility.

The <<analysis,analyzer>> that should be used for
the `match_only_text` field, both at index time and at
search time (unless overridden by the <<search-analyzer,`search_analyzer`>>).
Defaults to the default index analyzer, or the
<<analysis-standard-analyzer,`standard` analyzer>>.

<<multi-fields,`fields`>>::

Multi-fields allow the same string value to be indexed in multiple ways for
different purposes, such as one field for search and a multi-field for
sorting and aggregations, or the same string value analyzed by different
analyzers.

<<mapping-field-meta,`meta`>>::

Metadata about the field.

<<search-analyzer,`search_analyzer`>>::

The <<analyzer,`analyzer`>> that should be used at search time on
the `match_only_text` field. Defaults to the `analyzer` setting.

<<search-quote-analyzer,`search_quote_analyzer`>>::

The <<analyzer,`analyzer`>> that should be used at search time when a
phrase is encountered. Defaults to the `search_analyzer` setting.
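
As a sketch of the `fields` parameter, a hypothetical mapping that keeps `message` searchable as `match_only_text` while adding a `keyword` multi-field for sorting and aggregations:

[source,console]
--------------------------------
PUT logs-with-keyword
{
  "mappings": {
    "properties": {
      "message": {
        "type": "match_only_text",
        "fields": {
          "raw": {
            "type": "keyword"
          }
        }
      }
    }
  }
}
--------------------------------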
18 changes: 17 additions & 1 deletion docs/reference/mapping/types/text.asciidoc
@@ -1,9 +1,23 @@
[testenv="basic"]
[[text]]
=== Text field type
=== Text type family
++++
<titleabbrev>Text</titleabbrev>
++++

The text family includes the following field types:

* <<text-field-type,`text`>>, the traditional field type for full-text content
such as the body of an email or the description of a product.
* <<match-only-text-field-type,`match_only_text`>>, a space-optimized variant
of `text` that disables scoring and is slower on queries that need positions.
It is best suited for indexing log messages.


[discrete]
[[text-field-type]]
=== Text field type

A field to index full-text values, such as the body of an email or the
description of a product. These fields are `analyzed`, that is they are passed through an
<<analysis,analyzer>> to convert the string into a list of individual terms
@@ -253,3 +267,5 @@ PUT my-index-000001
}
}
--------------------------------------------------

include::match-only-text.asciidoc[]
2 changes: 1 addition & 1 deletion modules/mapper-extras/build.gradle
@@ -15,6 +15,6 @@ esplugin {

restResources {
  restApi {
    include '_common', 'cluster', 'nodes', 'indices', 'index', 'search', 'get'
    include '_common', 'cluster', 'field_caps', 'nodes', 'indices', 'index', 'search', 'get'
  }
}
@@ -0,0 +1,267 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.plugins.Plugin;
import org.hamcrest.Matchers;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;

public class MatchOnlyTextFieldMapperTests extends MapperTestCase {

    @Override
    protected Collection<Plugin> getPlugins() {
        return List.of(new MapperExtrasPlugin());
    }

    @Override
    protected Object getSampleValueForDocument() {
        return "value";
    }

    public final void testExists() throws IOException {
        MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); }));
        assertExistsQuery(mapperService);
        assertParseMinimalWarnings();
    }

    @Override
    protected void registerParameters(ParameterChecker checker) throws IOException {
        checker.registerUpdateCheck(b -> {
            b.field("analyzer", "default");
            b.field("search_analyzer", "keyword");
        }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchAnalyzer().name()));
        checker.registerUpdateCheck(b -> {
            b.field("analyzer", "default");
            b.field("search_analyzer", "keyword");
            b.field("search_quote_analyzer", "keyword");
        }, m -> assertEquals("keyword", m.fieldType().getTextSearchInfo().getSearchQuoteAnalyzer().name()));

        checker.registerConflictCheck("analyzer", b -> b.field("analyzer", "keyword"));
    }

    @Override
    protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
        NamedAnalyzer dflt = new NamedAnalyzer(
            "default",
            AnalyzerScope.INDEX,
            new StandardAnalyzer(),
            TextFieldMapper.Defaults.POSITION_INCREMENT_GAP
        );
        NamedAnalyzer standard = new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer());
        NamedAnalyzer keyword = new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer());
        NamedAnalyzer whitespace = new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer());
        NamedAnalyzer stop = new NamedAnalyzer(
            "my_stop_analyzer",
            AnalyzerScope.INDEX,
            new CustomAnalyzer(
                new StandardTokenizerFactory(indexSettings, null, "standard", indexSettings.getSettings()),
                new CharFilterFactory[0],
                new TokenFilterFactory[] { new TokenFilterFactory() {
                    @Override
                    public String name() {
                        return "stop";
                    }

                    @Override
                    public TokenStream create(TokenStream tokenStream) {
                        return new StopFilter(tokenStream, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
                    }
                } }
            )
        );
        return new IndexAnalyzers(
            Map.of("default", dflt, "standard", standard, "keyword", keyword, "whitespace", whitespace, "my_stop_analyzer", stop),
            Map.of(),
            Map.of()
        );
    }

    @Override
    protected void minimalMapping(XContentBuilder b) throws IOException {
        b.field("type", "match_only_text");
    }

    public void testDefaults() throws IOException {
        DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
        assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(1, fields.length);
        assertEquals("1234", fields[0].stringValue());
        IndexableFieldType fieldType = fields[0].fieldType();
        assertThat(fieldType.omitNorms(), equalTo(true));
        assertTrue(fieldType.tokenized());
        assertFalse(fieldType.stored());
        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
        assertThat(fieldType.storeTermVectors(), equalTo(false));
        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
        assertEquals(DocValuesType.NONE, fieldType.docValuesType());
    }

    public void testSearchAnalyzerSerialization() throws IOException {
        XContentBuilder mapping = fieldMapping(
            b -> b.field("type", "match_only_text").field("analyzer", "standard").field("search_analyzer", "keyword")
        );
        assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString());

        // special case: default index analyzer
        mapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "default").field("search_analyzer", "keyword"));
        assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString());

        // special case: default search analyzer
        mapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "keyword").field("search_analyzer", "default"));
        assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString());

        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        createDocumentMapper(fieldMapping(this::minimalMapping)).mapping().toXContent(
            builder,
            new ToXContent.MapParams(Collections.singletonMap("include_defaults", "true"))
        );
        builder.endObject();
        String mappingString = Strings.toString(builder);
        assertTrue(mappingString.contains("analyzer"));
        assertTrue(mappingString.contains("search_analyzer"));
        assertTrue(mappingString.contains("search_quote_analyzer"));
    }

    public void testSearchQuoteAnalyzerSerialization() throws IOException {
        XContentBuilder mapping = fieldMapping(
            b -> b.field("type", "match_only_text")
                .field("analyzer", "standard")
                .field("search_analyzer", "standard")
                .field("search_quote_analyzer", "keyword")
        );
        assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString());

        // special case: default index/search analyzer
        mapping = fieldMapping(
            b -> b.field("type", "match_only_text")
                .field("analyzer", "default")
                .field("search_analyzer", "default")
                .field("search_quote_analyzer", "keyword")
        );
        assertEquals(Strings.toString(mapping), createDocumentMapper(mapping).mappingSource().toString());
    }

    public void testNullConfigValuesFail() throws MapperParsingException {
        Exception e = expectThrows(
            MapperParsingException.class,
            () -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", (String) null)))
        );
        assertThat(e.getMessage(), containsString("[analyzer] on mapper [field] of type [match_only_text] must not have a [null] value"));
    }

    public void testSimpleMerge() throws IOException {
        XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "whitespace"));
        MapperService mapperService = createMapperService(startingMapping);
        assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));

        merge(mapperService, startingMapping);
        assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));

        XContentBuilder differentAnalyzer = fieldMapping(b -> b.field("type", "match_only_text").field("analyzer", "keyword"));
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentAnalyzer));
        assertThat(e.getMessage(), containsString("Cannot update parameter [analyzer]"));

        XContentBuilder newField = mapping(b -> {
            b.startObject("field")
                .field("type", "match_only_text")
                .field("analyzer", "whitespace")
                .startObject("meta")
                .field("key", "value")
                .endObject()
                .endObject();
            b.startObject("other_field").field("type", "keyword").endObject();
        });
        merge(mapperService, newField);
        assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));
        assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class));
    }

    public void testDisabledSource() throws IOException {
        XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc");
        {
            mapping.startObject("properties");
            {
                mapping.startObject("foo");
                {
                    mapping.field("type", "match_only_text");
                }
                mapping.endObject();
            }
            mapping.endObject();

            mapping.startObject("_source");
            {
                mapping.field("enabled", false);
            }
            mapping.endObject();
        }
        mapping.endObject().endObject();

        MapperService mapperService = createMapperService(mapping);
        MappedFieldType ft = mapperService.fieldType("foo");
        SearchExecutionContext context = createSearchExecutionContext(mapperService);
        TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7));
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.phraseQuery(ts, 0, true, context));
        assertThat(e.getMessage(), Matchers.containsString("cannot run positional queries since [_source] is disabled"));

        // Term queries are ok
        ft.termQuery("a", context); // no exception
    }

    @Override
    protected Object generateRandomInputValue(MappedFieldType ft) {
        assumeFalse("We don't have a way to assert things here", true);
        return null;
    }

    @Override
    protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException {
        assumeFalse("We don't have a way to assert things here", true);
    }
}
@@ -29,6 +29,7 @@ public Map<String, Mapper.TypeParser> getMappers() {
        mappers.put(RankFeatureFieldMapper.CONTENT_TYPE, RankFeatureFieldMapper.PARSER);
        mappers.put(RankFeaturesFieldMapper.CONTENT_TYPE, RankFeaturesFieldMapper.PARSER);
        mappers.put(SearchAsYouTypeFieldMapper.CONTENT_TYPE, SearchAsYouTypeFieldMapper.PARSER);
        mappers.put(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER);
        return Collections.unmodifiableMap(mappers);
    }
