Skip to content

Commit f28412a

Browse files
committed
Add match_only_text, a space-efficient variant of text. (elastic#66172)
This adds a new `match_only_text` field, which indexes the same data as a `text` field that has `index_options: docs` and `norms: false` and uses the `_source` for positional queries like `match_phrase`. Unlike `text`, this field doesn't support scoring.
1 parent 138e856 commit f28412a

File tree

24 files changed

+2241
-44
lines changed

24 files changed

+2241
-44
lines changed

docs/reference/mapping/types.asciidoc

+2-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ values.
6969
[[text-search-types]]
7070
==== Text search types
7171

72-
<<text,`text`>>:: Analyzed, unstructured text.
72+
<<text,`text` fields>>:: The text family, including `text` and `match_only_text`.
73+
Analyzed, unstructured text.
7374
{plugins}/mapper-annotated-text.html[`annotated-text`]:: Text containing special
7475
markup. Used for identifying named entities.
7576
<<completion-suggester,`completion`>>:: Used for auto-complete suggestions.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
[discrete]
2+
[[match-only-text-field-type]]
3+
=== Match-only text field type
4+
5+
A variant of <<text-field-type,`text`>> that trades scoring and efficiency of
6+
positional queries for space efficiency. This field effectively stores data the
7+
same way as a `text` field that only indexes documents (`index_options: docs`)
8+
and disables norms (`norms: false`). Term queries perform as fast as, if not
9+
faster than, on `text` fields. However, queries that need positions such as the
10+
<<query-dsl-match-query-phrase,`match_phrase` query>> perform slower as they
11+
need to look at the `_source` document to verify whether a phrase matches. All
12+
queries return constant scores that are equal to 1.0.
13+
14+
Analysis is not configurable: text is always analyzed with the
15+
<<specify-index-time-default-analyzer,default analyzer>>
16+
(<<analysis-standard-analyzer,`standard`>> by default).
17+
18+
<<span-queries,Span queries>> are not supported with this field; use
19+
<<query-dsl-intervals-query,interval queries>> instead, or the
20+
<<text-field-type,`text`>> field type if you absolutely need span queries.
21+
22+
Other than that, `match_only_text` supports the same queries as `text`. And
23+
like `text`, it doesn't support sorting or aggregating.
24+
25+
[source,console]
26+
--------------------------------
27+
PUT logs
28+
{
29+
"mappings": {
30+
"properties": {
31+
"@timestamp": {
32+
"type": "date"
33+
},
34+
"message": {
35+
"type": "match_only_text"
36+
}
37+
}
38+
}
39+
}
40+
--------------------------------
41+
42+
[discrete]
43+
[[match-only-text-params]]
44+
==== Parameters for match-only text fields
45+
46+
The following mapping parameters are accepted:
47+
48+
[horizontal]
49+
50+
<<multi-fields,`fields`>>::
51+
52+
Multi-fields allow the same string value to be indexed in multiple ways for
53+
different purposes, such as one field for search and a multi-field for
54+
sorting and aggregations, or the same string value analyzed by different
55+
analyzers.
56+
57+
<<mapping-field-meta,`meta`>>::
58+
59+
Metadata about the field.

docs/reference/mapping/types/text.asciidoc

+17-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,23 @@
1+
[testenv="basic"]
12
[[text]]
2-
=== Text field type
3+
=== Text type family
34
++++
45
<titleabbrev>Text</titleabbrev>
56
++++
67

8+
The text family includes the following field types:
9+
10+
* <<text-field-type,`text`>>, the traditional field type for full-text content
11+
such as the body of an email or the description of a product.
12+
* <<match-only-text-field-type,`match_only_text`>>, a space-optimized variant
13+
of `text` that disables scoring and performs slower on queries that need
14+
positions. It is best suited for indexing log messages.
15+
16+
17+
[discrete]
18+
[[text-field-type]]
19+
=== Text field type
20+
721
A field to index full-text values, such as the body of an email or the
822
description of a product. These fields are `analyzed`, that is they are passed through an
923
<<analysis,analyzer>> to convert the string into a list of individual terms
@@ -258,3 +272,5 @@ PUT my-index-000001
258272
}
259273
}
260274
--------------------------------------------------
275+
276+
include::match-only-text.asciidoc[]

modules/mapper-extras/build.gradle

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,6 @@ esplugin {
1616

1717
restResources {
1818
restApi {
19-
include '_common', 'cluster', 'nodes', 'indices', 'index', 'search', 'get'
19+
include '_common', 'cluster', 'field_caps', 'nodes', 'indices', 'index', 'search', 'get'
2020
}
2121
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.index.mapper;
10+
11+
import org.apache.lucene.analysis.CannedTokenStream;
12+
import org.apache.lucene.analysis.Token;
13+
import org.apache.lucene.analysis.TokenStream;
14+
import org.apache.lucene.index.DocValuesType;
15+
import org.apache.lucene.index.IndexOptions;
16+
import org.apache.lucene.index.IndexableField;
17+
import org.apache.lucene.index.IndexableFieldType;
18+
import org.elasticsearch.common.Strings;
19+
import org.elasticsearch.common.xcontent.XContentBuilder;
20+
import org.elasticsearch.common.xcontent.XContentFactory;
21+
import org.elasticsearch.index.query.SearchExecutionContext;
22+
import org.elasticsearch.plugins.Plugin;
23+
import org.hamcrest.Matchers;
24+
25+
import java.io.IOException;
26+
import java.util.Collection;
27+
import java.util.Collections;
28+
import java.util.List;
29+
30+
import static org.hamcrest.Matchers.containsString;
31+
import static org.hamcrest.Matchers.equalTo;
32+
import static org.hamcrest.Matchers.instanceOf;
33+
34+
public class MatchOnlyTextFieldMapperTests extends MapperTestCase {
35+
36+
@Override
37+
protected Collection<Plugin> getPlugins() {
38+
return List.of(new MapperExtrasPlugin());
39+
}
40+
41+
@Override
42+
protected Object getSampleValueForDocument() {
43+
return "value";
44+
}
45+
46+
public final void testExists() throws IOException {
47+
MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); }));
48+
assertExistsQuery(mapperService);
49+
assertParseMinimalWarnings();
50+
}
51+
52+
@Override
53+
protected void registerParameters(ParameterChecker checker) throws IOException {
54+
checker.registerUpdateCheck(b -> {
55+
b.field("meta", Collections.singletonMap("format", "mysql.access"));
56+
}, m -> assertEquals(Collections.singletonMap("format", "mysql.access"), m.fieldType().meta()));
57+
}
58+
59+
@Override
60+
protected void minimalMapping(XContentBuilder b) throws IOException {
61+
b.field("type", "match_only_text");
62+
}
63+
64+
public void testDefaults() throws IOException {
65+
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
66+
assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString());
67+
68+
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
69+
IndexableField[] fields = doc.rootDoc().getFields("field");
70+
assertEquals(1, fields.length);
71+
assertEquals("1234", fields[0].stringValue());
72+
IndexableFieldType fieldType = fields[0].fieldType();
73+
assertThat(fieldType.omitNorms(), equalTo(true));
74+
assertTrue(fieldType.tokenized());
75+
assertFalse(fieldType.stored());
76+
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
77+
assertThat(fieldType.storeTermVectors(), equalTo(false));
78+
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
79+
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
80+
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
81+
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
82+
}
83+
84+
public void testNullConfigValuesFail() throws MapperParsingException {
85+
Exception e = expectThrows(
86+
MapperParsingException.class,
87+
() -> createDocumentMapper(fieldMapping(b -> b.field("type", "match_only_text").field("meta", (String) null)))
88+
);
89+
assertThat(e.getMessage(), containsString("[meta] on mapper [field] of type [match_only_text] must not have a [null] value"));
90+
}
91+
92+
public void testSimpleMerge() throws IOException {
93+
XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "match_only_text"));
94+
MapperService mapperService = createMapperService(startingMapping);
95+
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));
96+
97+
merge(mapperService, startingMapping);
98+
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));
99+
100+
XContentBuilder newField = mapping(b -> {
101+
b.startObject("field")
102+
.field("type", "match_only_text")
103+
.startObject("meta")
104+
.field("key", "value")
105+
.endObject()
106+
.endObject();
107+
b.startObject("other_field").field("type", "keyword").endObject();
108+
});
109+
merge(mapperService, newField);
110+
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(MatchOnlyTextFieldMapper.class));
111+
assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class));
112+
}
113+
114+
public void testDisabledSource() throws IOException {
115+
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc");
116+
{
117+
mapping.startObject("properties");
118+
{
119+
mapping.startObject("foo");
120+
{
121+
mapping.field("type", "match_only_text");
122+
}
123+
mapping.endObject();
124+
}
125+
mapping.endObject();
126+
127+
mapping.startObject("_source");
128+
{
129+
mapping.field("enabled", false);
130+
}
131+
mapping.endObject();
132+
}
133+
mapping.endObject().endObject();
134+
135+
MapperService mapperService = createMapperService(mapping);
136+
MappedFieldType ft = mapperService.fieldType("foo");
137+
SearchExecutionContext context = createSearchExecutionContext(mapperService);
138+
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7));
139+
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> ft.phraseQuery(ts, 0, true, context));
140+
assertThat(e.getMessage(), Matchers.containsString("cannot run positional queries since [_source] is disabled"));
141+
142+
// Term queries are ok
143+
ft.termQuery("a", context); // no exception
144+
}
145+
146+
@Override
147+
protected Object generateRandomInputValue(MappedFieldType ft) {
148+
assumeFalse("We don't have a way to assert things here", true);
149+
return null;
150+
}
151+
152+
@Override
153+
protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException {
154+
assumeFalse("We don't have a way to assert things here", true);
155+
}
156+
}

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/MapperExtrasPlugin.java

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public Map<String, Mapper.TypeParser> getMappers() {
2929
mappers.put(RankFeatureFieldMapper.CONTENT_TYPE, RankFeatureFieldMapper.PARSER);
3030
mappers.put(RankFeaturesFieldMapper.CONTENT_TYPE, RankFeaturesFieldMapper.PARSER);
3131
mappers.put(SearchAsYouTypeFieldMapper.CONTENT_TYPE, SearchAsYouTypeFieldMapper.PARSER);
32+
mappers.put(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER);
3233
return Collections.unmodifiableMap(mappers);
3334
}
3435

0 commit comments

Comments
 (0)