Skip to content

Commit 99aca9c

Browse files
authored
Enhances exists queries to reduce need for _field_names (elastic#26930)
* Enhances exists queries to reduce need for `_field_names`. Before this change we wrote the names of all the fields in a document to a `_field_names` field and then implemented exists queries as a term query on this field. The problem with this approach is that it bloats the index and also affects indexing performance. This change adds a new method `existsQuery()` to `MappedFieldType`, which is implemented by each sub-class. For most field types, if doc values are available, a `DocValuesFieldExistsQuery` is used, falling back to using `_field_names` if doc values are disabled. Note that only fields where no doc values are available are written to `_field_names`. Closes elastic#26770 * Addresses review comments * Addresses more review comments * Implements existsQuery explicitly on every mapper * Reinstates ability to perform term query on `_field_names` * Added backwards compatibility (bwc) depending on index created version * Review Comments * Skips tests that are not supported in 6.1.0. These values will need to be changed after backporting this PR to 6.x.
1 parent d805c41 commit 99aca9c

File tree

45 files changed

+1880
-190
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1880
-190
lines changed

core/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java

+18
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,14 @@
2020
package org.elasticsearch.index.mapper;
2121

2222
import com.carrotsearch.hppc.ObjectArrayList;
23+
2324
import org.apache.lucene.document.Field;
2425
import org.apache.lucene.index.IndexOptions;
2526
import org.apache.lucene.index.IndexableField;
27+
import org.apache.lucene.index.Term;
28+
import org.apache.lucene.search.DocValuesFieldExistsQuery;
2629
import org.apache.lucene.search.Query;
30+
import org.apache.lucene.search.TermQuery;
2731
import org.apache.lucene.store.ByteArrayDataOutput;
2832
import org.apache.lucene.util.BytesRef;
2933
import org.elasticsearch.ElasticsearchException;
@@ -126,6 +130,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
126130
return new BytesBinaryDVIndexFieldData.Builder();
127131
}
128132

133+
@Override
134+
public Query existsQuery(QueryShardContext context) {
135+
if (hasDocValues()) {
136+
return new DocValuesFieldExistsQuery(name());
137+
} else {
138+
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
139+
}
140+
}
141+
129142
@Override
130143
public Query termQuery(Object value, QueryShardContext context) {
131144
throw new QueryShardException(context, "Binary fields do not support searching");
@@ -165,6 +178,11 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
165178
} else {
166179
field.add(value);
167180
}
181+
} else {
182+
// Only add an entry to the field names field if the field is stored
183+
// but has no doc values so exists query will work on a field with
184+
// no doc values
185+
createFieldNamesField(context, fields);
168186
}
169187

170188
}

core/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java

+14
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@
2323
import org.apache.lucene.document.SortedNumericDocValuesField;
2424
import org.apache.lucene.index.IndexOptions;
2525
import org.apache.lucene.index.IndexableField;
26+
import org.apache.lucene.index.Term;
27+
import org.apache.lucene.search.DocValuesFieldExistsQuery;
2628
import org.apache.lucene.search.Query;
29+
import org.apache.lucene.search.TermQuery;
2730
import org.apache.lucene.search.TermRangeQuery;
2831
import org.apache.lucene.util.BytesRef;
2932
import org.elasticsearch.Version;
@@ -136,6 +139,15 @@ public String typeName() {
136139
return CONTENT_TYPE;
137140
}
138141

142+
@Override
143+
public Query existsQuery(QueryShardContext context) {
144+
if (hasDocValues()) {
145+
return new DocValuesFieldExistsQuery(name());
146+
} else {
147+
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
148+
}
149+
}
150+
139151
@Override
140152
public Boolean nullValue() {
141153
return (Boolean)super.nullValue();
@@ -253,6 +265,8 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
253265
}
254266
if (fieldType().hasDocValues()) {
255267
fields.add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0));
268+
} else {
269+
createFieldNamesField(context, fields);
256270
}
257271
}
258272

core/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java

+14
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import org.apache.lucene.codecs.PostingsFormat;
2222
import org.apache.lucene.index.IndexableField;
2323
import org.apache.lucene.index.Term;
24+
import org.apache.lucene.search.Query;
25+
import org.apache.lucene.search.TermQuery;
2426
import org.apache.lucene.search.suggest.document.Completion50PostingsFormat;
2527
import org.apache.lucene.search.suggest.document.CompletionAnalyzer;
2628
import org.apache.lucene.search.suggest.document.CompletionQuery;
@@ -40,11 +42,13 @@
4042
import org.elasticsearch.common.xcontent.XContentParser.Token;
4143
import org.elasticsearch.index.analysis.AnalyzerScope;
4244
import org.elasticsearch.index.analysis.NamedAnalyzer;
45+
import org.elasticsearch.index.query.QueryShardContext;
4346
import org.elasticsearch.search.suggest.completion.CompletionSuggester;
4447
import org.elasticsearch.search.suggest.completion.context.ContextMapping;
4548
import org.elasticsearch.search.suggest.completion.context.ContextMappings;
4649

4750
import java.io.IOException;
51+
import java.util.ArrayList;
4852
import java.util.Collections;
4953
import java.util.HashMap;
5054
import java.util.HashSet;
@@ -257,6 +261,11 @@ public static synchronized PostingsFormat postingsFormat() {
257261
return postingsFormat;
258262
}
259263

264+
@Override
265+
public Query existsQuery(QueryShardContext context) {
266+
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
267+
}
268+
260269
/**
261270
* Completion prefix query
262271
*/
@@ -456,6 +465,11 @@ public Mapper parse(ParseContext context) throws IOException {
456465
context.doc().add(new SuggestField(fieldType().name(), input, metaData.weight));
457466
}
458467
}
468+
List<IndexableField> fields = new ArrayList<>(1);
469+
createFieldNamesField(context, fields);
470+
for (IndexableField field : fields) {
471+
context.doc().add(field);
472+
}
459473
multiFields.parse(this, context);
460474
return null;
461475
}

core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

+14
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,12 @@
2626
import org.apache.lucene.index.IndexReader;
2727
import org.apache.lucene.index.IndexableField;
2828
import org.apache.lucene.index.PointValues;
29+
import org.apache.lucene.index.Term;
2930
import org.apache.lucene.search.BoostQuery;
31+
import org.apache.lucene.search.DocValuesFieldExistsQuery;
3032
import org.apache.lucene.search.IndexOrDocValuesQuery;
3133
import org.apache.lucene.search.Query;
34+
import org.apache.lucene.search.TermQuery;
3235
import org.apache.lucene.util.BytesRef;
3336
import org.elasticsearch.Version;
3437
import org.elasticsearch.common.Explicit;
@@ -245,6 +248,15 @@ long parse(String value) {
245248
return dateTimeFormatter().parser().parseMillis(value);
246249
}
247250

251+
@Override
252+
public Query existsQuery(QueryShardContext context) {
253+
if (hasDocValues()) {
254+
return new DocValuesFieldExistsQuery(name());
255+
} else {
256+
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
257+
}
258+
}
259+
248260
@Override
249261
public Query termQuery(Object value, @Nullable QueryShardContext context) {
250262
Query query = rangeQuery(value, value, true, true, ShapeRelation.INTERSECTS, null, null, context);
@@ -451,6 +463,8 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
451463
}
452464
if (fieldType().hasDocValues()) {
453465
fields.add(new SortedNumericDocValuesField(fieldType().name(), timestamp));
466+
} else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) {
467+
createFieldNamesField(context, fields);
454468
}
455469
if (fieldType().stored()) {
456470
fields.add(new StoredField(fieldType().name(), timestamp));

core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

+12
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.carrotsearch.hppc.cursors.ObjectCursor;
2323
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
2424

25+
import org.apache.lucene.document.Field;
2526
import org.apache.lucene.document.FieldType;
2627
import org.apache.lucene.index.IndexOptions;
2728
import org.apache.lucene.index.IndexableField;
@@ -33,6 +34,7 @@
3334
import org.elasticsearch.common.settings.Settings;
3435
import org.elasticsearch.common.xcontent.XContentBuilder;
3536
import org.elasticsearch.index.analysis.NamedAnalyzer;
37+
import org.elasticsearch.index.mapper.FieldNamesFieldMapper.FieldNamesFieldType;
3638
import org.elasticsearch.index.similarity.SimilarityProvider;
3739
import org.elasticsearch.index.similarity.SimilarityService;
3840

@@ -285,6 +287,16 @@ public Mapper parse(ParseContext context) throws IOException {
285287
*/
286288
protected abstract void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException;
287289

290+
protected void createFieldNamesField(ParseContext context, List<IndexableField> fields) {
291+
FieldNamesFieldType fieldNamesFieldType = (FieldNamesFieldMapper.FieldNamesFieldType) context.docMapper()
292+
.metadataMapper(FieldNamesFieldMapper.class).fieldType();
293+
if (fieldNamesFieldType != null && fieldNamesFieldType.isEnabled()) {
294+
for (String fieldName : FieldNamesFieldMapper.extractFieldNames(fieldType().name())) {
295+
fields.add(new Field(FieldNamesFieldMapper.NAME, fieldName, fieldNamesFieldType));
296+
}
297+
}
298+
}
299+
288300
@Override
289301
public Iterator<Mapper> iterator() {
290302
return multiFields.iterator();

core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java

+18-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
import org.apache.lucene.index.IndexOptions;
2424
import org.apache.lucene.index.IndexableField;
2525
import org.apache.lucene.search.Query;
26+
import org.elasticsearch.Version;
27+
import org.elasticsearch.cluster.metadata.IndexMetaData;
28+
import org.elasticsearch.common.logging.DeprecationLogger;
29+
import org.elasticsearch.common.logging.ESLoggerFactory;
2630
import org.elasticsearch.common.lucene.Lucene;
2731
import org.elasticsearch.common.settings.Settings;
2832
import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -44,6 +48,9 @@
4448
*/
4549
public class FieldNamesFieldMapper extends MetadataFieldMapper {
4650

51+
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(
52+
ESLoggerFactory.getLogger(FieldNamesFieldMapper.class));
53+
4754
public static final String NAME = "_field_names";
4855

4956
public static final String CONTENT_TYPE = "_field_names";
@@ -178,11 +185,18 @@ public boolean isEnabled() {
178185
return enabled;
179186
}
180187

188+
@Override
189+
public Query existsQuery(QueryShardContext context) {
190+
throw new UnsupportedOperationException("Cannot run exists query on _field_names");
191+
}
192+
181193
@Override
182194
public Query termQuery(Object value, QueryShardContext context) {
183195
if (isEnabled() == false) {
184196
throw new IllegalStateException("Cannot run [exists] queries if the [_field_names] field is disabled");
185197
}
198+
DEPRECATION_LOGGER.deprecated(
199+
"terms query on the _field_names field is deprecated and will be removed, use exists query instead");
186200
return super.termQuery(value, context);
187201
}
188202
}
@@ -206,12 +220,14 @@ public void preParse(ParseContext context) throws IOException {
206220

207221
@Override
208222
public void postParse(ParseContext context) throws IOException {
209-
super.parse(context);
223+
if (context.indexSettings().getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).before(Version.V_6_1_0)) {
224+
super.parse(context);
225+
}
210226
}
211227

212228
@Override
213229
public Mapper parse(ParseContext context) throws IOException {
214-
// we parse in post parse
230+
// Adding values to the _field_names field is handled by the mappers for each field type
215231
return null;
216232
}
217233

core/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java

+19
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@
2323
import org.apache.lucene.document.StoredField;
2424
import org.apache.lucene.index.IndexOptions;
2525
import org.apache.lucene.index.IndexableField;
26+
import org.apache.lucene.index.Term;
27+
import org.apache.lucene.search.DocValuesFieldExistsQuery;
2628
import org.apache.lucene.search.Query;
29+
import org.apache.lucene.search.TermQuery;
2730
import org.elasticsearch.ElasticsearchParseException;
2831
import org.elasticsearch.common.Explicit;
2932
import org.elasticsearch.common.geo.GeoPoint;
@@ -37,6 +40,7 @@
3740
import org.elasticsearch.index.query.QueryShardException;
3841

3942
import java.io.IOException;
43+
import java.util.ArrayList;
4044
import java.util.Iterator;
4145
import java.util.List;
4246
import java.util.Map;
@@ -180,6 +184,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
180184
return new AbstractLatLonPointDVIndexFieldData.Builder();
181185
}
182186

187+
@Override
188+
public Query existsQuery(QueryShardContext context) {
189+
if (hasDocValues()) {
190+
return new DocValuesFieldExistsQuery(name());
191+
} else {
192+
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
193+
}
194+
}
195+
183196
@Override
184197
public Query termQuery(Object value, QueryShardContext context) {
185198
throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead: ["
@@ -207,6 +220,12 @@ protected void parse(ParseContext context, GeoPoint point) throws IOException {
207220
}
208221
if (fieldType.hasDocValues()) {
209222
context.doc().add(new LatLonDocValuesField(fieldType().name(), point.lat(), point.lon()));
223+
} else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) {
224+
List<IndexableField> fields = new ArrayList<>(1);
225+
createFieldNamesField(context, fields);
226+
for (IndexableField field : fields) {
227+
context.doc().add(field);
228+
}
210229
}
211230
// if the mapping contains multifields then use the geohash string
212231
if (multiFields.iterator().hasNext()) {

core/src/main/java/org/elasticsearch/index/mapper/GeoShapeFieldMapper.java

+19-6
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,20 @@
1818
*/
1919
package org.elasticsearch.index.mapper;
2020

21-
import org.apache.lucene.document.Field;
2221
import org.apache.lucene.index.IndexOptions;
2322
import org.apache.lucene.index.IndexableField;
23+
import org.apache.lucene.index.Term;
24+
import org.apache.lucene.search.DocValuesFieldExistsQuery;
2425
import org.apache.lucene.search.Query;
26+
import org.apache.lucene.search.TermQuery;
2527
import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
2628
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
2729
import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy;
2830
import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
2931
import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree;
3032
import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
3133
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
34+
import org.elasticsearch.Version;
3235
import org.elasticsearch.common.Explicit;
3336
import org.elasticsearch.common.geo.GeoUtils;
3437
import org.elasticsearch.common.geo.SpatialStrategy;
@@ -44,6 +47,8 @@
4447
import org.locationtech.spatial4j.shape.jts.JtsGeometry;
4548

4649
import java.io.IOException;
50+
import java.util.ArrayList;
51+
import java.util.Arrays;
4752
import java.util.Iterator;
4853
import java.util.List;
4954
import java.util.Map;
@@ -125,6 +130,11 @@ public Builder coerce(boolean coerce) {
125130
return builder;
126131
}
127132

133+
@Override
134+
protected boolean defaultDocValues(Version indexCreated) {
135+
return false;
136+
}
137+
128138
protected Explicit<Boolean> coerce(BuilderContext context) {
129139
if (coerce != null) {
130140
return new Explicit<>(coerce, true);
@@ -406,6 +416,11 @@ public PrefixTreeStrategy resolveStrategy(String strategyName) {
406416
throw new IllegalArgumentException("Unknown prefix tree strategy [" + strategyName + "]");
407417
}
408418

419+
@Override
420+
public Query existsQuery(QueryShardContext context) {
421+
return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
422+
}
423+
409424
@Override
410425
public Query termQuery(Object value, QueryShardContext context) {
411426
throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead");
@@ -440,11 +455,9 @@ public Mapper parse(ParseContext context) throws IOException {
440455
throw new MapperParsingException("[{" + fieldType().name() + "}] is configured for points only but a " +
441456
((shape instanceof JtsGeometry) ? ((JtsGeometry)shape).getGeom().getGeometryType() : shape.getClass()) + " was found");
442457
}
443-
Field[] fields = fieldType().defaultStrategy().createIndexableFields(shape);
444-
if (fields == null || fields.length == 0) {
445-
return null;
446-
}
447-
for (Field field : fields) {
458+
List<IndexableField> fields = new ArrayList<>(Arrays.asList(fieldType().defaultStrategy().createIndexableFields(shape)));
459+
createFieldNamesField(context, fields);
460+
for (IndexableField field : fields) {
448461
context.doc().add(field);
449462
}
450463
} catch (Exception e) {

0 commit comments

Comments
 (0)