Skip to content

Commit 8fba6e4

Browse files
authored
Handle ignored fields directly in SourceValueFetcher (#68738)
Currently, the value fetcher framework handles ignored fields by reading the stored values of the _ignored metadata field, and passing these through on calls to fetchValues(). However, this means that if a document has multiple values indexed for a field, and one malformed value, then the fields API will ignore everything, including the valid values, and return an empty list for this document. If a document source contains a malformed value, then it must have been ignored at index time. Therefore, we can safely assume that if we get an exception parsing values from source at fetch time, they were also ignored at index time and they can be skipped. This commit moves this exception handling directly into SourceValueFetcher and ArraySourceValueFetcher, removing the need to inspect the _ignored metadata and fixing the case of mixed valid and invalid values.
1 parent 8f582c1 commit 8fba6e4

File tree

19 files changed

+139
-104
lines changed

19 files changed

+139
-104
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ static class TokenCountFieldType extends NumberFieldMapper.NumberFieldType {
8383
@Override
8484
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
8585
if (hasDocValues() == false) {
86-
return (lookup, ignoredFields) -> List.of();
86+
return lookup -> List.of();
8787
}
8888
return new DocValueFetcher(docValueFormat(format, null), context.getForField(this));
8989
}

rest-api-spec/src/main/resources/rest-api-spec/test/search/330_fetch_fields.yml

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ Test unmapped field:
335335
- f1
336336
- { "field" : "f4", "include_unmapped" : true }
337337
- match:
338-
hits.hits.0.fields.f1:
338+
hits.hits.0.fields.f1:
339339
- some text
340340
- match:
341341
hits.hits.0.fields.f4:
@@ -347,7 +347,7 @@ Test unmapped field:
347347
fields:
348348
- { "field" : "f*", "include_unmapped" : true }
349349
- match:
350-
hits.hits.0.fields.f1:
350+
hits.hits.0.fields.f1:
351351
- some text
352352
- match:
353353
hits.hits.0.fields.f2\.a:
@@ -381,7 +381,7 @@ Test unmapped fields inside disabled objects:
381381
id: 1
382382
refresh: true
383383
body:
384-
f1:
384+
f1:
385385
- some text
386386
- a: b
387387
-
@@ -809,9 +809,44 @@ Test nested field with sibling field resolving to DocValueFetcher:
809809
hits.hits.0.fields.owner:
810810
- "Anna Ott"
811811
- match:
812-
hits.hits.0.fields.owner\.length:
812+
hits.hits.0.fields.owner\.length:
813813
- 2
814814
- match:
815815
hits.hits.0.fields.products.0: { "manufacturer" : ["Supersoft"]}
816816
- match:
817817
hits.hits.0.fields.products.1: { "manufacturer" : ["HyperSmart"]}
818+
819+
---
820+
"Test ignores malformed values while returning valid ones":
821+
- skip:
822+
version: ' - 8.0.0'
823+
reason: 'Added in 8.0 - change on backport'
824+
- do:
825+
indices.create:
826+
index: test
827+
body:
828+
mappings:
829+
properties:
830+
number:
831+
type: long
832+
ignore_malformed: true
833+
- do:
834+
index:
835+
index: test
836+
id: 1
837+
refresh: true
838+
body:
839+
number: [ 1, 2, "3", "four", 5, 6 ]
840+
841+
- do:
842+
search:
843+
index: test
844+
body:
845+
fields: [ "*" ]
846+
847+
- length: { hits.hits.0.fields.number : 5 }
848+
- match: { hits.hits.0.fields.number.0 : 1 }
849+
- match: { hits.hits.0.fields.number.1 : 2 }
850+
- match: { hits.hits.0.fields.number.2 : 3 }
851+
- match: { hits.hits.0.fields.number.3 : 5 }
852+
- match: { hits.hits.0.fields.number.4 : 6 }

server/src/main/java/org/elasticsearch/index/mapper/ArraySourceValueFetcher.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
public abstract class ArraySourceValueFetcher implements ValueFetcher {
2828
private final Set<String> sourcePaths;
2929
private final @Nullable Object nullValue;
30-
private final String fieldName;
3130

3231
public ArraySourceValueFetcher(String fieldName, SearchExecutionContext context) {
3332
this(fieldName, context, null);
@@ -41,21 +40,23 @@ public ArraySourceValueFetcher(String fieldName, SearchExecutionContext context)
4140
public ArraySourceValueFetcher(String fieldName, SearchExecutionContext context, Object nullValue) {
4241
this.sourcePaths = context.sourcePath(fieldName);
4342
this.nullValue = nullValue;
44-
this.fieldName = fieldName;
4543
}
4644

4745
@Override
48-
public List<Object> fetchValues(SourceLookup lookup, Set<String> ignoredFields) {
46+
public List<Object> fetchValues(SourceLookup lookup) {
4947
List<Object> values = new ArrayList<>();
50-
if (ignoredFields.contains(fieldName)) {
51-
return values;
52-
}
5348
for (String path : sourcePaths) {
5449
Object sourceValue = lookup.extractValue(path, nullValue);
5550
if (sourceValue == null) {
5651
return List.of();
5752
}
58-
values.addAll((List<?>) parseSourceValue(sourceValue));
53+
try {
54+
values.addAll((List<?>) parseSourceValue(sourceValue));
55+
} catch (Exception e) {
56+
// if parsing fails here then it would have failed at index time
57+
// as well, meaning that we must be ignoring malformed values.
58+
// So ignore it here too.
59+
}
5960
}
6061
return values;
6162
}

server/src/main/java/org/elasticsearch/index/mapper/DocValueFetcher.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import java.io.IOException;
1818
import java.util.ArrayList;
1919
import java.util.List;
20-
import java.util.Set;
2120

2221
import static java.util.Collections.emptyList;
2322

@@ -40,7 +39,7 @@ public void setNextReader(LeafReaderContext context) {
4039
}
4140

4241
@Override
43-
public List<Object> fetchValues(SourceLookup lookup, Set<String> ignoredFields) throws IOException {
42+
public List<Object> fetchValues(SourceLookup lookup) throws IOException {
4443
if (false == formattedDocValues.advanceExact(lookup.docId())) {
4544
return emptyList();
4645
}

server/src/main/java/org/elasticsearch/index/mapper/NestedValueFetcher.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import java.util.HashMap;
2020
import java.util.List;
2121
import java.util.Map;
22-
import java.util.Set;
2322

2423
public class NestedValueFetcher implements ValueFetcher {
2524

@@ -39,7 +38,7 @@ public NestedValueFetcher(String nestedField, FieldFetcher nestedFieldFetcher) {
3938
}
4039

4140
@Override
42-
public List<Object> fetchValues(SourceLookup lookup, Set<String> ignoreFields) throws IOException {
41+
public List<Object> fetchValues(SourceLookup lookup) throws IOException {
4342
List<Object> nestedEntriesToReturn = new ArrayList<>();
4443
Map<String, Object> filteredSource = new HashMap<>();
4544
Map<String, Object> stub = createSourceMapStub(filteredSource);
@@ -53,7 +52,7 @@ public List<Object> fetchValues(SourceLookup lookup, Set<String> ignoreFields) t
5352
SourceLookup nestedSourceLookup = new SourceLookup();
5453
nestedSourceLookup.setSource(filteredSource);
5554

56-
Map<String, DocumentField> fetchResult = nestedFieldFetcher.fetch(nestedSourceLookup, ignoreFields);
55+
Map<String, DocumentField> fetchResult = nestedFieldFetcher.fetch(nestedSourceLookup);
5756

5857
Map<String, Object> nestedEntry = new HashMap<>();
5958
for (DocumentField field : fetchResult.values()) {

server/src/main/java/org/elasticsearch/index/mapper/SourceValueFetcher.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,29 +28,23 @@
2828
public abstract class SourceValueFetcher implements ValueFetcher {
2929
private final Set<String> sourcePaths;
3030
private final @Nullable Object nullValue;
31-
private final String fieldName;
3231

3332
public SourceValueFetcher(String fieldName, SearchExecutionContext context) {
3433
this(fieldName, context, null);
3534
}
3635

3736
/**
38-
* @param fieldName The name of the field.
3937
* @param context The query shard context
4038
* @param nullValue A optional substitute value if the _source value is 'null'.
4139
*/
4240
public SourceValueFetcher(String fieldName, SearchExecutionContext context, Object nullValue) {
4341
this.sourcePaths = context.sourcePath(fieldName);
4442
this.nullValue = nullValue;
45-
this.fieldName = fieldName;
4643
}
4744

4845
@Override
49-
public List<Object> fetchValues(SourceLookup lookup, Set<String> ignoredFields) {
46+
public List<Object> fetchValues(SourceLookup lookup) {
5047
List<Object> values = new ArrayList<>();
51-
if (ignoredFields.contains(fieldName)) {
52-
return values;
53-
}
5448
for (String path : sourcePaths) {
5549
Object sourceValue = lookup.extractValue(path, nullValue);
5650
if (sourceValue == null) {
@@ -66,9 +60,16 @@ public List<Object> fetchValues(SourceLookup lookup, Set<String> ignoredFields)
6660
if (value instanceof List) {
6761
queue.addAll((List<?>) value);
6862
} else {
69-
Object parsedValue = parseSourceValue(value);
70-
if (parsedValue != null) {
71-
values.add(parsedValue);
63+
try {
64+
Object parsedValue = parseSourceValue(value);
65+
if (parsedValue != null) {
66+
values.add(parsedValue);
67+
}
68+
} catch (Exception e) {
69+
// if we get a parsing exception here, that means that the
70+
// value in _source would have also caused a parsing
71+
// exception at index time and the value ignored.
72+
// so ignore it here as well
7273
}
7374
}
7475
}

server/src/main/java/org/elasticsearch/index/mapper/ValueFetcher.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,11 @@
99
package org.elasticsearch.index.mapper;
1010

1111
import org.apache.lucene.index.LeafReaderContext;
12-
import org.elasticsearch.common.Nullable;
1312
import org.elasticsearch.search.fetch.subphase.FetchFieldsPhase;
1413
import org.elasticsearch.search.lookup.SourceLookup;
1514

1615
import java.io.IOException;
1716
import java.util.List;
18-
import java.util.Set;
1917

2018
/**
2119
* A helper class for fetching field values during the {@link FetchFieldsPhase}. Each {@link MappedFieldType}
@@ -33,10 +31,9 @@ public interface ValueFetcher {
3331
* should not be relied on.
3432
*
3533
* @param lookup a lookup structure over the document's source.
36-
* @param ignoredFields the fields in _ignored that have been ignored for this document because they were malformed
3734
* @return a list a standardized field values.
3835
*/
39-
List<Object> fetchValues(SourceLookup lookup, @Nullable Set<String> ignoredFields) throws IOException;
36+
List<Object> fetchValues(SourceLookup lookup) throws IOException;
4037

4138
/**
4239
* Update the leaf reader used to fetch values.

server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchDocValuesPhase.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
import java.io.IOException;
2020
import java.util.ArrayList;
21-
import java.util.Collections;
2221
import java.util.List;
2322

2423
/**
@@ -71,7 +70,7 @@ public void process(HitContext hit) throws IOException {
7170
// docValues fields will still be document fields, and put under "fields" section of a hit.
7271
hit.hit().setDocumentField(f.field, hitField);
7372
}
74-
hitField.getValues().addAll(f.fetcher.fetchValues(hit.sourceLookup(), Collections.emptySet()));
73+
hitField.getValues().addAll(f.fetcher.fetchValues(hit.sourceLookup()));
7574
}
7675
}
7776
};

server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhase.java

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,14 @@
1010

1111
import org.apache.lucene.index.LeafReaderContext;
1212
import org.elasticsearch.common.document.DocumentField;
13-
import org.elasticsearch.index.mapper.IgnoredFieldMapper;
1413
import org.elasticsearch.search.SearchHit;
1514
import org.elasticsearch.search.fetch.FetchContext;
1615
import org.elasticsearch.search.fetch.FetchSubPhase;
1716
import org.elasticsearch.search.fetch.FetchSubPhaseProcessor;
1817
import org.elasticsearch.search.lookup.SourceLookup;
1918

2019
import java.io.IOException;
21-
import java.util.HashSet;
2220
import java.util.Map;
23-
import java.util.Set;
2421

2522
/**
2623
* A fetch sub-phase for high-level field retrieval. Given a list of fields, it
@@ -53,25 +50,11 @@ public void process(HitContext hitContext) throws IOException {
5350
SearchHit hit = hitContext.hit();
5451
SourceLookup sourceLookup = hitContext.sourceLookup();
5552

56-
Set<String> ignoredFields = getIgnoredFields(hit);
57-
Map<String, DocumentField> documentFields = fieldFetcher.fetch(sourceLookup, ignoredFields);
53+
Map<String, DocumentField> documentFields = fieldFetcher.fetch(sourceLookup);
5854
for (Map.Entry<String, DocumentField> entry : documentFields.entrySet()) {
5955
hit.setDocumentField(entry.getKey(), entry.getValue());
6056
}
6157
}
6258
};
6359
}
64-
65-
private Set<String> getIgnoredFields(SearchHit hit) {
66-
DocumentField field = hit.field(IgnoredFieldMapper.NAME);
67-
if (field == null) {
68-
return Set.of();
69-
}
70-
71-
Set<String> ignoredFields = new HashSet<>();
72-
for (Object value : field.getValues()) {
73-
ignoredFields.add((String) value);
74-
}
75-
return ignoredFields;
76-
}
7760
}

server/src/main/java/org/elasticsearch/search/fetch/subphase/FieldFetcher.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,14 +134,13 @@ private FieldFetcher(
134134
this.unmappedFieldsFetchAutomaton = unmappedFieldsFetchAutomaton;
135135
}
136136

137-
public Map<String, DocumentField> fetch(SourceLookup sourceLookup, Set<String> ignoredFields) throws IOException {
138-
assert ignoredFields != null;
137+
public Map<String, DocumentField> fetch(SourceLookup sourceLookup) throws IOException {
139138
Map<String, DocumentField> documentFields = new HashMap<>();
140139
for (FieldContext context : fieldContexts.values()) {
141140
String field = context.fieldName;
142141

143142
ValueFetcher valueFetcher = context.valueFetcher;
144-
List<Object> parsedValues = valueFetcher.fetchValues(sourceLookup, ignoredFields);
143+
List<Object> parsedValues = valueFetcher.fetchValues(sourceLookup);
145144
if (parsedValues.isEmpty() == false) {
146145
documentFields.put(field, new DocumentField(field, parsedValues));
147146
}

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public static List<Object> loadFieldValues(MappedFieldType fieldType,
4848
}
4949
ValueFetcher fetcher = fieldType.valueFetcher(searchContext, null);
5050
fetcher.setNextReader(hitContext.readerContext());
51-
return fetcher.fetchValues(hitContext.sourceLookup(), Collections.emptySet());
51+
return fetcher.fetchValues(hitContext.sourceLookup());
5252
}
5353

5454
public static class Encoders {

server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldTypeTests.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,9 @@ public void testFetchSourceValue() throws IOException {
4141
sourceValue = "POINT (42.0 27.1)";
4242
assertEquals(List.of(jsonPoint), fetchSourceValue(mapper, sourceValue, null));
4343
assertEquals(List.of(wktPoint), fetchSourceValue(mapper, sourceValue, "wkt"));
44+
45+
// Test a malformed value
46+
sourceValue = "malformed";
47+
assertEquals(List.of(), fetchSourceValue(mapper, sourceValue, null));
4448
}
4549
}

server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,7 @@ public void testFetchSourceValue() throws IOException {
639639
.fieldType();
640640
assertEquals(List.of(3), fetchSourceValue(mapper, 3.14));
641641
assertEquals(List.of(42), fetchSourceValue(mapper, "42.9"));
642+
assertEquals(List.of(3, 42), fetchSourceValues(mapper, 3.14, "foo", "42.9"));
642643

643644
MappedFieldType nullValueMapper = new NumberFieldMapper.Builder("field", NumberType.FLOAT, false, true)
644645
.nullValue(2.71f)

0 commit comments

Comments
 (0)