Skip to content

Commit 01eee1a

Browse files
Highlighters skip ignored keyword values (#53408)
Keyword field values longer than ignore_above are not indexed. However, highlighters were still retrieving these values from _source and trying to highlight them. This sometimes led to errors if a field's length exceeded max_analyzed_offset. It is also wrong behaviour to attempt to highlight something that was ignored during indexing. This PR checks whether a keyword value was ignored because of its length and, if so, skips highlighting it. Closes #43800
1 parent 340fcd1 commit 01eee1a

File tree

4 files changed

+89
-5
lines changed

4 files changed

+89
-5
lines changed
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
---
2+
setup:
3+
- do:
4+
indices.create:
5+
index: test-index
6+
body:
7+
mappings:
8+
"properties":
9+
"k1":
10+
"type": "keyword"
11+
"k2":
12+
"type": "keyword"
13+
"ignore_above": 3
14+
- do:
15+
bulk:
16+
index: test-index
17+
refresh: true
18+
body:
19+
- '{"index": {"_id": "1"}}'
20+
- '{"k1": "123", "k2" : "123"}'
21+
- '{"index": {"_id": "2"}}'
22+
- '{"k1": "1234", "k2" : "1234"}'
23+
24+
---
25+
"Plain Highligher should skip highlighting ignored keyword values":
26+
- skip:
27+
version: " - 7.9.99"
28+
reason: "skip highlighting of ignored values was introduced in 7.7"
29+
- do:
30+
search:
31+
index: test-index
32+
body:
33+
query:
34+
prefix:
35+
k1: "12"
36+
highlight:
37+
require_field_match: false
38+
fields:
39+
k2:
40+
type: plain
41+
42+
- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
43+
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored
44+
45+
---
46+
"Unified Highligher should skip highlighting ignored keyword values":
47+
- skip:
48+
version: " - 7.9.99"
49+
reason: "skip highlighting of ignored values was introduced in 7.7"
50+
- do:
51+
search:
52+
index: test-index
53+
body:
54+
query:
55+
prefix:
56+
k1: "12"
57+
highlight:
58+
require_field_match: false
59+
fields:
60+
k2:
61+
type: unified
62+
63+
- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
64+
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,7 @@ protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, Mappe
322322

323323
/** Values that have more chars than the return value of this method will
324324
* be skipped at parsing time. */
325-
// pkg-private for testing
326-
int ignoreAbove() {
325+
public int ignoreAbove() {
327326
return ignoreAbove;
328327
}
329328

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.elasticsearch.ExceptionsHelper;
3737
import org.elasticsearch.common.text.Text;
3838
import org.elasticsearch.index.IndexSettings;
39+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3940
import org.elasticsearch.index.mapper.MappedFieldType;
4041
import org.elasticsearch.index.query.QueryShardContext;
4142
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@@ -102,6 +103,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
102103
ArrayList<TextFragment> fragsList = new ArrayList<>();
103104
List<Object> textsToHighlight;
104105
Analyzer analyzer = context.getMapperService().documentMapper().mappers().indexAnalyzer();
106+
Integer keywordIgnoreAbove = null;
107+
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
108+
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
109+
.mappers().getMapper(highlighterContext.fieldName);
110+
keywordIgnoreAbove = mapper.ignoreAbove();
111+
}
105112
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
106113

107114
try {
@@ -110,7 +117,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
110117

111118
for (Object textToHighlight : textsToHighlight) {
112119
String text = convertFieldValue(fieldType, textToHighlight);
113-
if (text.length() > maxAnalyzedOffset) {
120+
int textLength = text.length();
121+
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
122+
continue; // skip highlighting keyword terms that were ignored during indexing
123+
}
124+
if (textLength > maxAnalyzedOffset) {
114125
throw new IllegalArgumentException(
115126
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
116127
"] doc of [" + context.index().getName() + "] index " +

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.elasticsearch.index.IndexSettings;
3737
import org.elasticsearch.index.mapper.DocumentMapper;
3838
import org.elasticsearch.index.mapper.IdFieldMapper;
39+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3940
import org.elasticsearch.index.mapper.MappedFieldType;
4041
import org.elasticsearch.index.query.QueryShardContext;
4142
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@@ -65,11 +66,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
6566
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
6667
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
6768
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
69+
Integer keywordIgnoreAbove = null;
70+
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
71+
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
72+
.mappers().getMapper(highlighterContext.fieldName);
73+
keywordIgnoreAbove = mapper.ignoreAbove();
74+
}
6875

6976
List<Snippet> snippets = new ArrayList<>();
7077
int numberOfFragments = field.fieldOptions().numberOfFragments();
7178
try {
72-
7379
final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(), hitContext);
7480
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
7581
highlighterContext.highlight.forceSource(field));
@@ -81,7 +87,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
8187
final CustomUnifiedHighlighter highlighter;
8288
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
8389
final OffsetSource offsetSource = getOffsetSource(fieldType);
84-
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
90+
int fieldValueLength = fieldValue.length();
91+
if (keywordIgnoreAbove != null && fieldValueLength > keywordIgnoreAbove) {
92+
return null; // skip highlighting keyword terms that were ignored during indexing
93+
}
94+
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
8595
throw new IllegalArgumentException(
8696
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
8797
"] doc of [" + context.index().getName() + "] index " + "has exceeded [" +

0 commit comments

Comments
 (0)