Skip to content

Commit 1e12c93

Browse files
authored
Fix issue with AnnotatedTextHighlighter and max_analyzed_offset (#69028)
With the newly introduced `max_analyzed_offset` query parameter, the analyzer used by `AnnotatedTextHighlighter` was mistakenly wrapped twice in a `LimitTokenOffsetAnalyzer`. Follow-up to #67325.
1 parent 14ea007 commit 1e12c93

File tree

3 files changed

+76
-9
lines changed

3 files changed

+76
-9
lines changed

plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,9 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
9090
for (int i = 0; i < markedUpInputs.length; i++) {
9191
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
9292
}
93+
if (queryMaxAnalyzedOffset != null) {
94+
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
95+
}
9396
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
9497
hiliteAnalyzer.setAnnotations(annotations);
9598
AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());

plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,3 +213,75 @@
213213
- match: {_shards.failed: 0}
214214
- match: {aggregations.keywords.buckets.0.key: "Apple Inc"}
215215

216+
---
217+
"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset should FAIL":
218+
219+
- do:
220+
indices.create:
221+
index: annotated
222+
body:
223+
settings:
224+
number_of_shards: "1"
225+
number_of_replicas: "0"
226+
index.highlight.max_analyzed_offset: 20
227+
mappings:
228+
properties:
229+
text:
230+
type: annotated_text
231+
entityID:
232+
type: keyword
233+
234+
- do:
235+
index:
236+
index: annotated
237+
body:
238+
"text": "The [quick brown fox](entity_3789) is brown."
239+
"entityID": "entity_3789"
240+
refresh: true
241+
242+
- do:
243+
catch: bad_request
244+
search:
245+
rest_total_hits_as_int: true
246+
index: annotated
247+
body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } } } }
248+
- match: { error.root_cause.0.type: "illegal_argument_exception" }
249+
250+
251+
---
252+
"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=20 should SUCCEED":
253+
254+
- skip:
255+
version: " - 7.11.99"
256+
reason: max_analyzed_offset query param added in 7.12.0
257+
258+
- do:
259+
indices.create:
260+
index: annotated
261+
body:
262+
settings:
263+
number_of_shards: "1"
264+
number_of_replicas: "0"
265+
index.highlight.max_analyzed_offset: 20
266+
mappings:
267+
properties:
268+
text:
269+
type: annotated_text
270+
entityID:
271+
type: keyword
272+
273+
- do:
274+
index:
275+
index: annotated
276+
body:
277+
"text": "The [quick brown fox](entity_3789) is brown."
278+
"entityID": "entity_3789"
279+
refresh: true
280+
281+
- do:
282+
search:
283+
rest_total_hits_as_int: true
284+
index: annotated
285+
body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } }, "max_analyzed_offset": 20 } }
286+
- match: {hits.hits.0.highlight.text.0: "The [quick brown fox](_hit_term=entity_3789&entity_3789) is brown."}
287+

server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.elasticsearch.common.Nullable;
2525
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
2626
import org.elasticsearch.index.IndexSettings;
27-
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
2827

2928
import java.io.IOException;
3029
import java.text.BreakIterator;
@@ -91,7 +90,7 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
9190
Predicate<String> fieldMatcher,
9291
int maxAnalyzedOffset,
9392
Integer queryMaxAnalyzedOffset) throws IOException {
94-
super(searcher, wrapAnalyzer(analyzer, queryMaxAnalyzedOffset));
93+
super(searcher, analyzer);
9594
this.offsetSource = offsetSource;
9695
this.breakIterator = breakIterator;
9796
this.breakIteratorLocale = breakIteratorLocale == null ? Locale.ROOT : breakIteratorLocale;
@@ -105,13 +104,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
105104
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
106105
}
107106

108-
protected static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
109-
if (maxAnalyzedOffset != null) {
110-
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
111-
}
112-
return analyzer;
113-
}
114-
115107
/**
116108
* Highlights the field value.
117109
*/

0 commit comments

Comments
 (0)