Skip to content

Commit aebfe1d

Browse files
authored
Fix issue with AnnotatedTextHighlighter and max_analyzed_offset (#69028) (#69058)
With the newly introduced `max_analyzed_offset` query parameter, the analyzer of `AnnotatedTextHighlighter` was mistakenly wrapped twice with `LimitTokenOffsetAnalyzer`. Follows: #67325
1 parent 831f686 commit aebfe1d

File tree

3 files changed

+117
-9
lines changed

3 files changed

+117
-9
lines changed

plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
9090
for (int i = 0; i < markedUpInputs.length; i++) {
9191
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
9292
}
93+
if (queryMaxAnalyzedOffset != null) {
94+
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
95+
}
9396
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
9497
hiliteAnalyzer.setAnnotations(annotations);
9598
AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());

plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,116 @@
180180
request_cache: false
181181
body: {"query": {"term": {"my_field": "Beck"} }, "highlight": {"fields": {"my_field": {"type": "annotated", "require_field_match": false }}}}
182182
- match: {_shards.failed: 0}
183+
184+
---
185+
"Signficant text support":
186+
- do:
187+
indices.create:
188+
index: annotated
189+
body:
190+
settings:
191+
number_of_shards: "1"
192+
number_of_replicas: "0"
193+
mappings:
194+
properties:
195+
my_field:
196+
type: annotated_text
197+
198+
- do:
199+
index:
200+
index: annotated
201+
id: 1
202+
body:
203+
"my_field" : "[Apple](Apple+Inc) launched the iphone 12"
204+
- do:
205+
index:
206+
index: annotated
207+
id: 2
208+
body:
209+
"my_field" : "[They](Apple+Inc) make iphone accessories"
210+
- do:
211+
index:
212+
index: annotated
213+
id: 3
214+
body:
215+
"my_field" : "[Apple](Apple+Inc) have a new iphone coming"
216+
refresh: true
217+
- do:
218+
search:
219+
request_cache: false
220+
body: { "query" : {"match" : { "my_field" : "iphone" } }, "aggs" : { "keywords" : { "significant_text" : {"field" : "my_field", "min_doc_count":3, "percentage":{}, "exclude":["iphone"]} } } }
221+
- match: {_shards.failed: 0}
222+
- match: {aggregations.keywords.buckets.0.key: "Apple Inc"}
223+
224+
---
225+
"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset should FAIL":
226+
227+
- do:
228+
indices.create:
229+
index: annotated
230+
body:
231+
settings:
232+
number_of_shards: "1"
233+
number_of_replicas: "0"
234+
index.highlight.max_analyzed_offset: 20
235+
mappings:
236+
properties:
237+
text:
238+
type: annotated_text
239+
entityID:
240+
type: keyword
241+
242+
- do:
243+
index:
244+
index: annotated
245+
body:
246+
"text": "The [quick brown fox](entity_3789) is brown."
247+
"entityID": "entity_3789"
248+
refresh: true
249+
250+
- do:
251+
catch: bad_request
252+
search:
253+
rest_total_hits_as_int: true
254+
index: annotated
255+
body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } } } }
256+
- match: { error.root_cause.0.type: "illegal_argument_exception" }
257+
258+
259+
---
260+
"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=20 should SUCCEED":
261+
262+
- skip:
263+
version: " - 7.11.99"
264+
reason: max_analyzed_offset query param added in 7.12.0
265+
266+
- do:
267+
indices.create:
268+
index: annotated
269+
body:
270+
settings:
271+
number_of_shards: "1"
272+
number_of_replicas: "0"
273+
index.highlight.max_analyzed_offset: 20
274+
mappings:
275+
properties:
276+
text:
277+
type: annotated_text
278+
entityID:
279+
type: keyword
280+
281+
- do:
282+
index:
283+
index: annotated
284+
body:
285+
"text": "The [quick brown fox](entity_3789) is brown."
286+
"entityID": "entity_3789"
287+
refresh: true
288+
289+
- do:
290+
search:
291+
rest_total_hits_as_int: true
292+
index: annotated
293+
body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } }, "max_analyzed_offset": 20 } }
294+
- match: {hits.hits.0.highlight.text.0: "The [quick brown fox](_hit_term=entity_3789&entity_3789) is brown."}
295+

server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.elasticsearch.common.Nullable;
2525
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
2626
import org.elasticsearch.index.IndexSettings;
27-
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
2827

2928
import java.io.IOException;
3029
import java.text.BreakIterator;
@@ -91,7 +90,7 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
9190
Predicate<String> fieldMatcher,
9291
int maxAnalyzedOffset,
9392
Integer queryMaxAnalyzedOffset) throws IOException {
94-
super(searcher, wrapAnalyzer(analyzer, queryMaxAnalyzedOffset));
93+
super(searcher, analyzer);
9594
this.offsetSource = offsetSource;
9695
this.breakIterator = breakIterator;
9796
this.breakIteratorLocale = breakIteratorLocale == null ? Locale.ROOT : breakIteratorLocale;
@@ -105,13 +104,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
105104
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
106105
}
107106

108-
protected static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
109-
if (maxAnalyzedOffset != null) {
110-
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
111-
}
112-
return analyzer;
113-
}
114-
115107
/**
116108
* Highlights the field value.
117109
*/

0 commit comments

Comments
 (0)