Skip to content

Fix issue with AnnotatedTextHighlighter and max_analyzed_offset (#69028) #69058

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
if (queryMaxAnalyzedOffset != null) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,116 @@
request_cache: false
body: {"query": {"term": {"my_field": "Beck"} }, "highlight": {"fields": {"my_field": {"type": "annotated", "require_field_match": false }}}}
- match: {_shards.failed: 0}

---
# Verifies that the significant_text aggregation understands annotated_text
# fields: the injected annotation token ("Apple Inc") should surface as the
# top bucket key rather than the surface text.
# NOTE(review): "Signficant" in the test name below is a typo for
# "Significant"; kept as-is because the name is the test's identifier.
"Signficant text support":
- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "1"
number_of_replicas: "0"
mappings:
properties:
my_field:
type: annotated_text

# Index three documents, each matching "iphone" and each carrying the
# Apple+Inc annotation, so the annotation meets min_doc_count: 3 below.
- do:
index:
index: annotated
id: 1
body:
"my_field" : "[Apple](Apple+Inc) launched the iphone 12"
- do:
index:
index: annotated
id: 2
body:
"my_field" : "[They](Apple+Inc) make iphone accessories"
- do:
index:
index: annotated
id: 3
body:
"my_field" : "[Apple](Apple+Inc) have a new iphone coming"
refresh: true
# Run significant_text, excluding the query term itself, and expect the
# annotation value to come back as the first bucket with no shard failures.
- do:
search:
request_cache: false
body: { "query" : {"match" : { "my_field" : "iphone" } }, "aggs" : { "keywords" : { "significant_text" : {"field" : "my_field", "min_doc_count":3, "percentage":{}, "exclude":["iphone"]} } } }
- match: {_shards.failed: 0}
- match: {aggregations.keywords.buckets.0.key: "Apple Inc"}

---
# The index caps analysis at 20 chars (index.highlight.max_analyzed_offset)
# and the indexed annotated text is longer than that. Without a query-time
# max_analyzed_offset override, highlighting must be rejected.
"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset should FAIL":

- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "1"
number_of_replicas: "0"
index.highlight.max_analyzed_offset: 20
mappings:
properties:
text:
type: annotated_text
entityID:
type: keyword

# Document text (including annotation markup) exceeds the 20-char limit.
- do:
index:
index: annotated
body:
"text": "The [quick brown fox](entity_3789) is brown."
"entityID": "entity_3789"
refresh: true

# Expect a 400 with illegal_argument_exception as the root cause.
- do:
catch: bad_request
search:
rest_total_hits_as_int: true
index: annotated
body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } } } }
- match: { error.root_cause.0.type: "illegal_argument_exception" }

---
# Same setup as the FAIL case above, but the request supplies the query-time
# "max_analyzed_offset": 20 parameter (added in 7.12.0), which truncates
# analysis instead of erroring — so highlighting succeeds.
"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=20 should SUCCEED":

# Skip on versions that predate the query-time parameter.
- skip:
version: " - 7.11.99"
reason: max_analyzed_offset query param added in 7.12.0

- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "1"
number_of_replicas: "0"
index.highlight.max_analyzed_offset: 20
mappings:
properties:
text:
type: annotated_text
entityID:
type: keyword

- do:
index:
index: annotated
body:
"text": "The [quick brown fox](entity_3789) is brown."
"entityID": "entity_3789"
refresh: true

# With the query-time limit set, the highlight is returned and the
# annotation markup survives in the fragment.
- do:
search:
rest_total_hits_as_int: true
index: annotated
body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } }, "max_analyzed_offset": 20 } }
- match: {hits.hits.0.highlight.text.0: "The [quick brown fox](_hit_term=entity_3789&entity_3789) is brown."}

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;

import java.io.IOException;
import java.text.BreakIterator;
Expand Down Expand Up @@ -91,7 +90,7 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
Predicate<String> fieldMatcher,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset) throws IOException {
super(searcher, wrapAnalyzer(analyzer, queryMaxAnalyzedOffset));
super(searcher, analyzer);
this.offsetSource = offsetSource;
this.breakIterator = breakIterator;
this.breakIteratorLocale = breakIteratorLocale == null ? Locale.ROOT : breakIteratorLocale;
Expand All @@ -105,13 +104,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
}

/**
 * Returns the given analyzer, optionally wrapped so that token emission is
 * capped at {@code maxAnalyzedOffset} characters.
 *
 * @param analyzer          the analyzer to (possibly) wrap
 * @param maxAnalyzedOffset character offset limit, or {@code null} for no limit
 * @return the original analyzer when no limit is given, otherwise a
 *         {@link LimitTokenOffsetAnalyzer} delegating to it
 */
protected static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
    return maxAnalyzedOffset == null
        ? analyzer
        : new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
}

/**
* Highlights the field value.
*/
Expand Down