Fix issue with AnnotatedTextHighlighter and max_analyzed_offset (#69028)

matriv · web-flow · commit 1e12c93a31d3 · 2021-02-16T17:08:07.000+01:00
With the newly introduced `max_analyzed_offset` the analyzer of `AnnotatedTextHighlighter` was wrapped twice with the `LimitTokenOffsetAnalyzer` by mistake. Follows: #67325
diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java
@@ -90,6 +90,9 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
                 for (int i = 0; i < markedUpInputs.length; i++) {
                     annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
                 }
+                if (queryMaxAnalyzedOffset != null) {
+                    wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
+                }
                 AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
                 hiliteAnalyzer.setAnnotations(annotations);
                 AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
diff --git a/plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml b/plugins/mapper-annotated-text/src/yamlRestTest/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml
@@ -213,3 +213,75 @@
   - match: {_shards.failed: 0}
   - match: {aggregations.keywords.buckets.0.key: "Apple Inc"}
 
+---
+"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset should FAIL":
+
+  - do:
+      indices.create:
+        index: annotated
+        body:
+          settings:
+            number_of_shards: "1"
+            number_of_replicas: "0"
+            index.highlight.max_analyzed_offset: 20
+          mappings:
+            properties:
+              text:
+                type: annotated_text
+              entityID:
+                type: keyword
+
+  - do:
+      index:
+        index: annotated
+        body:
+          "text": "The [quick brown fox](entity_3789) is brown."
+          "entityID": "entity_3789"
+        refresh: true
+
+  - do:
+      catch: bad_request
+      search:
+        rest_total_hits_as_int: true
+        index: annotated
+        body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } } } }
+  - match: { error.root_cause.0.type: "illegal_argument_exception" }
+
+
+---
+"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=20 should SUCCEED":
+
+  - skip:
+      version: " - 7.11.99"
+      reason: max_analyzed_offset query param added in 7.12.0
+
+  - do:
+      indices.create:
+        index: annotated
+        body:
+          settings:
+            number_of_shards: "1"
+            number_of_replicas: "0"
+            index.highlight.max_analyzed_offset: 20
+          mappings:
+            properties:
+              text:
+                type: annotated_text
+              entityID:
+                type: keyword
+
+  - do:
+      index:
+        index: annotated
+        body:
+          "text": "The [quick brown fox](entity_3789) is brown."
+          "entityID": "entity_3789"
+        refresh: true
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: annotated
+        body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } }, "max_analyzed_offset": 20 } }
+  - match: {hits.hits.0.highlight.text.0: "The [quick brown fox](_hit_term=entity_3789&entity_3789) is brown."}
+
diff --git a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java
@@ -24,7 +24,6 @@
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.elasticsearch.index.IndexSettings;
-import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
 
 import java.io.IOException;
 import java.text.BreakIterator;
@@ -91,7 +90,7 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
                                     Predicate<String> fieldMatcher,
                                     int maxAnalyzedOffset,
                                     Integer queryMaxAnalyzedOffset) throws IOException {
-        super(searcher, wrapAnalyzer(analyzer, queryMaxAnalyzedOffset));
+        super(searcher, analyzer);
         this.offsetSource = offsetSource;
         this.breakIterator = breakIterator;
         this.breakIteratorLocale = breakIteratorLocale == null ? Locale.ROOT : breakIteratorLocale;
@@ -105,13 +104,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
         fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
     }
 
-    protected static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
-        if (maxAnalyzedOffset != null) {
-            analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
-        }
-        return analyzer;
-    }
-
     /**
      * Highlights the field value.
      */