Skip to content

Fix issue with AnnotatedTextHighlighter and max_analyzed_offset (#69028) #69058

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
if (queryMaxAnalyzedOffset != null) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,116 @@
request_cache: false
body: {"query": {"term": {"my_field": "Beck"} }, "highlight": {"fields": {"my_field": {"type": "annotated", "require_field_match": false }}}}
- match: {_shards.failed: 0}

---
# Verifies that the significant_text aggregation understands annotated_text
# fields: the injected annotation token ("Apple Inc") should surface as the
# top bucket key rather than the surface text.
# NOTE(review): "Signficant" in the test name below is a typo for
# "Significant"; kept as-is because the name is the test's identifier.
"Signficant text support":
- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "1"
number_of_replicas: "0"
mappings:
properties:
my_field:
type: annotated_text

# Index three documents, each matching "iphone" and each carrying the
# Apple+Inc annotation, so the annotation meets min_doc_count: 3 below.
- do:
index:
index: annotated
id: 1
body:
"my_field" : "[Apple](Apple+Inc) launched the iphone 12"
- do:
index:
index: annotated
id: 2
body:
"my_field" : "[They](Apple+Inc) make iphone accessories"
- do:
index:
index: annotated
id: 3
body:
"my_field" : "[Apple](Apple+Inc) have a new iphone coming"
refresh: true
# Run significant_text, excluding the query term itself, and expect the
# annotation value to come back as the first bucket with no shard failures.
- do:
search:
request_cache: false
body: { "query" : {"match" : { "my_field" : "iphone" } }, "aggs" : { "keywords" : { "significant_text" : {"field" : "my_field", "min_doc_count":3, "percentage":{}, "exclude":["iphone"]} } } }
- match: {_shards.failed: 0}
- match: {aggregations.keywords.buckets.0.key: "Apple Inc"}

---
# The index caps analysis at 20 chars (index.highlight.max_analyzed_offset)
# and the indexed annotated text is longer than that. Without a query-time
# max_analyzed_offset override, highlighting must be rejected.
"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset should FAIL":

- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "1"
number_of_replicas: "0"
index.highlight.max_analyzed_offset: 20
mappings:
properties:
text:
type: annotated_text
entityID:
type: keyword

# Document text (including annotation markup) exceeds the 20-char limit.
- do:
index:
index: annotated
body:
"text": "The [quick brown fox](entity_3789) is brown."
"entityID": "entity_3789"
refresh: true

# Expect a 400 with illegal_argument_exception as the root cause.
- do:
catch: bad_request
search:
rest_total_hits_as_int: true
index: annotated
body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } } } }
- match: { error.root_cause.0.type: "illegal_argument_exception" }

---
# Same setup as the FAIL case above, but the request supplies the query-time
# "max_analyzed_offset": 20 parameter (added in 7.12.0), which truncates
# analysis instead of erroring — so highlighting succeeds.
"Annotated highlighter on annotated text exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=20 should SUCCEED":

# Skip on versions that predate the query-time parameter.
- skip:
version: " - 7.11.99"
reason: max_analyzed_offset query param added in 7.12.0

- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "1"
number_of_replicas: "0"
index.highlight.max_analyzed_offset: 20
mappings:
properties:
text:
type: annotated_text
entityID:
type: keyword

- do:
index:
index: annotated
body:
"text": "The [quick brown fox](entity_3789) is brown."
"entityID": "entity_3789"
refresh: true

# With the query-time limit set, the highlight is returned and the
# annotation markup survives in the fragment.
- do:
search:
rest_total_hits_as_int: true
index: annotated
body: { "query": { "term": { "entityID": "entity_3789" } }, "highlight": { "type": "annotated", "require_field_match": false, "fields": { "text": { } }, "max_analyzed_offset": 20 } }
- match: {hits.hits.0.highlight.text.0: "The [quick brown fox](_hit_term=entity_3789&entity_3789) is brown."}

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;

import java.io.IOException;
import java.text.BreakIterator;
Expand Down Expand Up @@ -91,7 +90,7 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
Predicate<String> fieldMatcher,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset) throws IOException {
super(searcher, wrapAnalyzer(analyzer, queryMaxAnalyzedOffset));
super(searcher, analyzer);
this.offsetSource = offsetSource;
this.breakIterator = breakIterator;
this.breakIteratorLocale = breakIteratorLocale == null ? Locale.ROOT : breakIteratorLocale;
Expand All @@ -105,13 +104,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
}

/**
 * Returns the given analyzer, optionally wrapped so that token emission is
 * capped at {@code maxAnalyzedOffset} characters.
 *
 * @param analyzer          the analyzer to (possibly) wrap
 * @param maxAnalyzedOffset character offset limit, or {@code null} for no limit
 * @return the original analyzer when no limit is given, otherwise a
 *         {@link LimitTokenOffsetAnalyzer} delegating to it
 */
protected static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
    return maxAnalyzedOffset == null
        ? analyzer
        : new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
}

/**
* Highlights the field value.
*/
Expand Down