31
31
import org .apache .lucene .util .BytesRef ;
32
32
import org .apache .lucene .util .CollectionUtil ;
33
33
import org .elasticsearch .common .Strings ;
34
+ import org .elasticsearch .common .logging .DeprecationLogger ;
35
+ import org .elasticsearch .common .logging .Loggers ;
34
36
import org .elasticsearch .common .text .Text ;
35
37
import org .elasticsearch .index .mapper .DocumentMapper ;
36
38
import org .elasticsearch .index .mapper .FieldMapper ;
37
39
import org .elasticsearch .index .mapper .KeywordFieldMapper ;
38
40
import org .elasticsearch .index .mapper .MappedFieldType ;
39
- import org .elasticsearch .index .mapper .MapperService ;
40
41
import org .elasticsearch .search .fetch .FetchPhaseExecutionException ;
41
42
import org .elasticsearch .search .fetch .FetchSubPhase ;
42
43
import org .elasticsearch .search .internal .SearchContext ;
44
+ import org .elasticsearch .index .IndexSettings ;
43
45
44
46
import java .io .IOException ;
45
47
import java .text .BreakIterator ;
46
48
import java .util .ArrayList ;
47
- import java .util .HashMap ;
48
49
import java .util .List ;
49
50
import java .util .Locale ;
50
- import java .util .Map ;
51
51
import java .util .stream .Collectors ;
52
52
53
53
import static org .apache .lucene .search .uhighlight .CustomUnifiedHighlighter .MULTIVAL_SEP_CHAR ;
54
54
55
55
public class UnifiedHighlighter implements Highlighter {
56
+ private static final DeprecationLogger deprecationLogger = new DeprecationLogger (Loggers .getLogger (UnifiedHighlighter .class ));
57
+
56
58
@ Override
57
59
public boolean canHighlight (FieldMapper fieldMapper ) {
58
60
return true ;
@@ -67,8 +69,6 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
67
69
Encoder encoder = field .fieldOptions ().encoder ().equals ("html" ) ? HighlightUtils .Encoders .HTML : HighlightUtils .Encoders .DEFAULT ;
68
70
CustomPassageFormatter passageFormatter = new CustomPassageFormatter (field .fieldOptions ().preTags ()[0 ],
69
71
field .fieldOptions ().postTags ()[0 ], encoder );
70
- final int maxAnalyzedOffset = context .indexShard ().indexSettings ().getHighlightMaxAnalyzedOffset ();
71
-
72
72
List <Snippet > snippets = new ArrayList <>();
73
73
int numberOfFragments ;
74
74
try {
@@ -83,21 +83,41 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
83
83
final CustomUnifiedHighlighter highlighter ;
84
84
final String fieldValue = mergeFieldValues (fieldValues , MULTIVAL_SEP_CHAR );
85
85
final OffsetSource offsetSource = getOffsetSource (fieldMapper .fieldType ());
86
+
87
+ final int maxAnalyzedOffset = context .indexShard ().indexSettings ().getHighlightMaxAnalyzedOffset ();
88
+ // Issue a deprecation warning if maxAnalyzedOffset is not set, and field length > default setting for 7.0
89
+ final int maxAnalyzedOffset7 = 1000000 ;
90
+ if ((offsetSource == OffsetSource .ANALYSIS ) && (maxAnalyzedOffset == -1 ) && (fieldValue .length () > maxAnalyzedOffset7 )) {
91
+ deprecationLogger .deprecated (
92
+ "The length [" + fieldValue .length () + "] of [" + highlighterContext .fieldName + "] field of [" +
93
+ hitContext .hit ().getId () + "] doc of [" + context .indexShard ().shardId ().getIndexName () + "] index has " +
94
+ "exceeded the allowed maximum of [" + maxAnalyzedOffset7 + "] set for the next major Elastic version. " +
95
+ "This maximum can be set by changing the [" + IndexSettings .MAX_ANALYZED_OFFSET_SETTING .getKey () +
96
+ "] index level setting. " + "For large texts, indexing with offsets or term vectors is recommended!" );
97
+ }
98
+ // Throw an error if maxAnalyzedOffset is explicitly set by the user, and field length > maxAnalyzedOffset
99
+ if ((offsetSource == OffsetSource .ANALYSIS ) && (maxAnalyzedOffset > 0 ) && (fieldValue .length () > maxAnalyzedOffset )) {
100
+ throw new IllegalArgumentException (
101
+ "The length [" + fieldValue .length () + "] of [" + highlighterContext .fieldName + "] field of [" +
102
+ hitContext .hit ().getId () + "] doc of [" + context .indexShard ().shardId ().getIndexName () + "] index " +
103
+ "has exceeded [" + maxAnalyzedOffset + "] - maximum allowed to be analyzed for highlighting. " +
104
+ "This maximum can be set by changing the [" + IndexSettings .MAX_ANALYZED_OFFSET_SETTING .getKey () +
105
+ "] index level setting. " + "For large texts, indexing with offsets or term vectors is recommended!" );
106
+ }
107
+
86
108
if (field .fieldOptions ().numberOfFragments () == 0 ) {
87
109
// we use a control char to separate values, which is the only char that the custom break iterator
88
110
// breaks the text on, so we don't lose the distinction between the different values of a field and we
89
111
// get back a snippet per value
90
112
CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator (MULTIVAL_SEP_CHAR );
91
113
highlighter = new CustomUnifiedHighlighter (searcher , analyzer , offsetSource , passageFormatter ,
92
- field .fieldOptions ().boundaryScannerLocale (), breakIterator , fieldValue , field .fieldOptions ().noMatchSize (),
93
- maxAnalyzedOffset );
114
+ field .fieldOptions ().boundaryScannerLocale (), breakIterator , fieldValue , field .fieldOptions ().noMatchSize ());
94
115
numberOfFragments = fieldValues .size (); // we are highlighting the whole content, one snippet per value
95
116
} else {
96
117
//using paragraph separator we make sure that each field value holds a discrete passage for highlighting
97
118
BreakIterator bi = getBreakIterator (field );
98
119
highlighter = new CustomUnifiedHighlighter (searcher , analyzer , offsetSource , passageFormatter ,
99
- field .fieldOptions ().boundaryScannerLocale (), bi ,
100
- fieldValue , field .fieldOptions ().noMatchSize (), maxAnalyzedOffset );
120
+ field .fieldOptions ().boundaryScannerLocale (), bi , fieldValue , field .fieldOptions ().noMatchSize ());
101
121
numberOfFragments = field .fieldOptions ().numberOfFragments ();
102
122
}
103
123
0 commit comments