|
32 | 32 | import org.apache.lucene.util.CollectionUtil;
|
33 | 33 | import org.elasticsearch.common.Strings;
|
34 | 34 | import org.elasticsearch.common.text.Text;
|
| 35 | +import org.elasticsearch.index.IndexSettings; |
35 | 36 | import org.elasticsearch.index.mapper.DocumentMapper;
|
36 | 37 | import org.elasticsearch.index.mapper.FieldMapper;
|
37 | 38 | import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
38 | 39 | import org.elasticsearch.index.mapper.MappedFieldType;
|
39 |
| -import org.elasticsearch.index.mapper.MapperService; |
40 | 40 | import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
|
41 | 41 | import org.elasticsearch.search.fetch.FetchSubPhase;
|
42 | 42 | import org.elasticsearch.search.internal.SearchContext;
|
43 | 43 |
|
44 | 44 | import java.io.IOException;
|
45 | 45 | import java.text.BreakIterator;
|
46 | 46 | import java.util.ArrayList;
|
47 |
| -import java.util.HashMap; |
48 | 47 | import java.util.List;
|
49 | 48 | import java.util.Locale;
|
50 |
| -import java.util.Map; |
51 | 49 | import java.util.stream.Collectors;
|
52 | 50 |
|
53 | 51 | import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
|
@@ -83,21 +81,28 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
|
83 | 81 | final CustomUnifiedHighlighter highlighter;
|
84 | 82 | final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
|
85 | 83 | final OffsetSource offsetSource = getOffsetSource(fieldMapper.fieldType());
|
| 84 | + if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) { |
| 85 | + throw new IllegalArgumentException( |
| 86 | + "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() + |
| 87 | + "] doc of [" + context.indexShard().shardId().getIndexName() + "] index " + "has exceeded [" + |
| 88 | + maxAnalyzedOffset + "] - maximum allowed to be analyzed for highlighting. " + |
| 89 | + "This maximum can be set by changing the [" + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() + |
| 90 | + "] index level setting. " + "For large texts, indexing with offsets or term vectors is recommended!"); |
| 91 | + } |
86 | 92 | if (field.fieldOptions().numberOfFragments() == 0) {
|
87 | 93 | // we use a control char to separate values, which is the only char that the custom break iterator
|
88 | 94 | // breaks the text on, so we don't lose the distinction between the different values of a field and we
|
89 | 95 | // get back a snippet per value
|
90 | 96 | CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
|
91 | 97 | highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
|
92 |
| - field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize(), |
93 |
| - maxAnalyzedOffset); |
| 98 | + field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize()); |
94 | 99 | numberOfFragments = fieldValues.size(); // we are highlighting the whole content, one snippet per value
|
95 | 100 | } else {
|
96 | 101 | //using paragraph separator we make sure that each field value holds a discrete passage for highlighting
|
97 | 102 | BreakIterator bi = getBreakIterator(field);
|
98 | 103 | highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
|
99 | 104 | field.fieldOptions().boundaryScannerLocale(), bi,
|
100 |
| - fieldValue, field.fieldOptions().noMatchSize(), maxAnalyzedOffset); |
| 105 | + fieldValue, field.fieldOptions().noMatchSize()); |
101 | 106 | numberOfFragments = field.fieldOptions().numberOfFragments();
|
102 | 107 | }
|
103 | 108 |
|
|
0 commit comments