Skip to content

Commit ba39f46

Browse files
authored
Speed up empty highlighting many fields (#61860)
Kibana often highlights *everything* like this: ``` POST /_search { "query": ..., "size": 500, "highlight": { "fields": { "*": { ... } } } } ``` This can get slow when there are hundreds of mapped fields. I tested this locally and unscientifically and it took a request from 20ms to 150ms when there are 100 fields. I've seen clusters with 2000 fields where simple searches go from 500ms to 1500ms just by turning on this sort of highlighting. This happens even when the query is just a `range` and the fields are all numbers and stuff, so it won't highlight anything. This speeds up the `unified` highlighter in this case in a few ways: 1. Build the highlighting infrastructure once per field rather than once per document per field. This cuts out a *ton* of work analyzing the query over and over and over again. 2. Bail out of the highlighter before loading values if we can't produce any results. Combined, these take that local 150ms case down to 65ms. This is unlikely to be really useful when there are only a few fetched docs and only a few fields, but we often end up having many fields with many fetched docs.
1 parent 9859051 commit ba39f46

File tree

8 files changed

+254
-163
lines changed

8 files changed

+254
-163
lines changed

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
import org.elasticsearch.index.mapper.ValueFetcher;
4747
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
4848
import org.elasticsearch.index.similarity.SimilarityProvider;
49-
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
5049

5150
import java.io.IOException;
5251
import java.io.Reader;
@@ -299,22 +298,25 @@ public AnnotationToken getAnnotation(int index) {
299298
// original markup form in order to inject annotations.
300299
public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
301300
private final Analyzer delegate;
302-
private final HitContext hitContext;
303-
public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){
301+
private AnnotatedText[] annotations;
302+
303+
public AnnotatedHighlighterAnalyzer(Analyzer delegate){
304304
super(delegate.getReuseStrategy());
305305
this.delegate = delegate;
306-
this.hitContext = hitContext;
307306
}
308307

309308
@Override
310309
public Analyzer getWrappedAnalyzer(String fieldName) {
311310
return delegate;
312311
}
313312

313+
public void setAnnotations(AnnotatedText[] annotations) {
314+
this.annotations = annotations;
315+
}
316+
314317
@Override
315318
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
316319
AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
317-
AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
318320
AtomicInteger readerNum = new AtomicInteger(0);
319321
return new TokenStreamComponents(r -> {
320322
String plainText = readToString(r);

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
4444
private final Encoder encoder;
4545
AnnotatedText[] annotations;
4646

47-
public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) {
47+
public AnnotatedPassageFormatter(Encoder encoder) {
4848
this.encoder = encoder;
49+
}
50+
51+
void setAnnotations(AnnotatedText[] annotations) {
4952
this.annotations = annotations;
5053
}
5154

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import org.apache.lucene.analysis.Analyzer;
2323
import org.apache.lucene.search.highlight.Encoder;
24+
import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
2425
import org.apache.lucene.search.uhighlight.PassageFormatter;
2526
import org.elasticsearch.index.mapper.DocumentMapper;
2627
import org.elasticsearch.index.mapper.MappedFieldType;
@@ -37,39 +38,37 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
3738

3839
public static final String NAME = "annotated";
3940

40-
@Override
41-
protected Analyzer getAnalyzer(DocumentMapper docMapper, HitContext hitContext) {
42-
return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, hitContext), hitContext);
43-
}
44-
4541
// Convert the marked-up values held on-disk to plain-text versions for highlighting
4642
@Override
47-
protected List<Object> loadFieldValues(MappedFieldType fieldType,
48-
Field field,
49-
HitContext hitContext,
50-
boolean forceSource) throws IOException {
51-
List<Object> fieldValues = super.loadFieldValues(fieldType, field, hitContext, forceSource);
52-
String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
43+
protected List<Object> loadFieldValues(
44+
CustomUnifiedHighlighter highlighter,
45+
MappedFieldType fieldType,
46+
Field field,
47+
HitContext hitContext,
48+
boolean forceSource
49+
) throws IOException {
50+
List<Object> fieldValues = super.loadFieldValues(highlighter, fieldType, field, hitContext, forceSource);
5351

54-
AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length];
55-
for (int i = 0; i < fieldValuesAsString.length; i++) {
56-
annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]);
52+
List<Object> strings = new ArrayList<>(fieldValues.size());
53+
AnnotatedText[] annotations = new AnnotatedText[fieldValues.size()];
54+
for (int i = 0; i < fieldValues.size(); i++) {
55+
annotations[i] = AnnotatedText.parse(fieldValues.get(i).toString());
56+
strings.add(annotations[i].textMinusMarkup);
5757
}
58-
// Store the annotations in the hitContext
59-
hitContext.cache().put(AnnotatedText.class.getName(), annotations);
58+
// Store the annotations in the formatter and analyzer
59+
((AnnotatedPassageFormatter) highlighter.getFormatter()).setAnnotations(annotations);
60+
((AnnotatedHighlighterAnalyzer) highlighter.getIndexAnalyzer()).setAnnotations(annotations);
61+
return strings;
62+
}
6063

61-
ArrayList<Object> result = new ArrayList<>(annotations.length);
62-
for (int i = 0; i < annotations.length; i++) {
63-
result.add(annotations[i].textMinusMarkup);
64-
}
65-
return result;
64+
@Override
65+
protected Analyzer getAnalyzer(DocumentMapper docMapper) {
66+
return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper));
6667
}
6768

6869
@Override
6970
protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchHighlightContext.Field field, Encoder encoder) {
70-
// Retrieve the annotations from the hitContext
71-
AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
72-
return new AnnotatedPassageFormatter(annotations, encoder);
71+
return new AnnotatedPassageFormatter(encoder);
7372
}
7473

7574
}

plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java renamed to plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.search.highlight;
20+
package org.elasticsearch.search.fetch.subphase.highlight;
2121

2222
import org.apache.lucene.analysis.Analyzer;
2323
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -28,7 +28,6 @@
2828
import org.apache.lucene.index.DirectoryReader;
2929
import org.apache.lucene.index.IndexOptions;
3030
import org.apache.lucene.index.IndexWriterConfig;
31-
import org.apache.lucene.index.LeafReaderContext;
3231
import org.apache.lucene.index.RandomIndexWriter;
3332
import org.apache.lucene.index.Term;
3433
import org.apache.lucene.search.IndexSearcher;
@@ -48,14 +47,11 @@
4847
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
4948
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
5049
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
51-
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
52-
import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
5350
import org.elasticsearch.test.ESTestCase;
5451

5552
import java.net.URLEncoder;
5653
import java.text.BreakIterator;
5754
import java.util.ArrayList;
58-
import java.util.HashMap;
5955
import java.util.Locale;
6056

6157
import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
@@ -70,7 +66,6 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
7066

7167
// Annotated fields wrap the usual analyzer with one that injects extra tokens
7268
Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
73-
7469
Directory dir = newDirectory();
7570
IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
7671
iwc.setMergePolicy(newTieredMergePolicy(random()));
@@ -93,17 +88,14 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
9388
IndexSearcher searcher = newSearcher(reader);
9489
iw.close();
9590

96-
LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
97-
HitContext mockHitContext = new HitContext(null, context, 0, null, new HashMap<>());
98-
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext);
99-
10091
AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
10192
for (int i = 0; i < markedUpInputs.length; i++) {
10293
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
10394
}
104-
mockHitContext.cache().put(AnnotatedText.class.getName(), annotations);
105-
106-
AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder());
95+
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
96+
hiliteAnalyzer.setAnnotations(annotations);
97+
AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(new DefaultEncoder());
98+
passageFormatter.setAnnotations(annotations);
10799

108100
ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
109101
for (int i = 0; i < annotations.length; i++) {
@@ -113,13 +105,24 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
113105
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
114106
assertThat(topDocs.totalHits.value, equalTo(1L));
115107
String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
116-
117-
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
118-
passageFormatter, locale,
119-
breakIterator, rawValue, noMatchSize);
108+
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
109+
searcher,
110+
hiliteAnalyzer,
111+
null,
112+
passageFormatter,
113+
locale,
114+
breakIterator,
115+
"index",
116+
"text",
117+
query,
118+
noMatchSize,
119+
expectedPassages.length,
120+
name -> "text".equals(name),
121+
Integer.MAX_VALUE,
122+
Integer.MAX_VALUE
123+
);
120124
highlighter.setFieldMatcher((name) -> "text".equals(name));
121-
final Snippet[] snippets =
122-
highlighter.highlightField("text", query, topDocs.scoreDocs[0].doc, expectedPassages.length);
125+
final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
123126
assertEquals(expectedPassages.length, snippets.length);
124127
for (int i = 0; i < snippets.length; i++) {
125128
assertEquals(expectedPassages[i], snippets[i].getText());

server/src/main/java/org/apache/lucene/search/uhighlight/CustomFieldHighlighter.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.apache.lucene.search.uhighlight;
2121

22+
import org.apache.lucene.index.LeafReader;
2223
import org.apache.lucene.util.BytesRef;
2324

2425
import java.io.IOException;
@@ -39,17 +40,27 @@ class CustomFieldHighlighter extends FieldHighlighter {
3940

4041
private final Locale breakIteratorLocale;
4142
private final int noMatchSize;
42-
private final String fieldValue;
43+
private String fieldValue;
4344

4445
CustomFieldHighlighter(String field, FieldOffsetStrategy fieldOffsetStrategy,
4546
Locale breakIteratorLocale, BreakIterator breakIterator,
4647
PassageScorer passageScorer, int maxPassages, int maxNoHighlightPassages,
47-
PassageFormatter passageFormatter, int noMatchSize, String fieldValue) {
48+
PassageFormatter passageFormatter, int noMatchSize) {
4849
super(field, fieldOffsetStrategy, breakIterator, passageScorer, maxPassages,
4950
maxNoHighlightPassages, passageFormatter);
5051
this.breakIteratorLocale = breakIteratorLocale;
5152
this.noMatchSize = noMatchSize;
52-
this.fieldValue = fieldValue;
53+
}
54+
55+
@Override
56+
public Object highlightFieldForDoc(LeafReader reader, int docId, String content) throws IOException {
57+
this.fieldValue = content;
58+
try {
59+
return super.highlightFieldForDoc(reader, docId, content);
60+
} finally {
61+
// Clear the reference to the field value in case it is large
62+
fieldValue = null;
63+
}
5364
}
5465

5566
@Override

0 commit comments

Comments
 (0)