Skip to content

Commit a1fcb8a

Browse files
authored
Bug fix for AnnotatedTextHighlighter - port of 39525 (#39747)
Bug fix for AnnotatedTextHighlighter - port of 39525 Relates to #39395
1 parent 1617e30 commit a1fcb8a

File tree

7 files changed

+153
-65
lines changed

7 files changed

+153
-65
lines changed

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

+7-38
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import org.elasticsearch.index.mapper.TextFieldMapper;
5555
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
5656
import org.elasticsearch.index.query.QueryShardContext;
57+
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
5758

5859
import java.io.IOException;
5960
import java.io.Reader;
@@ -315,45 +316,12 @@ public AnnotationToken getAnnotation(int index) {
315316
// When asked to tokenize plain-text versions by the highlighter it tokenizes the
316317
// original markup form in order to inject annotations.
317318
public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
318-
private Analyzer delegate;
319-
private AnnotatedText[] annotations;
320-
public AnnotatedHighlighterAnalyzer(Analyzer delegate){
319+
private final Analyzer delegate;
320+
private final HitContext hitContext;
321+
public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){
321322
super(delegate.getReuseStrategy());
322323
this.delegate = delegate;
323-
}
324-
325-
public void init(String[] markedUpFieldValues) {
326-
this.annotations = new AnnotatedText[markedUpFieldValues.length];
327-
for (int i = 0; i < markedUpFieldValues.length; i++) {
328-
annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
329-
}
330-
}
331-
332-
public String [] getPlainTextValuesForHighlighter(){
333-
String [] result = new String[annotations.length];
334-
for (int i = 0; i < annotations.length; i++) {
335-
result[i] = annotations[i].textMinusMarkup;
336-
}
337-
return result;
338-
}
339-
340-
public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
341-
List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
342-
int fieldValueOffset =0;
343-
for (AnnotatedText fieldValueAnnotations : this.annotations) {
344-
//This is called from a highlighter where all of the field values are concatenated
345-
// so each annotation offset will need to be adjusted so that it takes into account
346-
// the previous values AND the MULTIVAL delimiter
347-
for (AnnotationToken token : fieldValueAnnotations.annotations) {
348-
if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) {
349-
intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
350-
token.endOffset + fieldValueOffset, token.value));
351-
}
352-
}
353-
//add 1 for the fieldvalue separator character
354-
fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
355-
}
356-
return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
324+
this.hitContext = hitContext;
357325
}
358326

359327
@Override
@@ -364,10 +332,11 @@ public Analyzer getWrappedAnalyzer(String fieldName) {
364332
@Override
365333
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
366334
AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
335+
AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
367336
AtomicInteger readerNum = new AtomicInteger(0);
368337
return new TokenStreamComponents(r -> {
369338
String plainText = readToString(r);
370-
AnnotatedText at = this.annotations[readerNum.getAndIncrement()];
339+
AnnotatedText at = annotations[readerNum.getAndIncrement()];
371340
assert at.textMinusMarkup.equals(plainText);
372341
injector.setAnnotations(at);
373342
components.getSource().accept(new StringReader(at.textMinusMarkup));

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedPassageFormatter.java

+26-5
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import org.apache.lucene.search.uhighlight.Passage;
2424
import org.apache.lucene.search.uhighlight.PassageFormatter;
2525
import org.apache.lucene.search.uhighlight.Snippet;
26-
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
26+
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
2727
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
2828

2929
import java.io.UnsupportedEncodingException;
@@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter {
4242

4343
public static final String SEARCH_HIT_TYPE = "_hit_term";
4444
private final Encoder encoder;
45-
private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer;
45+
AnnotatedText[] annotations;
4646

47-
public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) {
48-
this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer;
47+
public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) {
4948
this.encoder = encoder;
49+
this.annotations = annotations;
5050
}
5151

5252
static class MarkupPassage {
@@ -158,7 +158,7 @@ public Snippet[] format(Passage[] passages, String content) {
158158
int pos;
159159
int j = 0;
160160
for (Passage passage : passages) {
161-
AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(),
161+
AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(),
162162
passage.getEndOffset());
163163
MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);
164164

@@ -194,6 +194,27 @@ public Snippet[] format(Passage[] passages, String content) {
194194
}
195195
return snippets;
196196
}
197+
198+
public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
199+
List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
200+
int fieldValueOffset =0;
201+
for (AnnotatedText fieldValueAnnotations : this.annotations) {
202+
//This is called from a highlighter where all of the field values are concatenated
203+
// so each annotation offset will need to be adjusted so that it takes into account
204+
// the previous values AND the MULTIVAL delimiter
205+
for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) {
206+
AnnotationToken token = fieldValueAnnotations.getAnnotation(i);
207+
if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
208+
intersectingAnnotations
209+
.add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset +
210+
fieldValueOffset, token.value));
211+
}
212+
}
213+
//add 1 for the fieldvalue separator character
214+
fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
215+
}
216+
return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
217+
}
197218

198219
private void append(StringBuilder dest, String content, int start, int end) {
199220
dest.append(encoder.encodeText(content.substring(start, end)));

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java

+21-11
Original file line numberDiff line numberDiff line change
@@ -25,24 +25,22 @@
2525
import org.elasticsearch.index.mapper.DocumentMapper;
2626
import org.elasticsearch.index.mapper.MappedFieldType;
2727
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
28+
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
2829
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
2930
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
3031
import org.elasticsearch.search.internal.SearchContext;
3132

3233
import java.io.IOException;
33-
import java.util.Arrays;
34+
import java.util.ArrayList;
3435
import java.util.List;
3536

3637
public class AnnotatedTextHighlighter extends UnifiedHighlighter {
3738

3839
public static final String NAME = "annotated";
39-
40-
AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null;
4140

4241
@Override
43-
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
44-
annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type));
45-
return annotatedHighlighterAnalyzer;
42+
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
43+
return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext);
4644
}
4745

4846
// Convert the marked-up values held on-disk to plain-text versions for highlighting
@@ -51,14 +49,26 @@ protected List<Object> loadFieldValues(MappedFieldType fieldType, Field field, S
5149
throws IOException {
5250
List<Object> fieldValues = super.loadFieldValues(fieldType, field, context, hitContext);
5351
String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
54-
annotatedHighlighterAnalyzer.init(fieldValuesAsString);
55-
return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter());
52+
53+
AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length];
54+
for (int i = 0; i < fieldValuesAsString.length; i++) {
55+
annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]);
56+
}
57+
// Store the annotations in the hitContext
58+
hitContext.cache().put(AnnotatedText.class.getName(), annotations);
59+
60+
ArrayList<Object> result = new ArrayList<>(annotations.length);
61+
for (int i = 0; i < annotations.length; i++) {
62+
result.add(annotations[i].textMinusMarkup);
63+
}
64+
return result;
5665
}
5766

5867
@Override
59-
protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
60-
return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder);
61-
68+
protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) {
69+
// Retrieve the annotations from the hitContext
70+
AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
71+
return new AnnotatedPassageFormatter(annotations, encoder);
6272
}
6373

6474
}

plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/highlight/AnnotatedTextHighlighterTests.java

+19-6
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,20 @@
4040
import org.apache.lucene.search.highlight.DefaultEncoder;
4141
import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
4242
import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
43-
import org.apache.lucene.search.uhighlight.PassageFormatter;
4443
import org.apache.lucene.search.uhighlight.Snippet;
4544
import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
4645
import org.apache.lucene.store.Directory;
4746
import org.elasticsearch.common.Strings;
4847
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
48+
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
4949
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
50+
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
5051
import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
5152
import org.elasticsearch.test.ESTestCase;
5253

5354
import java.net.URLEncoder;
5455
import java.text.BreakIterator;
56+
import java.util.ArrayList;
5557
import java.util.Locale;
5658

5759
import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
@@ -63,13 +65,24 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
6365
Query query, Locale locale, BreakIterator breakIterator,
6466
int noMatchSize, String[] expectedPassages) throws Exception {
6567

68+
6669
// Annotated fields wrap the usual analyzer with one that injects extra tokens
6770
Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
68-
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
69-
hiliteAnalyzer.init(markedUpInputs);
70-
PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder());
71-
String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter();
71+
HitContext mockHitContext = new HitContext();
72+
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext);
73+
74+
AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
75+
for (int i = 0; i < markedUpInputs.length; i++) {
76+
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
77+
}
78+
mockHitContext.cache().put(AnnotatedText.class.getName(), annotations);
7279

80+
AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder());
81+
82+
ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
83+
for (int i = 0; i < annotations.length; i++) {
84+
plainTextForHighlighter.add(annotations[i].textMinusMarkup);
85+
}
7386

7487
Directory dir = newDirectory();
7588
IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
@@ -94,7 +107,7 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
94107
iw.close();
95108
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
96109
assertThat(topDocs.totalHits.value, equalTo(1L));
97-
String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
110+
String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
98111

99112
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
100113
passageFormatter, locale,

plugins/mapper-annotated-text/src/test/resources/rest-api-spec/test/mapper_annotatedtext/10_basic.yml

+74
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,77 @@
4242
body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }
4343

4444
- match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."}
45+
46+
---
47+
"issue 39395 thread safety issue -requires multiple calls to reveal":
48+
- skip:
49+
version: " - 6.4.99"
50+
reason: Annotated text type introduced in 6.5.0
51+
52+
- do:
53+
indices.create:
54+
index: annotated
55+
body:
56+
settings:
57+
number_of_shards: "5"
58+
number_of_replicas: "0"
59+
mappings:
60+
properties:
61+
my_field:
62+
type: annotated_text
63+
64+
- do:
65+
index:
66+
index: annotated
67+
id: 1
68+
body:
69+
"my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)"
70+
- do:
71+
index:
72+
index: annotated
73+
id: 2
74+
body:
75+
"my_field" : "[A](~MARK0) [C](~MARK2)"
76+
refresh: true
77+
- do:
78+
search:
79+
request_cache: false
80+
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
81+
- match: {_shards.failed: 0}
82+
83+
- do:
84+
search:
85+
request_cache: false
86+
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
87+
- match: {_shards.failed: 0}
88+
89+
- do:
90+
search:
91+
request_cache: false
92+
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
93+
- match: {_shards.failed: 0}
94+
95+
- do:
96+
search:
97+
request_cache: false
98+
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
99+
- match: {_shards.failed: 0}
100+
101+
- do:
102+
search:
103+
request_cache: false
104+
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
105+
- match: {_shards.failed: 0}
106+
107+
- do:
108+
search:
109+
request_cache: false
110+
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
111+
- match: {_shards.failed: 0}
112+
113+
- do:
114+
search:
115+
request_cache: false
116+
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
117+
- match: {_shards.failed: 0}
118+

server/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java

-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ public Map<String, Object> cache() {
7474
}
7575
return cache;
7676
}
77-
7877
}
7978

8079
/**

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java

+6-4
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.elasticsearch.index.mapper.MappedFieldType;
3939
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
4040
import org.elasticsearch.search.fetch.FetchSubPhase;
41+
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
4142
import org.elasticsearch.search.internal.SearchContext;
4243

4344
import java.io.IOException;
@@ -68,12 +69,13 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
6869
int numberOfFragments;
6970
try {
7071

71-
final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
72+
final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType,
73+
hitContext);
7274
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
7375
if (fieldValues.size() == 0) {
7476
return null;
7577
}
76-
final PassageFormatter passageFormatter = getPassageFormatter(field, encoder);
78+
final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder);
7779
final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
7880
final CustomUnifiedHighlighter highlighter;
7981
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
@@ -140,14 +142,14 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
140142
return null;
141143
}
142144

143-
protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
145+
protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) {
144146
CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
145147
field.fieldOptions().postTags()[0], encoder);
146148
return passageFormatter;
147149
}
148150

149151

150-
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
152+
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
151153
return HighlightUtils.getAnalyzer(docMapper, type);
152154
}
153155

0 commit comments

Comments
 (0)