Skip to content

Commit bf6cead

Browse files
committed
Highlighting broken when query is on _all field or with prefixes. Add also a flag to highlight to control if filters should be highlighted or not (called highlight_filters) which defaults to true. Closes #148.
1 parent 453ede8 commit bf6cead

File tree

12 files changed

+292
-14
lines changed

12 files changed

+292
-14
lines changed

.idea/runConfigurations/Elastic_Search_Tests.xml

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/runConfigurations/Elastic_Search_Tests__Local_.xml

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Licensed to Elastic Search and Shay Banon under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Elastic Search licenses this
6+
* file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.lucene.search;
21+
22+
import java.util.List;
23+
24+
/**
25+
* @author kimchy (shay.banon)
26+
*/
27+
public class PublicBooleanFilter extends BooleanFilter {
28+
29+
public List<Filter> getShouldFilters() {
30+
return this.shouldFilters;
31+
}
32+
33+
public List<Filter> getMustFilters() {
34+
return this.mustFilters;
35+
}
36+
37+
public List<Filter> getNotFilters() {
38+
return this.notFilters;
39+
}
40+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Licensed to Elastic Search and Shay Banon under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Elastic Search licenses this
6+
* file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.lucene.search;
21+
22+
import org.apache.lucene.index.Term;
23+
24+
import java.util.Set;
25+
26+
/**
27+
* @author kimchy (shay.banon)
28+
*/
29+
public class PublicTermsFilter extends TermsFilter {
30+
31+
public Set<Term> getTerms() {
32+
return terms;
33+
}
34+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* Licensed to Elastic Search and Shay Banon under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Elastic Search licenses this
6+
* file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.lucene.search.vectorhighlight;
21+
22+
import org.apache.lucene.index.IndexReader;
23+
import org.apache.lucene.index.Term;
24+
import org.apache.lucene.search.*;
25+
import org.apache.lucene.search.spans.SpanTermQuery;
26+
import org.elasticsearch.util.lucene.search.TermFilter;
27+
28+
import java.io.IOException;
29+
import java.lang.reflect.Field;
30+
import java.util.Collection;
31+
32+
/**
33+
* @author kimchy (shay.banon)
34+
*/
35+
// LUCENE MONITOR
36+
public class CustomFieldQuery extends FieldQuery {
37+
38+
private static Field multiTermQueryWrapperFilterQueryField;
39+
40+
static {
41+
try {
42+
multiTermQueryWrapperFilterQueryField = MultiTermQueryWrapperFilter.class.getDeclaredField("query");
43+
multiTermQueryWrapperFilterQueryField.setAccessible(true);
44+
} catch (NoSuchFieldException e) {
45+
// ignore
46+
}
47+
}
48+
49+
// hack since flatten is called from the parent constructor, so we can't pass it
50+
public static ThreadLocal<IndexReader> reader = new ThreadLocal<IndexReader>();
51+
52+
public static ThreadLocal<Boolean> highlightFilters = new ThreadLocal<Boolean>();
53+
54+
public CustomFieldQuery(Query query, FastVectorHighlighter highlighter) {
55+
this(query, highlighter.isPhraseHighlight(), highlighter.isFieldMatch());
56+
}
57+
58+
public CustomFieldQuery(Query query, boolean phraseHighlight, boolean fieldMatch) {
59+
super(query, phraseHighlight, fieldMatch);
60+
reader.remove();
61+
highlightFilters.remove();
62+
}
63+
64+
@Override void flatten(Query sourceQuery, Collection<Query> flatQueries) {
65+
if (sourceQuery instanceof DisjunctionMaxQuery) {
66+
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
67+
for (Query query : dmq) {
68+
flatten(query, flatQueries);
69+
}
70+
} else if (sourceQuery instanceof SpanTermQuery) {
71+
TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
72+
if (!flatQueries.contains(termQuery)) {
73+
flatQueries.add(termQuery);
74+
}
75+
} else if (sourceQuery instanceof ConstantScoreQuery) {
76+
Boolean highlight = highlightFilters.get();
77+
if (highlight != null && highlight.equals(Boolean.TRUE)) {
78+
flatten(((ConstantScoreQuery) sourceQuery).getFilter(), flatQueries);
79+
}
80+
} else if (sourceQuery instanceof MultiTermQuery) {
81+
MultiTermQuery multiTermQuery = (MultiTermQuery) sourceQuery;
82+
MultiTermQuery.RewriteMethod rewriteMethod = multiTermQuery.getRewriteMethod();
83+
if (rewriteMethod != MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE && rewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
84+
// we need to rewrite
85+
multiTermQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
86+
try {
87+
flatten(multiTermQuery.rewrite(reader.get()), flatQueries);
88+
} catch (IOException e) {
89+
// ignore
90+
} finally {
91+
multiTermQuery.setRewriteMethod(rewriteMethod);
92+
}
93+
}
94+
} else {
95+
super.flatten(sourceQuery, flatQueries);
96+
}
97+
}
98+
99+
void flatten(Filter sourceFilter, Collection<Query> flatQueries) {
100+
if (sourceFilter instanceof TermFilter) {
101+
flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), flatQueries);
102+
} else if (sourceFilter instanceof PublicTermsFilter) {
103+
PublicTermsFilter termsFilter = (PublicTermsFilter) sourceFilter;
104+
for (Term term : termsFilter.getTerms()) {
105+
flatten(new TermQuery(term), flatQueries);
106+
}
107+
} else if (sourceFilter instanceof MultiTermQueryWrapperFilter) {
108+
if (multiTermQueryWrapperFilterQueryField != null) {
109+
try {
110+
flatten((Query) multiTermQueryWrapperFilterQueryField.get(sourceFilter), flatQueries);
111+
} catch (IllegalAccessException e) {
112+
// ignore
113+
}
114+
}
115+
} else if (sourceFilter instanceof PublicBooleanFilter) {
116+
PublicBooleanFilter booleanFilter = (PublicBooleanFilter) sourceFilter;
117+
for (Filter filter : booleanFilter.getMustFilters()) {
118+
flatten(filter, flatQueries);
119+
}
120+
for (Filter filter : booleanFilter.getNotFilters()) {
121+
flatten(filter, flatQueries);
122+
}
123+
}
124+
}
125+
}

modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/BoolJsonFilterParser.java

+2-5
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,7 @@
2020
package org.elasticsearch.index.query.json;
2121

2222
import com.google.inject.Inject;
23-
import org.apache.lucene.search.BooleanClause;
24-
import org.apache.lucene.search.BooleanFilter;
25-
import org.apache.lucene.search.Filter;
26-
import org.apache.lucene.search.FilterClause;
23+
import org.apache.lucene.search.*;
2724
import org.codehaus.jackson.JsonParser;
2825
import org.codehaus.jackson.JsonToken;
2926
import org.elasticsearch.index.AbstractIndexComponent;
@@ -85,7 +82,7 @@ public class BoolJsonFilterParser extends AbstractIndexComponent implements Json
8582
}
8683
}
8784

88-
BooleanFilter booleanFilter = new BooleanFilter();
85+
BooleanFilter booleanFilter = new PublicBooleanFilter();
8986
for (FilterClause filterClause : clauses) {
9087
booleanFilter.add(filterClause);
9188
}

modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/TermsJsonFilterParser.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.google.inject.Inject;
2323
import org.apache.lucene.index.Term;
2424
import org.apache.lucene.search.Filter;
25+
import org.apache.lucene.search.PublicTermsFilter;
2526
import org.apache.lucene.search.TermsFilter;
2627
import org.codehaus.jackson.JsonParser;
2728
import org.codehaus.jackson.JsonToken;
@@ -73,7 +74,7 @@ public class TermsJsonFilterParser extends AbstractIndexComponent implements Jso
7374
throw new QueryParsingException(index, "Terms filter must define the terms to filter on as an array");
7475
}
7576

76-
TermsFilter termsFilter = new TermsFilter();
77+
TermsFilter termsFilter = new PublicTermsFilter();
7778
while ((token = jp.nextToken()) != JsonToken.END_ARRAY) {
7879
String value = jp.getText();
7980
if (value == null) {

modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,10 @@ public class HighlightPhase implements SearchPhase {
6161
}
6262
FastVectorHighlighter highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);
6363

64-
FieldQuery fieldQuery = highlighter.getFieldQuery(context.query());
64+
CustomFieldQuery.reader.set(context.searcher().getIndexReader());
65+
CustomFieldQuery.highlightFilters.set(context.highlight().highlightFilter());
66+
67+
FieldQuery fieldQuery = new CustomFieldQuery(context.query(), highlighter);
6568
for (SearchHit hit : context.fetchResult().hits().hits()) {
6669
InternalSearchHit internalHit = (InternalSearchHit) hit;
6770

modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java

+13-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.elasticsearch.search.SearchParseElement;
2626
import org.elasticsearch.search.SearchParseException;
2727
import org.elasticsearch.search.internal.SearchContext;
28+
import org.elasticsearch.util.Booleans;
2829

2930
import java.util.List;
3031

@@ -67,6 +68,7 @@ public class HighlighterParseElement implements SearchParseElement {
6768
String[] preTags = DEFAULT_PRE_TAGS;
6869
String[] postTags = DEFAULT_POST_TAGS;
6970
boolean scoreOrdered = false;
71+
boolean highlightFilter = true;
7072
while ((token = jp.nextToken()) != JsonToken.END_OBJECT) {
7173
if (token == JsonToken.FIELD_NAME) {
7274
topLevelFieldName = jp.getCurrentName();
@@ -97,6 +99,16 @@ public class HighlighterParseElement implements SearchParseElement {
9799
preTags = STYLED_PRE_TAG;
98100
postTags = STYLED_POST_TAGS;
99101
}
102+
} else if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
103+
highlightFilter = Booleans.parseBoolean(jp.getText(), true);
104+
}
105+
} else if (token == JsonToken.VALUE_NUMBER_INT) {
106+
if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
107+
highlightFilter = jp.getIntValue() != 0;
108+
}
109+
} else if (token == JsonToken.VALUE_FALSE) {
110+
if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
111+
highlightFilter = false;
100112
}
101113
} else if (token == JsonToken.START_OBJECT) {
102114
if ("fields".equals(topLevelFieldName)) {
@@ -134,6 +146,6 @@ public class HighlighterParseElement implements SearchParseElement {
134146
if (preTags != null && postTags == null) {
135147
throw new SearchParseException(context, "Highlighter preTags are set, but postTags are not set");
136148
}
137-
context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered));
149+
context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered, highlightFilter));
138150
}
139151
}

modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,19 @@ public class SearchContextHighlight {
3434

3535
private boolean scoreOrdered = false;
3636

37-
public SearchContextHighlight(List<ParsedHighlightField> fields, String[] preTags, String[] postTags, boolean scoreOrdered) {
37+
private boolean highlightFilter;
38+
39+
public SearchContextHighlight(List<ParsedHighlightField> fields, String[] preTags, String[] postTags,
40+
boolean scoreOrdered, boolean highlightFilter) {
3841
this.fields = fields;
3942
this.preTags = preTags;
4043
this.postTags = postTags;
4144
this.scoreOrdered = scoreOrdered;
45+
this.highlightFilter = highlightFilter;
46+
}
47+
48+
public boolean highlightFilter() {
49+
return highlightFilter;
4250
}
4351

4452
public List<ParsedHighlightField> fields() {

modules/elasticsearch/src/test/java/org/elasticsearch/deps/lucene/VectorHighlighterTests.java

+41-3
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,8 @@
2323
import org.apache.lucene.index.IndexReader;
2424
import org.apache.lucene.index.IndexWriter;
2525
import org.apache.lucene.index.Term;
26-
import org.apache.lucene.search.IndexSearcher;
27-
import org.apache.lucene.search.TermQuery;
28-
import org.apache.lucene.search.TopDocs;
26+
import org.apache.lucene.search.*;
27+
import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
2928
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
3029
import org.apache.lucene.store.Directory;
3130
import org.apache.lucene.store.RAMDirectory;
@@ -61,6 +60,45 @@ public class VectorHighlighterTests {
6160
System.out.println(fragment);
6261
}
6362

63+
@Test public void testVectorHighlighterPrefixQuery() throws Exception {
64+
Directory dir = new RAMDirectory();
65+
IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
66+
67+
indexWriter.addDocument(doc().add(field("_id", "1")).add(field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)).build());
68+
69+
IndexReader reader = indexWriter.getReader();
70+
IndexSearcher searcher = new IndexSearcher(reader);
71+
TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
72+
73+
assertThat(topDocs.totalHits, equalTo(1));
74+
75+
FastVectorHighlighter highlighter = new FastVectorHighlighter();
76+
77+
PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));
78+
assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
79+
String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(prefixQuery),
80+
reader, topDocs.scoreDocs[0].doc, "content", 30);
81+
assertThat(fragment, nullValue());
82+
83+
prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE);
84+
Query rewriteQuery = prefixQuery.rewrite(reader);
85+
fragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewriteQuery),
86+
reader, topDocs.scoreDocs[0].doc, "content", 30);
87+
assertThat(fragment, notNullValue());
88+
89+
System.out.println(fragment);
90+
91+
// now check with the custom field query
92+
prefixQuery = new PrefixQuery(new Term("content", "ba"));
93+
assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
94+
CustomFieldQuery.reader.set(reader);
95+
fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, highlighter),
96+
reader, topDocs.scoreDocs[0].doc, "content", 30);
97+
assertThat(fragment, notNullValue());
98+
99+
System.out.println(fragment);
100+
}
101+
64102
@Test public void testVectorHighlighterNoStore() throws Exception {
65103
Directory dir = new RAMDirectory();
66104
IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);

0 commit comments

Comments
 (0)