Skip to content

Commit 4f1ebfa

Browse files
Add bulkScorer to script score query (#46336)
Some queries provide a BulkScorer that can be significantly faster than iterating naively over the Scorer. This change gives script_score its own BulkScorer that delegates to the BulkScorer of the wrapped query, which makes script_score faster in those cases. Closes #40837
1 parent ac07248 commit 4f1ebfa

File tree

2 files changed

+170
-38
lines changed

2 files changed

+170
-38
lines changed

server/src/main/java/org/elasticsearch/common/lucene/search/function/ScriptScoreQuery.java

+141-38
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,17 @@
2525
import org.apache.lucene.search.BooleanClause;
2626
import org.apache.lucene.search.DocIdSetIterator;
2727
import org.apache.lucene.search.Explanation;
28-
import org.apache.lucene.search.IndexSearcher;
28+
import org.apache.lucene.search.FilterLeafCollector;
29+
import org.apache.lucene.search.LeafCollector;
2930
import org.apache.lucene.search.Query;
31+
import org.apache.lucene.search.Scorable;
32+
import org.apache.lucene.search.Weight;
33+
import org.apache.lucene.search.IndexSearcher;
3034
import org.apache.lucene.search.QueryVisitor;
3135
import org.apache.lucene.search.ScoreMode;
3236
import org.apache.lucene.search.Scorer;
33-
import org.apache.lucene.search.Weight;
37+
import org.apache.lucene.search.BulkScorer;
38+
import org.apache.lucene.util.Bits;
3439
import org.elasticsearch.ElasticsearchException;
3540
import org.elasticsearch.Version;
3641
import org.elasticsearch.script.ScoreScript;
@@ -83,6 +88,19 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
8388
Weight subQueryWeight = subQuery.createWeight(searcher, subQueryScoreMode, boost);
8489

8590
return new Weight(this){
91+
@Override
92+
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
93+
if (minScore == null) {
94+
final BulkScorer subQueryBulkScorer = subQueryWeight.bulkScorer(context);
95+
if (subQueryBulkScorer == null) {
96+
return null;
97+
}
98+
return new ScriptScoreBulkScorer(subQueryBulkScorer, subQueryScoreMode, makeScoreScript(context));
99+
} else {
100+
return super.bulkScorer(context);
101+
}
102+
}
103+
86104
@Override
87105
public void extractTerms(Set<Term> terms) {
88106
subQueryWeight.extractTerms(terms);
@@ -94,8 +112,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
94112
if (subQueryScorer == null) {
95113
return null;
96114
}
97-
Scorer scriptScorer = makeScriptScorer(subQueryScorer, context, null);
98-
115+
Scorer scriptScorer = new ScriptScorer(this, makeScoreScript(context), subQueryScorer, subQueryScoreMode, null);
99116
if (minScore != null) {
100117
scriptScorer = new MinScoreScorer(this, scriptScorer, minScore);
101118
}
@@ -109,7 +126,8 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
109126
return subQueryExplanation;
110127
}
111128
ExplanationHolder explanationHolder = new ExplanationHolder();
112-
Scorer scorer = makeScriptScorer(subQueryWeight.scorer(context), context, explanationHolder);
129+
Scorer scorer = new ScriptScorer(this, makeScoreScript(context),
130+
subQueryWeight.scorer(context), subQueryScoreMode, explanationHolder);
113131
int newDoc = scorer.iterator().advance(doc);
114132
assert doc == newDoc; // subquery should have already matched above
115133
float score = scorer.score();
@@ -132,42 +150,13 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
132150
}
133151
return explanation;
134152
}
135-
136-
private Scorer makeScriptScorer(Scorer subQueryScorer, LeafReaderContext context,
137-
ExplanationHolder explanation) throws IOException {
153+
154+
private ScoreScript makeScoreScript(LeafReaderContext context) throws IOException {
138155
final ScoreScript scoreScript = scriptBuilder.newInstance(context);
139-
scoreScript.setScorer(subQueryScorer);
140156
scoreScript._setIndexName(indexName);
141157
scoreScript._setShard(shardId);
142158
scoreScript._setIndexVersion(indexVersion);
143-
144-
return new Scorer(this) {
145-
@Override
146-
public float score() throws IOException {
147-
int docId = docID();
148-
scoreScript.setDocument(docId);
149-
float score = (float) scoreScript.execute(explanation);
150-
if (score == Float.NEGATIVE_INFINITY || Float.isNaN(score)) {
151-
throw new ElasticsearchException(
152-
"script score query returned an invalid score: " + score + " for doc: " + docId);
153-
}
154-
return score;
155-
}
156-
@Override
157-
public int docID() {
158-
return subQueryScorer.docID();
159-
}
160-
161-
@Override
162-
public DocIdSetIterator iterator() {
163-
return subQueryScorer.iterator();
164-
}
165-
166-
@Override
167-
public float getMaxScore(int upTo) {
168-
return Float.MAX_VALUE; // TODO: what would be a good upper bound?
169-
}
170-
};
159+
return scoreScript;
171160
}
172161

173162
@Override
@@ -187,7 +176,7 @@ public void visit(QueryVisitor visitor) {
187176
@Override
188177
public String toString(String field) {
189178
StringBuilder sb = new StringBuilder();
190-
sb.append("script score (").append(subQuery.toString(field)).append(", script: ");
179+
sb.append("script_score (").append(subQuery.toString(field)).append(", script: ");
191180
sb.append("{" + script.toString() + "}");
192181
return sb.toString();
193182
}
@@ -209,4 +198,118 @@ public boolean equals(Object o) {
209198
public int hashCode() {
210199
return Objects.hash(subQuery, script, minScore, indexName, shardId, indexVersion);
211200
}
201+
202+
203+
private static class ScriptScorer extends Scorer {
204+
private final ScoreScript scoreScript;
205+
private final Scorer subQueryScorer;
206+
private final ExplanationHolder explanation;
207+
208+
ScriptScorer(Weight weight, ScoreScript scoreScript, Scorer subQueryScorer,
209+
ScoreMode subQueryScoreMode, ExplanationHolder explanation) {
210+
super(weight);
211+
this.scoreScript = scoreScript;
212+
if (subQueryScoreMode == ScoreMode.COMPLETE) {
213+
scoreScript.setScorer(subQueryScorer);
214+
}
215+
this.subQueryScorer = subQueryScorer;
216+
this.explanation = explanation;
217+
}
218+
219+
@Override
220+
public float score() throws IOException {
221+
int docId = docID();
222+
scoreScript.setDocument(docId);
223+
float score = (float) scoreScript.execute(explanation);
224+
if (score == Float.NEGATIVE_INFINITY || Float.isNaN(score)) {
225+
throw new ElasticsearchException(
226+
"script_score query returned an invalid score [" + score + "] for doc [" + docId + "].");
227+
}
228+
return score;
229+
}
230+
@Override
231+
public int docID() {
232+
return subQueryScorer.docID();
233+
}
234+
235+
@Override
236+
public DocIdSetIterator iterator() {
237+
return subQueryScorer.iterator();
238+
}
239+
240+
@Override
241+
public float getMaxScore(int upTo) {
242+
return Float.MAX_VALUE; // TODO: what would be a good upper bound?
243+
}
244+
245+
}
246+
247+
private static class ScriptScorable extends Scorable {
248+
private final ScoreScript scoreScript;
249+
private final Scorable subQueryScorer;
250+
private final ExplanationHolder explanation;
251+
252+
ScriptScorable(ScoreScript scoreScript, Scorable subQueryScorer,
253+
ScoreMode subQueryScoreMode, ExplanationHolder explanation) {
254+
this.scoreScript = scoreScript;
255+
if (subQueryScoreMode == ScoreMode.COMPLETE) {
256+
scoreScript.setScorer(subQueryScorer);
257+
}
258+
this.subQueryScorer = subQueryScorer;
259+
this.explanation = explanation;
260+
}
261+
262+
@Override
263+
public float score() throws IOException {
264+
int docId = docID();
265+
scoreScript.setDocument(docId);
266+
float score = (float) scoreScript.execute(explanation);
267+
if (score == Float.NEGATIVE_INFINITY || Float.isNaN(score)) {
268+
throw new ElasticsearchException(
269+
"script_score query returned an invalid score [" + score + "] for doc [" + docId + "].");
270+
}
271+
return score;
272+
}
273+
@Override
274+
public int docID() {
275+
return subQueryScorer.docID();
276+
}
277+
}
278+
279+
/**
280+
* Use the {@link BulkScorer} of the sub-query,
281+
* as it may be significantly faster (e.g. BooleanScorer) than iterating over the scorer
282+
*/
283+
private static class ScriptScoreBulkScorer extends BulkScorer {
284+
private final BulkScorer subQueryBulkScorer;
285+
private final ScoreMode subQueryScoreMode;
286+
private final ScoreScript scoreScript;
287+
288+
ScriptScoreBulkScorer(BulkScorer subQueryBulkScorer, ScoreMode subQueryScoreMode, ScoreScript scoreScript) {
289+
this.subQueryBulkScorer = subQueryBulkScorer;
290+
this.subQueryScoreMode = subQueryScoreMode;
291+
this.scoreScript = scoreScript;
292+
}
293+
294+
@Override
295+
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
296+
return subQueryBulkScorer.score(wrapCollector(collector), acceptDocs, min, max);
297+
}
298+
299+
private LeafCollector wrapCollector(LeafCollector collector) {
300+
return new FilterLeafCollector(collector) {
301+
@Override
302+
public void setScorer(Scorable scorer) throws IOException {
303+
in.setScorer(new ScriptScorable(scoreScript, scorer, subQueryScoreMode, null));
304+
}
305+
};
306+
}
307+
308+
@Override
309+
public long cost() {
310+
return subQueryBulkScorer.cost();
311+
}
312+
313+
}
314+
212315
}

server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryIT.java

+29
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.action.search.SearchResponse;
2323
import org.elasticsearch.common.settings.Settings;
2424
import org.elasticsearch.index.fielddata.ScriptDocValues;
25+
import org.elasticsearch.index.query.QueryBuilder;
2526
import org.elasticsearch.index.query.RangeQueryBuilder;
2627
import org.elasticsearch.plugins.Plugin;
2728
import org.elasticsearch.script.MockScriptPlugin;
@@ -35,6 +36,7 @@
3536
import java.util.Map;
3637
import java.util.function.Function;
3738

39+
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
3840
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
3941
import static org.elasticsearch.index.query.QueryBuilders.scriptScoreQuery;
4042
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
@@ -104,6 +106,33 @@ public void testScriptScore() {
104106
assertOrderedSearchHits(resp, "10", "8", "6");
105107
}
106108

109+
public void testScriptScoreBoolQuery() {
110+
assertAcked(
111+
prepareCreate("test-index").addMapping("_doc", "field1", "type=text", "field2", "type=double")
112+
);
113+
int docCount = 10;
114+
for (int i = 1; i <= docCount; i++) {
115+
client().prepareIndex("test-index").setId("" + i)
116+
.setSource("field1", "text" + i, "field2", i)
117+
.get();
118+
}
119+
refresh();
120+
121+
Map<String, Object> params = new HashMap<>();
122+
params.put("param1", 0.1);
123+
Script script = new Script(ScriptType.INLINE, CustomScriptPlugin.NAME, "doc['field2'].value * param1", params);
124+
QueryBuilder boolQuery = boolQuery().should(matchQuery("field1", "text1")).should(matchQuery("field1", "text10"));
125+
SearchResponse resp = client()
126+
.prepareSearch("test-index")
127+
.setQuery(scriptScoreQuery(boolQuery, script))
128+
.get();
129+
assertNoFailures(resp);
130+
assertOrderedSearchHits(resp, "10", "1");
131+
assertFirstHit(resp, hasScore(1.0f));
132+
assertSecondHit(resp, hasScore(0.1f));
133+
}
134+
135+
107136
// test that when the internal query is rewritten script_score works well
108137
public void testRewrittenQuery() {
109138
assertAcked(

0 commit comments

Comments
 (0)