Skip to content

Commit d805c41

Browse files
committed
Added new terms_set query
This query returns documents that match one or more of the provided terms. The number of terms that must match varies per document and is either controlled by a minimum should match field or computed per document in a minimum should match script. Closes elastic#26915
1 parent 354862c commit d805c41

File tree

7 files changed

+751
-1
lines changed

7 files changed

+751
-1
lines changed

core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ public static TermsQueryBuilder fromXContent(XContentParser parser) throws IOExc
391391
.queryName(queryName);
392392
}
393393

394-
private static List<Object> parseValues(XContentParser parser) throws IOException {
394+
static List<Object> parseValues(XContentParser parser) throws IOException {
395395
List<Object> values = new ArrayList<>();
396396
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
397397
Object value = parser.objectBytes();
Lines changed: 369 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,369 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.index.query;
20+
21+
import org.apache.lucene.index.DocValues;
22+
import org.apache.lucene.index.LeafReaderContext;
23+
import org.apache.lucene.index.NumericDocValues;
24+
import org.apache.lucene.index.SortedNumericDocValues;
25+
import org.apache.lucene.index.Term;
26+
import org.apache.lucene.search.BooleanQuery;
27+
import org.apache.lucene.search.CoveringQuery;
28+
import org.apache.lucene.search.DoubleValues;
29+
import org.apache.lucene.search.LongValues;
30+
import org.apache.lucene.search.LongValuesSource;
31+
import org.apache.lucene.search.Query;
32+
import org.apache.lucene.search.TermQuery;
33+
import org.elasticsearch.common.ParseField;
34+
import org.elasticsearch.common.ParsingException;
35+
import org.elasticsearch.common.io.stream.StreamInput;
36+
import org.elasticsearch.common.io.stream.StreamOutput;
37+
import org.elasticsearch.common.lucene.BytesRefs;
38+
import org.elasticsearch.common.lucene.search.Queries;
39+
import org.elasticsearch.common.xcontent.XContentBuilder;
40+
import org.elasticsearch.common.xcontent.XContentParser;
41+
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
42+
import org.elasticsearch.index.mapper.MappedFieldType;
43+
import org.elasticsearch.script.Script;
44+
import org.elasticsearch.script.SearchScript;
45+
46+
import java.io.IOException;
47+
import java.util.ArrayList;
48+
import java.util.HashMap;
49+
import java.util.List;
50+
import java.util.Map;
51+
import java.util.Objects;
52+
53+
public final class TermsSetQueryBuilder extends AbstractQueryBuilder<TermsSetQueryBuilder> {
54+
55+
// Name of this query: returned by getWriteableName() and used as the outer object key in doXContent().
public static final String NAME = "terms_set";
56+
57+
// Key of the array of terms inside the per-field object.
static final ParseField TERMS_FIELD = new ParseField("terms");
58+
// Key naming the field that holds the per-document minimum number of matching terms.
static final ParseField MINIMUM_SHOULD_MATCH_FIELD = new ParseField("minimum_should_match_field");
59+
// Key of the script that computes the per-document minimum number of matching terms.
static final ParseField MINIMUM_SHOULD_MATCH_SCRIPT = new ParseField("minimum_should_match_script");
60+
61+
// Field the terms are matched against.
private final String fieldName;
62+
// Terms to match, in the form produced by TermsQueryBuilder.convert(...) or read back from the stream.
private final List<?> values;
63+
64+
// At most one of the following two is non-null; the setters reject setting one while the other is set.
private String minimumShouldMatchField;
65+
private Script minimumShouldMatchScript;
66+
67+
public TermsSetQueryBuilder(String fieldName, List<?> values) {
68+
this.fieldName = Objects.requireNonNull(fieldName);
69+
this.values = TermsQueryBuilder.convert(Objects.requireNonNull(values));
70+
}
71+
72+
/**
 * Reads a builder from the stream. The fields are read in the same order
 * in which {@code doWriteTo} writes them.
 *
 * @param in the stream to read from
 * @throws IOException if reading from the stream fails
 */
public TermsSetQueryBuilder(StreamInput in) throws IOException {
    super(in);
    fieldName = in.readString();
    values = (List<?>) in.readGenericValue();
    // Both minimum-should-match settings are optional on the wire.
    minimumShouldMatchField = in.readOptionalString();
    minimumShouldMatchScript = in.readOptionalWriteable(Script::new);
}
79+
80+
@Override
81+
protected void doWriteTo(StreamOutput out) throws IOException {
82+
out.writeString(fieldName);
83+
out.writeGenericValue(values);
84+
out.writeOptionalString(minimumShouldMatchField);
85+
out.writeOptionalWriteable(minimumShouldMatchScript);
86+
}
87+
88+
public List<?> getValues() {
89+
return values;
90+
}
91+
92+
public String getMinimumShouldMatchField() {
93+
return minimumShouldMatchField;
94+
}
95+
96+
public TermsSetQueryBuilder setMinimumShouldMatchField(String minimumShouldMatchField) {
97+
if (minimumShouldMatchScript != null) {
98+
throw new IllegalArgumentException("A script has already been specified. Cannot specify both a field and script");
99+
}
100+
this.minimumShouldMatchField = minimumShouldMatchField;
101+
return this;
102+
}
103+
104+
public Script getMinimumShouldMatchScript() {
105+
return minimumShouldMatchScript;
106+
}
107+
108+
public TermsSetQueryBuilder setMinimumShouldMatchScript(Script minimumShouldMatchScript) {
109+
if (minimumShouldMatchField != null) {
110+
throw new IllegalArgumentException("A field has already been specified. Cannot specify both a field and script");
111+
}
112+
this.minimumShouldMatchScript = minimumShouldMatchScript;
113+
return this;
114+
}
115+
116+
@Override
117+
protected boolean doEquals(TermsSetQueryBuilder other) {
118+
return Objects.equals(fieldName, this.fieldName) && Objects.equals(values, this.values) &&
119+
Objects.equals(minimumShouldMatchField, this.minimumShouldMatchField) &&
120+
Objects.equals(minimumShouldMatchScript, this.minimumShouldMatchScript);
121+
}
122+
123+
@Override
124+
protected int doHashCode() {
125+
return Objects.hash(fieldName, values, minimumShouldMatchField, minimumShouldMatchScript);
126+
}
127+
128+
@Override
129+
public String getWriteableName() {
130+
return NAME;
131+
}
132+
133+
@Override
134+
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
135+
builder.startObject(NAME);
136+
builder.startObject(fieldName);
137+
builder.field(TERMS_FIELD.getPreferredName(), TermsQueryBuilder.convertBack(values));
138+
if (minimumShouldMatchField != null) {
139+
builder.field(MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatchField);
140+
}
141+
if (minimumShouldMatchScript != null) {
142+
builder.field(MINIMUM_SHOULD_MATCH_SCRIPT.getPreferredName(), minimumShouldMatchScript);
143+
}
144+
printBoostAndQueryName(builder);
145+
builder.endObject();
146+
builder.endObject();
147+
}
148+
149+
public static TermsSetQueryBuilder fromXContent(XContentParser parser) throws IOException {
150+
XContentParser.Token token = parser.nextToken();
151+
if (token != XContentParser.Token.FIELD_NAME) {
152+
throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
153+
}
154+
String currentFieldName = parser.currentName();
155+
String fieldName = currentFieldName;
156+
157+
token = parser.nextToken();
158+
if (token != XContentParser.Token.START_OBJECT) {
159+
throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
160+
}
161+
162+
List<Object> values = new ArrayList<>();
163+
String minimumShouldMatchField = null;
164+
Script minimumShouldMatchScript = null;
165+
String queryName = null;
166+
float boost = AbstractQueryBuilder.DEFAULT_BOOST;
167+
168+
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
169+
if (token == XContentParser.Token.FIELD_NAME) {
170+
currentFieldName = parser.currentName();
171+
} else if (token == XContentParser.Token.START_ARRAY) {
172+
if (TERMS_FIELD.match(currentFieldName)) {
173+
values = TermsQueryBuilder.parseValues(parser);
174+
} else {
175+
throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
176+
+ currentFieldName + "]");
177+
}
178+
} else if (token == XContentParser.Token.START_OBJECT) {
179+
if (MINIMUM_SHOULD_MATCH_SCRIPT.match(currentFieldName)) {
180+
minimumShouldMatchScript = Script.parse(parser);
181+
} else {
182+
throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
183+
+ currentFieldName + "]");
184+
}
185+
} else if (token.isValue()) {
186+
if (MINIMUM_SHOULD_MATCH_FIELD.match(currentFieldName)) {
187+
minimumShouldMatchField = parser.text();
188+
} else if (AbstractQueryBuilder.BOOST_FIELD.match(currentFieldName)) {
189+
boost = parser.floatValue();
190+
} else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) {
191+
queryName = parser.text();
192+
} else {
193+
throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support ["
194+
+ currentFieldName + "]");
195+
}
196+
} else {
197+
throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token +
198+
"] after [" + currentFieldName + "]");
199+
}
200+
}
201+
202+
token = parser.nextToken();
203+
if (token != XContentParser.Token.END_OBJECT) {
204+
throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]");
205+
}
206+
207+
TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(fieldName, values)
208+
.queryName(queryName).boost(boost);
209+
if (minimumShouldMatchField != null) {
210+
queryBuilder.setMinimumShouldMatchField(minimumShouldMatchField);
211+
}
212+
if (minimumShouldMatchScript != null) {
213+
queryBuilder.setMinimumShouldMatchScript(minimumShouldMatchScript);
214+
}
215+
return queryBuilder;
216+
}
217+
218+
@Override
219+
protected Query doToQuery(QueryShardContext context) throws IOException {
220+
if (values.isEmpty()) {
221+
return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
222+
}
223+
// Fail before we attempt to create the term queries:
224+
if (values.size() > BooleanQuery.getMaxClauseCount()) {
225+
throw new BooleanQuery.TooManyClauses();
226+
}
227+
228+
final MappedFieldType fieldType = context.fieldMapper(fieldName);
229+
final List<Query> queries = new ArrayList<>(values.size());
230+
for (Object value : values) {
231+
if (fieldType != null) {
232+
queries.add(fieldType.termQuery(value, context));
233+
} else {
234+
queries.add(new TermQuery(new Term(fieldName, BytesRefs.toBytesRef(value))));
235+
}
236+
}
237+
final LongValuesSource longValuesSource;
238+
if (minimumShouldMatchField != null) {
239+
MappedFieldType msmFieldType = context.fieldMapper(minimumShouldMatchField);
240+
if (msmFieldType == null) {
241+
throw new QueryShardException(context, "failed to find minimum_should_match field [" + minimumShouldMatchField + "]");
242+
}
243+
244+
IndexNumericFieldData fieldData = context.getForField(msmFieldType);
245+
longValuesSource = new FieldValuesSource(fieldData);
246+
} else if (minimumShouldMatchScript != null) {
247+
SearchScript.Factory factory = context.getScriptService().compile(minimumShouldMatchScript, SearchScript.CONTEXT);
248+
Map<String, Object> params = new HashMap<>();
249+
params.putAll(minimumShouldMatchScript.getParams());
250+
params.put("num_terms", queries.size());
251+
SearchScript.LeafFactory leafFactory = factory.newFactory(params, context.lookup());
252+
longValuesSource = new ScriptLongValueSource(minimumShouldMatchScript, leafFactory);
253+
} else {
254+
throw new IllegalStateException("No minimum should match has been specified");
255+
}
256+
return new CoveringQuery(queries, longValuesSource);
257+
}
258+
259+
static final class ScriptLongValueSource extends LongValuesSource {
260+
261+
private final Script script;
262+
private final SearchScript.LeafFactory leafFactory;
263+
264+
ScriptLongValueSource(Script script, SearchScript.LeafFactory leafFactory) {
265+
this.script = script;
266+
this.leafFactory = leafFactory;
267+
}
268+
269+
@Override
270+
public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
271+
SearchScript searchScript = leafFactory.newInstance(ctx);
272+
return new LongValues() {
273+
@Override
274+
public long longValue() throws IOException {
275+
return searchScript.runAsLong();
276+
}
277+
278+
@Override
279+
public boolean advanceExact(int doc) throws IOException {
280+
searchScript.setDocument(doc);
281+
return searchScript.run() != null;
282+
}
283+
};
284+
}
285+
286+
@Override
287+
public boolean needsScores() {
288+
return false;
289+
}
290+
291+
@Override
292+
public int hashCode() {
293+
// CoveringQuery with this field value source cannot be cachable
294+
return System.identityHashCode(this);
295+
}
296+
297+
@Override
298+
public boolean equals(Object obj) {
299+
return this == obj;
300+
}
301+
302+
@Override
303+
public String toString() {
304+
return "script(" + script.toString() + ")";
305+
}
306+
307+
}
308+
309+
// Forked from LongValuesSource.FieldValuesSource and changed getValues() method to always use sorted numeric
310+
// doc values, because that is what is being used in NumberFieldMapper.
311+
static class FieldValuesSource extends LongValuesSource {
312+
313+
private final IndexNumericFieldData field;
314+
315+
FieldValuesSource(IndexNumericFieldData field) {
316+
this.field = field;
317+
}
318+
319+
@Override
320+
public boolean equals(Object o) {
321+
if (this == o) return true;
322+
if (o == null || getClass() != o.getClass()) return false;
323+
FieldValuesSource that = (FieldValuesSource) o;
324+
return Objects.equals(field, that.field);
325+
}
326+
327+
@Override
328+
public String toString() {
329+
return "long(" + field + ")";
330+
}
331+
332+
@Override
333+
public int hashCode() {
334+
return Objects.hash(field);
335+
}
336+
337+
@Override
338+
public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
339+
SortedNumericDocValues values = field.load(ctx).getLongValues();
340+
return new LongValues() {
341+
342+
long current = -1;
343+
344+
@Override
345+
public long longValue() throws IOException {
346+
return current;
347+
}
348+
349+
@Override
350+
public boolean advanceExact(int doc) throws IOException {
351+
boolean hasValue = values.advanceExact(doc);
352+
if (hasValue) {
353+
assert values.docValueCount() == 1;
354+
current = values.nextValue();
355+
return true;
356+
} else {
357+
return false;
358+
}
359+
}
360+
};
361+
}
362+
363+
@Override
364+
public boolean needsScores() {
365+
return false;
366+
}
367+
}
368+
369+
}

core/src/main/java/org/elasticsearch/search/SearchModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.elasticsearch.index.query.SpanWithinQueryBuilder;
7070
import org.elasticsearch.index.query.TermQueryBuilder;
7171
import org.elasticsearch.index.query.TermsQueryBuilder;
72+
import org.elasticsearch.index.query.TermsSetQueryBuilder;
7273
import org.elasticsearch.index.query.TypeQueryBuilder;
7374
import org.elasticsearch.index.query.WildcardQueryBuilder;
7475
import org.elasticsearch.index.query.WrapperQueryBuilder;
@@ -748,6 +749,7 @@ private void registerQueryParsers(List<SearchPlugin> plugins) {
748749
registerQuery(new QuerySpec<>(GeoPolygonQueryBuilder.NAME, GeoPolygonQueryBuilder::new, GeoPolygonQueryBuilder::fromXContent));
749750
registerQuery(new QuerySpec<>(ExistsQueryBuilder.NAME, ExistsQueryBuilder::new, ExistsQueryBuilder::fromXContent));
750751
registerQuery(new QuerySpec<>(MatchNoneQueryBuilder.NAME, MatchNoneQueryBuilder::new, MatchNoneQueryBuilder::fromXContent));
752+
registerQuery(new QuerySpec<>(TermsSetQueryBuilder.NAME, TermsSetQueryBuilder::new, TermsSetQueryBuilder::fromXContent));
751753

752754
if (ShapesAvailability.JTS_AVAILABLE && ShapesAvailability.SPATIAL4J_AVAILABLE) {
753755
registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent));

0 commit comments

Comments
 (0)