Skip to content

Commit 42b2268

Browse files
authored
Add a limit for graph phrase query expansion (#34061)
Backport of #34031 for 5.6
1 parent 06e8b46 commit 42b2268

File tree

7 files changed

+309
-0
lines changed

7 files changed

+309
-0
lines changed

core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,15 @@
3939
import org.apache.lucene.search.spans.SpanNearQuery;
4040
import org.apache.lucene.search.spans.SpanOrQuery;
4141
import org.apache.lucene.search.spans.SpanQuery;
42+
import org.apache.lucene.search.spans.SpanTermQuery;
4243
import org.apache.lucene.util.BytesRef;
4344
import org.apache.lucene.util.IOUtils;
4445
import org.apache.lucene.util.automaton.RegExp;
46+
import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
47+
import org.apache.lucene.util.QueryBuilder;
48+
import org.elasticsearch.common.Booleans;
49+
import org.elasticsearch.common.logging.DeprecationLogger;
50+
import org.elasticsearch.common.logging.Loggers;
4551
import org.elasticsearch.common.lucene.search.Queries;
4652
import org.elasticsearch.common.unit.Fuzziness;
4753
import org.elasticsearch.index.mapper.AllFieldMapper;
@@ -58,9 +64,11 @@
5864
import java.util.ArrayList;
5965
import java.util.Collection;
6066
import java.util.HashMap;
67+
import java.util.Iterator;
6168
import java.util.List;
6269
import java.util.Map;
6370
import java.util.Collections;
71+
6472
import static java.util.Collections.unmodifiableMap;
6573
import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded;
6674

@@ -72,6 +80,7 @@
7280
* as well as the query on the name.
7381
*/
7482
public class MapperQueryParser extends AnalyzingQueryParser {
83+
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(MapperQueryParser.class));
7584

7685
public static final Map<String, FieldQueryExtension> FIELD_QUERY_EXTENSIONS;
7786

@@ -828,6 +837,7 @@ public Query parse(String query) throws ParseException {
828837
* Checks if graph analysis should be enabled for the field depending
829838
* on the provided {@link Analyzer}
830839
*/
840+
@Override
831841
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
832842
String queryText, boolean quoted, int phraseSlop) {
833843
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
@@ -849,4 +859,131 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
849859
throw new RuntimeException("Error analyzing query text", e);
850860
}
851861
}
862+
863+
/**
864+
* See {@link MapperQueryParser#analyzeGraphPhraseWithLimit}
865+
*/
866+
@Override
867+
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
868+
return analyzeGraphPhraseWithLimit(source, field, phraseSlop, this::createSpanQuery, shouldApplyGraphPhraseLimit());
869+
}
870+
871+
/** A two-argument function, similar to {@link java.util.function.BiFunction}, whose {@code apply} may throw an {@link IOException}. */
872+
@FunctionalInterface
873+
public interface CheckedBiFunction<T, U, R> {
874+
875+
/**
876+
* Applies this function to the given arguments.
877+
*
878+
* @param t the first function argument
879+
* @param u the second function argument
880+
* @return the function result
* @throws IOException if the function fails with an I/O error
881+
*/
882+
R apply(T t, U u) throws IOException;
883+
}
884+
885+
/**
886+
* Checks the value of the JVM option <code>es.query.write.apply_graph_phrase_limit</code> to determine
887+
* if the analysis of graph phrase should be limited to {@link BooleanQuery#getMaxClauseCount()}.
888+
* The JVM option can only be set to <code>true</code> (false is the default value), any other value
889+
* will throw an {@link IllegalArgumentException}.
890+
*/
891+
public static boolean shouldApplyGraphPhraseLimit() {
892+
String value = System.getProperty("es.query.apply_graph_phrase_limit");
893+
if (value == null) {
894+
return false;
895+
} else if ("true".equals(value) == false) {
896+
throw new IllegalArgumentException("[" + value + "] is not a valid value for the JVM option:" +
897+
"[es.query.apply_graph_phrase_limit]. Set it to [true] to activate the limit.");
898+
} else {
899+
return true;
900+
}
901+
}
902+
903+
/**
904+
* Overrides {@link QueryBuilder#analyzeGraphPhrase(TokenStream, String, int)} to add
905+
* a limit (see {@link BooleanQuery#getMaxClauseCount()}) to the number of {@link SpanQuery}
906+
* that this method can create.
907+
*/
908+
public static SpanQuery analyzeGraphPhraseWithLimit(TokenStream source, String field, int phraseSlop,
909+
CheckedBiFunction<TokenStream, String, SpanQuery> spanQueryFunc,
910+
boolean isHardLimit) throws IOException {
911+
GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
912+
List<SpanQuery> clauses = new ArrayList<>();
913+
int[] articulationPoints = graph.articulationPoints();
914+
int lastState = 0;
915+
int maxBooleanClause = BooleanQuery.getMaxClauseCount();
916+
for (int i = 0; i <= articulationPoints.length; i++) {
917+
int start = lastState;
918+
int end = -1;
919+
if (i < articulationPoints.length) {
920+
end = articulationPoints[i];
921+
}
922+
lastState = end;
923+
final SpanQuery queryPos;
924+
if (graph.hasSidePath(start)) {
925+
List<SpanQuery> queries = new ArrayList<>();
926+
Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
927+
while (it.hasNext()) {
928+
TokenStream ts = it.next();
929+
SpanQuery q = spanQueryFunc.apply(ts, field);
930+
if (q != null) {
931+
if (queries.size() >= maxBooleanClause) {
932+
if (isHardLimit) {
933+
throw new BooleanQuery.TooManyClauses();
934+
} else {
935+
936+
}
937+
}
938+
queries.add(q);
939+
}
940+
}
941+
if (queries.size() > 0) {
942+
queryPos = new SpanOrQuery(queries.toArray(new SpanQuery[0]));
943+
} else {
944+
queryPos = null;
945+
}
946+
} else {
947+
Term[] terms = graph.getTerms(field, start);
948+
assert terms.length > 0;
949+
if (terms.length >= maxBooleanClause) {
950+
if (isHardLimit) {
951+
throw new BooleanQuery.TooManyClauses();
952+
} else {
953+
DEPRECATION_LOGGER.deprecated("Phrase query on field:[" + field + "] reached the max boolean" +
954+
" clause limit [" + maxBooleanClause + "] after expansion. This query will throw an error in" +
955+
" the next major version.");
956+
}
957+
}
958+
if (terms.length == 1) {
959+
queryPos = new SpanTermQuery(terms[0]);
960+
} else {
961+
SpanTermQuery[] orClauses = new SpanTermQuery[terms.length];
962+
for (int idx = 0; idx < terms.length; idx++) {
963+
orClauses[idx] = new SpanTermQuery(terms[idx]);
964+
}
965+
queryPos = new SpanOrQuery(orClauses);
966+
}
967+
}
968+
if (queryPos != null) {
969+
if (clauses.size() >= maxBooleanClause) {
970+
if (isHardLimit) {
971+
throw new BooleanQuery.TooManyClauses();
972+
} else {
973+
DEPRECATION_LOGGER.deprecated("Phrase query on field:[" + field + "] reached the max boolean" +
974+
" clause limit [" + maxBooleanClause + "] after expansion. This query will throw an error in" +
975+
" the next major version.");
976+
}
977+
}
978+
clauses.add(queryPos);
979+
}
980+
}
981+
if (clauses.isEmpty()) {
982+
return null;
983+
} else if (clauses.size() == 1) {
984+
return clauses.get(0);
985+
} else {
986+
return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), phraseSlop, true);
987+
}
988+
}
852989
}

core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,15 @@
2424
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
2525
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
2626
import org.apache.lucene.index.Term;
27+
import org.apache.lucene.queryparser.classic.MapperQueryParser;
2728
import org.apache.lucene.search.BooleanClause;
2829
import org.apache.lucene.search.BooleanQuery;
2930
import org.apache.lucene.search.BoostQuery;
3031
import org.apache.lucene.search.FuzzyQuery;
3132
import org.apache.lucene.search.PrefixQuery;
3233
import org.apache.lucene.search.Query;
3334
import org.apache.lucene.search.SynonymQuery;
35+
import org.apache.lucene.search.spans.SpanQuery;
3436
import org.apache.lucene.util.BytesRef;
3537
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
3638
import org.elasticsearch.index.mapper.MappedFieldType;
@@ -41,6 +43,9 @@
4143
import java.util.List;
4244
import java.util.ArrayList;
4345

46+
import static org.apache.lucene.queryparser.classic.MapperQueryParser.analyzeGraphPhraseWithLimit;
47+
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;
48+
4449
/**
4550
* Wrapper class for Lucene's SimpleQueryParser that allows us to redefine
4651
* different types of queries.
@@ -173,6 +178,7 @@ public Query newPrefixQuery(String text) {
173178
* Checks if graph analysis should be enabled for the field depending
174179
* on the provided {@link Analyzer}
175180
*/
181+
@Override
176182
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
177183
String queryText, boolean quoted, int phraseSlop) {
178184
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
@@ -195,6 +201,14 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
195201
}
196202
}
197203

204+
/**
205+
* See {@link MapperQueryParser#analyzeGraphPhraseWithLimit}
206+
*/
207+
@Override
208+
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
209+
return analyzeGraphPhraseWithLimit(source, field, phraseSlop, this::createSpanQuery, shouldApplyGraphPhraseLimit());
210+
}
211+
198212
private static Query wrapWithBoost(Query query, float boost) {
199213
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
200214
return new BoostQuery(query, boost);

core/src/main/java/org/elasticsearch/index/search/MatchQuery.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.lucene.analysis.TokenStream;
2525
import org.apache.lucene.index.Term;
2626
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
27+
import org.apache.lucene.queryparser.classic.MapperQueryParser;
2728
import org.apache.lucene.search.BooleanClause;
2829
import org.apache.lucene.search.BooleanClause.Occur;
2930
import org.apache.lucene.search.BooleanQuery;
@@ -58,6 +59,9 @@
5859

5960
import java.io.IOException;
6061

62+
import static org.apache.lucene.queryparser.classic.MapperQueryParser.analyzeGraphPhraseWithLimit;
63+
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;
64+
6165
public class MatchQuery {
6266

6367
public enum Type implements Writeable {
@@ -349,6 +353,14 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
349353
}
350354
}
351355

356+
/**
357+
* See {@link MapperQueryParser#analyzeGraphPhraseWithLimit}
358+
*/
359+
@Override
360+
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
361+
return analyzeGraphPhraseWithLimit(source, field, phraseSlop, this::createSpanQuery, shouldApplyGraphPhraseLimit());
362+
}
363+
352364
public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
353365
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
354366
return toMultiPhrasePrefix(query, phraseSlop, maxExpansions);

core/src/main/java/org/elasticsearch/search/SearchModule.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.elasticsearch.search;
2121

22+
import org.apache.lucene.queryparser.classic.MapperQueryParser;
2223
import org.apache.lucene.search.BooleanQuery;
2324
import org.elasticsearch.common.NamedRegistry;
2425
import org.elasticsearch.common.geo.ShapesAvailability;
@@ -258,6 +259,7 @@
258259

259260
import static java.util.Collections.unmodifiableMap;
260261
import static java.util.Objects.requireNonNull;
262+
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;
261263

262264
/**
263265
* Sets up things that can be done at search time like queries, aggregations, and suggesters.
@@ -282,6 +284,8 @@ public class SearchModule {
282284
public SearchModule(Settings settings, boolean transportClient, List<SearchPlugin> plugins) {
283285
this.settings = settings;
284286
this.transportClient = transportClient;
287+
// checks if the system property es.query.apply_graph_phrase_limit is set to a valid value
288+
shouldApplyGraphPhraseLimit();
285289
registerSuggesters(plugins);
286290
highlighters = setupHighlighters(settings, plugins);
287291
registerScoreFunctions(plugins);

0 commit comments

Comments
 (0)