39
39
import org .apache .lucene .search .spans .SpanNearQuery ;
40
40
import org .apache .lucene .search .spans .SpanOrQuery ;
41
41
import org .apache .lucene .search .spans .SpanQuery ;
42
+ import org .apache .lucene .search .spans .SpanTermQuery ;
42
43
import org .apache .lucene .util .BytesRef ;
43
44
import org .apache .lucene .util .IOUtils ;
44
45
import org .apache .lucene .util .automaton .RegExp ;
46
+ import org .apache .lucene .util .graph .GraphTokenStreamFiniteStrings ;
47
+ import org .apache .lucene .util .QueryBuilder ;
48
+ import org .elasticsearch .common .Booleans ;
49
+ import org .elasticsearch .common .logging .DeprecationLogger ;
50
+ import org .elasticsearch .common .logging .Loggers ;
45
51
import org .elasticsearch .common .lucene .search .Queries ;
46
52
import org .elasticsearch .common .unit .Fuzziness ;
47
53
import org .elasticsearch .index .mapper .AllFieldMapper ;
58
64
import java .util .ArrayList ;
59
65
import java .util .Collection ;
60
66
import java .util .HashMap ;
67
+ import java .util .Iterator ;
61
68
import java .util .List ;
62
69
import java .util .Map ;
63
70
import java .util .Collections ;
71
+
64
72
import static java .util .Collections .unmodifiableMap ;
65
73
import static org .elasticsearch .common .lucene .search .Queries .fixNegativeQueryIfNeeded ;
66
74
72
80
* as well as the query on the name.
73
81
*/
74
82
public class MapperQueryParser extends AnalyzingQueryParser {
83
// Deprecation logger for this parser; used by analyzeGraphPhraseWithLimit to warn when the max boolean clause limit is reached but the hard limit is not enabled.
+ private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger (Loggers .getLogger (MapperQueryParser .class ));
75
84
76
85
public static final Map <String , FieldQueryExtension > FIELD_QUERY_EXTENSIONS ;
77
86
@@ -828,6 +837,7 @@ public Query parse(String query) throws ParseException {
828
837
* Checks if graph analysis should be enabled for the field depending
829
838
* on the provided {@link Analyzer}
830
839
*/
840
+ @ Override
831
841
protected Query createFieldQuery (Analyzer analyzer , BooleanClause .Occur operator , String field ,
832
842
String queryText , boolean quoted , int phraseSlop ) {
833
843
assert operator == BooleanClause .Occur .SHOULD || operator == BooleanClause .Occur .MUST ;
@@ -849,4 +859,131 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
849
859
throw new RuntimeException ("Error analyzing query text" , e );
850
860
}
851
861
}
862
+
863
+ /**
864
+ * See {@link MapperQueryParser#analyzeGraphPhraseWithLimit}
865
+ */
866
+ @ Override
867
+ protected SpanQuery analyzeGraphPhrase (TokenStream source , String field , int phraseSlop ) throws IOException {
868
+ return analyzeGraphPhraseWithLimit (source , field , phraseSlop , this ::createSpanQuery , shouldApplyGraphPhraseLimit ());
869
+ }
870
+
871
/**
 * A two-argument function, analogous to {@link java.util.function.BiFunction},
 * whose {@link #apply} method is allowed to throw an {@link IOException}.
 *
 * @param <T> the type of the first argument
 * @param <U> the type of the second argument
 * @param <R> the type of the result
 */
@FunctionalInterface
public interface CheckedBiFunction<T, U, R> {

    /**
     * Computes the result for the supplied pair of arguments.
     *
     * @param t the first function argument
     * @param u the second function argument
     * @return the function result
     * @throws IOException if the implementation fails with an I/O error
     */
    R apply(T t, U u) throws IOException;
}
884
+
885
+ /**
886
+ * Checks the value of the JVM option <code>es.query.write.apply_graph_phrase_limit</code> to determine
887
+ * if the analysis of graph phrase should be limited to {@link BooleanQuery#getMaxClauseCount()}.
888
+ * The JVM option can only be set to <code>true</code> (false is the default value), any other value
889
+ * will throw an {@link IllegalArgumentException}.
890
+ */
891
+ public static boolean shouldApplyGraphPhraseLimit () {
892
+ String value = System .getProperty ("es.query.apply_graph_phrase_limit" );
893
+ if (value == null ) {
894
+ return false ;
895
+ } else if ("true" .equals (value ) == false ) {
896
+ throw new IllegalArgumentException ("[" + value + "] is not a valid value for the JVM option:" +
897
+ "[es.query.apply_graph_phrase_limit]. Set it to [true] to activate the limit." );
898
+ } else {
899
+ return true ;
900
+ }
901
+ }
902
+
903
+ /**
904
+ * Overrides {@link QueryBuilder#analyzeGraphPhrase(TokenStream, String, int)} to add
905
+ * a limit (see {@link BooleanQuery#getMaxClauseCount()}) to the number of {@link SpanQuery}
906
+ * that this method can create.
907
+ */
908
+ public static SpanQuery analyzeGraphPhraseWithLimit (TokenStream source , String field , int phraseSlop ,
909
+ CheckedBiFunction <TokenStream , String , SpanQuery > spanQueryFunc ,
910
+ boolean isHardLimit ) throws IOException {
911
+ GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings (source );
912
+ List <SpanQuery > clauses = new ArrayList <>();
913
+ int [] articulationPoints = graph .articulationPoints ();
914
+ int lastState = 0 ;
915
+ int maxBooleanClause = BooleanQuery .getMaxClauseCount ();
916
+ for (int i = 0 ; i <= articulationPoints .length ; i ++) {
917
+ int start = lastState ;
918
+ int end = -1 ;
919
+ if (i < articulationPoints .length ) {
920
+ end = articulationPoints [i ];
921
+ }
922
+ lastState = end ;
923
+ final SpanQuery queryPos ;
924
+ if (graph .hasSidePath (start )) {
925
+ List <SpanQuery > queries = new ArrayList <>();
926
+ Iterator <TokenStream > it = graph .getFiniteStrings (start , end );
927
+ while (it .hasNext ()) {
928
+ TokenStream ts = it .next ();
929
+ SpanQuery q = spanQueryFunc .apply (ts , field );
930
+ if (q != null ) {
931
+ if (queries .size () >= maxBooleanClause ) {
932
+ if (isHardLimit ) {
933
+ throw new BooleanQuery .TooManyClauses ();
934
+ } else {
935
+
936
+ }
937
+ }
938
+ queries .add (q );
939
+ }
940
+ }
941
+ if (queries .size () > 0 ) {
942
+ queryPos = new SpanOrQuery (queries .toArray (new SpanQuery [0 ]));
943
+ } else {
944
+ queryPos = null ;
945
+ }
946
+ } else {
947
+ Term [] terms = graph .getTerms (field , start );
948
+ assert terms .length > 0 ;
949
+ if (terms .length >= maxBooleanClause ) {
950
+ if (isHardLimit ) {
951
+ throw new BooleanQuery .TooManyClauses ();
952
+ } else {
953
+ DEPRECATION_LOGGER .deprecated ("Phrase query on field:[" + field + "] reached the max boolean" +
954
+ " clause limit [" + maxBooleanClause + "] after expansion. This query will throw an error in" +
955
+ " the next major version." );
956
+ }
957
+ }
958
+ if (terms .length == 1 ) {
959
+ queryPos = new SpanTermQuery (terms [0 ]);
960
+ } else {
961
+ SpanTermQuery [] orClauses = new SpanTermQuery [terms .length ];
962
+ for (int idx = 0 ; idx < terms .length ; idx ++) {
963
+ orClauses [idx ] = new SpanTermQuery (terms [idx ]);
964
+ }
965
+ queryPos = new SpanOrQuery (orClauses );
966
+ }
967
+ }
968
+ if (queryPos != null ) {
969
+ if (clauses .size () >= maxBooleanClause ) {
970
+ if (isHardLimit ) {
971
+ throw new BooleanQuery .TooManyClauses ();
972
+ } else {
973
+ DEPRECATION_LOGGER .deprecated ("Phrase query on field:[" + field + "] reached the max boolean" +
974
+ " clause limit [" + maxBooleanClause + "] after expansion. This query will throw an error in" +
975
+ " the next major version." );
976
+ }
977
+ }
978
+ clauses .add (queryPos );
979
+ }
980
+ }
981
+ if (clauses .isEmpty ()) {
982
+ return null ;
983
+ } else if (clauses .size () == 1 ) {
984
+ return clauses .get (0 );
985
+ } else {
986
+ return new SpanNearQuery (clauses .toArray (new SpanQuery [0 ]), phraseSlop , true );
987
+ }
988
+ }
852
989
}
0 commit comments