Skip to content

Commit 93f29a4

Browse files
authored
Scripting: enable regular expressions by default (#63029)
* Setting `script.painless.regex.enabled` has a new option, `limited`, which is the default. This enables regular expressions but limits their complexity. In addition to `limited`, the setting can be `true`, as before, which enables regular expressions without limiting them. `false` totally disables regular expressions, which was the old default. * New setting `script.painless.regex.limit-factor`. This limits regular expression complexity by limiting the number of characters a regular expression can consider based on input length. The default is `6`, so a regular expression can consider `6` times the input length in characters. With input `foobarbaz` (length `9`), for example, the regular expression can consider `54` (`6 * 9`) characters. This reduces the impact of exponential backtracking in Java's regular expression engine. * Add `@inject_constant` annotation to whitelist. This annotation signals that compiler settings will be injected at the beginning of a whitelisted method. The format is `argnum=settingname`: `1=foo_setting 2=bar_setting`. Argument numbers must start at one and must be sequential. * Augment `Pattern.split(CharSequence)`, `Pattern.split(CharSequence, int)`, `Pattern.splitAsStream(CharSequence)`, and `Pattern.matcher(CharSequence)` to take the value of `script.painless.regex.limit-factor` as an injected parameter, limiting as explained above when this setting is in use. Fixes: #49873
1 parent cbf1a19 commit 93f29a4

33 files changed

+1367
-189
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.painless.spi.annotation;
21+
22+
import java.util.Collections;
23+
import java.util.List;
24+
25+
/**
26+
* Inject compiler setting constants.
27+
* Format: {@code @inject_constant[1="foo_compiler_setting", 2="bar_compiler_setting"]} injects "foo_compiler_setting" and
28+
* "bar_compiler_setting" as the first two arguments (other than receiver reference for instance methods) to the annotated method.
29+
*/
30+
public class InjectConstantAnnotation {
31+
public static final String NAME = "inject_constant";
32+
public final List<String> injects;
33+
public InjectConstantAnnotation(List<String> injects) {
34+
this.injects = Collections.unmodifiableList(injects);
35+
}
36+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.painless.spi.annotation;
21+
22+
import java.util.ArrayList;
23+
import java.util.Map;
24+
25+
public class InjectConstantAnnotationParser implements WhitelistAnnotationParser {
26+
27+
public static final InjectConstantAnnotationParser INSTANCE = new InjectConstantAnnotationParser();
28+
29+
private InjectConstantAnnotationParser() {}
30+
31+
@Override
32+
public Object parse(Map<String, String> arguments) {
33+
if (arguments.isEmpty()) {
34+
throw new IllegalArgumentException("[@inject_constant] requires at least one name to inject");
35+
}
36+
ArrayList<String> argList = new ArrayList<>(arguments.size());
37+
for (int i = 1; i <= arguments.size(); i++) {
38+
String argNum = Integer.toString(i);
39+
if (arguments.containsKey(argNum) == false) {
40+
throw new IllegalArgumentException("[@inject_constant] missing argument number [" + argNum + "]");
41+
}
42+
argList.add(arguments.get(argNum));
43+
}
44+
45+
return new InjectConstantAnnotation(argList);
46+
}
47+
}

modules/lang-painless/spi/src/main/java/org/elasticsearch/painless/spi/annotation/WhitelistAnnotationParser.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ public interface WhitelistAnnotationParser {
3535
Stream.of(
3636
new AbstractMap.SimpleEntry<>(NoImportAnnotation.NAME, NoImportAnnotationParser.INSTANCE),
3737
new AbstractMap.SimpleEntry<>(DeprecatedAnnotation.NAME, DeprecatedAnnotationParser.INSTANCE),
38-
new AbstractMap.SimpleEntry<>(NonDeterministicAnnotation.NAME, NonDeterministicAnnotationParser.INSTANCE)
38+
new AbstractMap.SimpleEntry<>(NonDeterministicAnnotation.NAME, NonDeterministicAnnotationParser.INSTANCE),
39+
new AbstractMap.SimpleEntry<>(InjectConstantAnnotation.NAME, InjectConstantAnnotationParser.INSTANCE)
3940
).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))
4041
);
4142

modules/lang-painless/src/main/java/org/elasticsearch/painless/Compiler.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ ScriptScope compile(Loader loader, String name, String source, CompilerSettings
220220
ScriptScope scriptScope = new ScriptScope(painlessLookup, settings, scriptClassInfo, scriptName, source, root.getIdentifier() + 1);
221221
new PainlessSemanticHeaderPhase().visitClass(root, scriptScope);
222222
new PainlessSemanticAnalysisPhase().visitClass(root, scriptScope);
223-
// TODO(stu): Make this phase optional #60156
223+
// TODO: Make this phase optional #60156
224224
new DocFieldsPhase().visitClass(root, scriptScope);
225225
new PainlessUserTreeToIRTreePhase().visitClass(root, scriptScope);
226226
ClassNode classNode = (ClassNode)scriptScope.getDecoration(root, IRNodeDecoration.class).getIRNode();
@@ -255,7 +255,7 @@ byte[] compile(String name, String source, CompilerSettings settings, Printer de
255255
ScriptScope scriptScope = new ScriptScope(painlessLookup, settings, scriptClassInfo, scriptName, source, root.getIdentifier() + 1);
256256
new PainlessSemanticHeaderPhase().visitClass(root, scriptScope);
257257
new PainlessSemanticAnalysisPhase().visitClass(root, scriptScope);
258-
// TODO(stu): Make this phase optional #60156
258+
// TODO: Make this phase optional #60156
259259
new DocFieldsPhase().visitClass(root, scriptScope);
260260
new PainlessUserTreeToIRTreePhase().visitClass(root, scriptScope);
261261
ClassNode classNode = (ClassNode)scriptScope.getDecoration(root, IRNodeDecoration.class).getIRNode();

modules/lang-painless/src/main/java/org/elasticsearch/painless/CompilerSettings.java

+96-12
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,28 @@
2121

2222
import org.elasticsearch.common.settings.Setting;
2323
import org.elasticsearch.common.settings.Setting.Property;
24+
import org.elasticsearch.painless.api.Augmentation;
25+
26+
import java.util.HashMap;
27+
import java.util.Map;
2428

2529
/**
2630
* Settings to use when compiling a script.
2731
*/
2832
public final class CompilerSettings {
2933
/**
30-
* Are regexes enabled? This is a node level setting because regexes break out of painless's lovely sandbox and can cause stack
31-
* overflows and we can't analyze the regex to be sure it won't.
34+
* Are regexes enabled? If {@code true}, regexes are enabled and unlimited by the limit factor. If {@code false}, they are completely
35+
* disabled. If {@code limited}, the default, regexes are enabled but limited in complexity according to the
36+
* {@code script.painless.regex.limit-factor} setting.
37+
*/
38+
public static final Setting<RegexEnabled> REGEX_ENABLED =
39+
new Setting<>("script.painless.regex.enabled", RegexEnabled.LIMITED.value, RegexEnabled::parse, Property.NodeScope);
40+
41+
/**
42+
* How complex can a regex be? This is the number of characters that can be considered expressed as a multiple of string length.
3243
*/
33-
public static final Setting<Boolean> REGEX_ENABLED = Setting.boolSetting("script.painless.regex.enabled", false, Property.NodeScope);
44+
public static final Setting<Integer> REGEX_LIMIT_FACTOR =
45+
Setting.intSetting("script.painless.regex.limit-factor", 6, 1, Property.NodeScope);
3446

3547
/**
3648
* Constant to be used when specifying the maximum loop counter when compiling a script.
@@ -65,12 +77,20 @@ public final class CompilerSettings {
6577
* For testing. Do not use.
6678
*/
6779
private int initialCallSiteDepth = 0;
80+
private int testInject0 = 2;
81+
private int testInject1 = 4;
82+
private int testInject2 = 6;
6883

6984
/**
70-
* Are regexes enabled? They are currently disabled by default because they break out of the loop counter and even fairly simple
71-
* <strong>looking</strong> regexes can cause stack overflows.
85+
* Are regexes enabled? Defaults to {@code LIMITED}: enabled, but bounded in complexity by the regex limit factor.
7286
*/
73-
private boolean regexesEnabled = false;
87+
private RegexEnabled regexesEnabled = RegexEnabled.LIMITED;
88+
89+
90+
/**
91+
* How complex can regexes be? Expressed as a multiple of the input string length.
92+
*/
93+
private int regexLimitFactor = 0;
7494

7595
/**
7696
* Returns the value for the cumulative total number of statements that can be made in all loops
@@ -123,18 +143,82 @@ public void setInitialCallSiteDepth(int depth) {
123143
}
124144

125145
/**
126-
* Are regexes enabled? They are currently disabled by default because they break out of the loop counter and even fairly simple
127-
* <strong>looking</strong> regexes can cause stack overflows.
146+
* Are regexes enabled?
128147
*/
129-
public boolean areRegexesEnabled() {
148+
public RegexEnabled areRegexesEnabled() {
130149
return regexesEnabled;
131150
}
132151

133152
/**
134-
* Are regexes enabled? They are currently disabled by default because they break out of the loop counter and even fairly simple
135-
* <strong>looking</strong> regexes can cause stack overflows.
153+
* Are regexes enabled or limited?
136154
*/
137-
public void setRegexesEnabled(boolean regexesEnabled) {
155+
public void setRegexesEnabled(RegexEnabled regexesEnabled) {
138156
this.regexesEnabled = regexesEnabled;
139157
}
158+
159+
/**
160+
* What is the limitation on regex complexity? How many multiples of input length can a regular expression consider?
161+
*/
162+
public void setRegexLimitFactor(int regexLimitFactor) {
163+
this.regexLimitFactor = regexLimitFactor;
164+
}
165+
166+
/**
167+
* What is the limit factor for regexes?
168+
*/
169+
public int getRegexLimitFactor() {
170+
return regexLimitFactor;
171+
}
172+
173+
/**
174+
* Get compiler settings as a map. This is used to inject compiler settings into augmented methods with the {@code @inject_constant}
175+
* annotation.
176+
*/
177+
public Map<String, Object> asMap() {
178+
int regexLimitFactor = this.regexLimitFactor;
179+
if (regexesEnabled == RegexEnabled.TRUE) {
180+
regexLimitFactor = Augmentation.UNLIMITED_PATTERN_FACTOR;
181+
} else if (regexesEnabled == RegexEnabled.FALSE) {
182+
regexLimitFactor = Augmentation.DISABLED_PATTERN_FACTOR;
183+
}
184+
Map<String, Object> map = new HashMap<>();
185+
map.put("regex_limit_factor", regexLimitFactor);
186+
187+
// for testing only
188+
map.put("testInject0", testInject0);
189+
map.put("testInject1", testInject1);
190+
map.put("testInject2", testInject2);
191+
192+
return map;
193+
}
194+
195+
/**
196+
* Options for {@code script.painless.regex.enabled} setting.
197+
*/
198+
public enum RegexEnabled {
199+
TRUE("true"),
200+
FALSE("false"),
201+
LIMITED("limited");
202+
final String value;
203+
204+
RegexEnabled(String value) {
205+
this.value = value;
206+
}
207+
208+
/**
209+
* Parse string value, necessary because `valueOf` would require strings to be upper case.
210+
*/
211+
public static RegexEnabled parse(String value) {
212+
if (TRUE.value.equals(value)) {
213+
return TRUE;
214+
} else if (FALSE.value.equals(value)) {
215+
return FALSE;
216+
} else if (LIMITED.value.equals(value)) {
217+
return LIMITED;
218+
}
219+
throw new IllegalArgumentException(
220+
"invalid value [" + value + "] must be one of [" + TRUE.value + "," + FALSE.value + "," + LIMITED.value + "]"
221+
);
222+
}
223+
}
140224
}

modules/lang-painless/src/main/java/org/elasticsearch/painless/Def.java

+49-23
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ static MethodHandle arrayLengthGetter(Class<?> arrayType) {
182182
* Otherwise it returns a handle to the matching method.
183183
* <p>
184184
* @param painlessLookup the whitelist
185+
* @param functions user defined functions and lambdas
186+
* @param constants available constants to be used if the method has the {@code InjectConstantAnnotation}
185187
* @param methodHandlesLookup caller's lookup
186188
* @param callSiteType callsite's type
187189
* @param receiverClass Class of the object to invoke the method on.
@@ -191,8 +193,8 @@ static MethodHandle arrayLengthGetter(Class<?> arrayType) {
191193
* @throws IllegalArgumentException if no matching whitelisted method was found.
192194
* @throws Throwable if a method reference cannot be converted to a functional interface
193195
*/
194-
static MethodHandle lookupMethod(PainlessLookup painlessLookup, FunctionTable functions,
195-
MethodHandles.Lookup methodHandlesLookup, MethodType callSiteType, Class<?> receiverClass, String name, Object args[])
196+
static MethodHandle lookupMethod(PainlessLookup painlessLookup, FunctionTable functions, Map<String, Object> constants,
197+
MethodHandles.Lookup methodHandlesLookup, MethodType callSiteType, Class<?> receiverClass, String name, Object[] args)
196198
throws Throwable {
197199

198200
String recipeString = (String) args[0];
@@ -206,7 +208,15 @@ static MethodHandle lookupMethod(PainlessLookup painlessLookup, FunctionTable fu
206208
"[" + typeToCanonicalTypeName(receiverClass) + ", " + name + "/" + (numArguments - 1) + "] not found");
207209
}
208210

209-
return painlessMethod.methodHandle;
211+
MethodHandle handle = painlessMethod.methodHandle;
212+
Object[] injections = PainlessLookupUtility.buildInjections(painlessMethod, constants);
213+
214+
if (injections.length > 0) {
215+
// method handle contains the "this" pointer so start injections at 1
216+
handle = MethodHandles.insertArguments(handle, 1, injections);
217+
}
218+
219+
return handle;
210220
}
211221

212222
// convert recipe string to a bitset for convenience (the code below should be refactored...)
@@ -236,7 +246,13 @@ static MethodHandle lookupMethod(PainlessLookup painlessLookup, FunctionTable fu
236246
"dynamic method [" + typeToCanonicalTypeName(receiverClass) + ", " + name + "/" + arity + "] not found");
237247
}
238248

239-
MethodHandle handle = method.methodHandle;
249+
MethodHandle handle = method.methodHandle;
250+
Object[] injections = PainlessLookupUtility.buildInjections(method, constants);
251+
252+
if (injections.length > 0) {
253+
// method handle contains the "this" pointer so start injections at 1
254+
handle = MethodHandles.insertArguments(handle, 1, injections);
255+
}
240256

241257
int replaced = 0;
242258
upTo = 1;
@@ -257,22 +273,25 @@ static MethodHandle lookupMethod(PainlessLookup painlessLookup, FunctionTable fu
257273
// we have everything.
258274
filter = lookupReferenceInternal(painlessLookup,
259275
functions,
276+
constants,
260277
methodHandlesLookup,
261278
interfaceType,
262279
type,
263280
call,
264-
numCaptures);
281+
numCaptures
282+
);
265283
} else if (signature.charAt(0) == 'D') {
266284
// the interface type is now known, but we need to get the implementation.
267285
// this is dynamically based on the receiver type (and cached separately, underneath
268286
// this cache). It won't blow up since we never nest here (just references)
269-
Class<?> captures[] = new Class<?>[numCaptures];
287+
Class<?>[] captures = new Class<?>[numCaptures];
270288
for (int capture = 0; capture < captures.length; capture++) {
271289
captures[capture] = callSiteType.parameterType(i + 1 + capture);
272290
}
273291
MethodType nestedType = MethodType.methodType(interfaceType, captures);
274292
CallSite nested = DefBootstrap.bootstrap(painlessLookup,
275293
functions,
294+
constants,
276295
methodHandlesLookup,
277296
call,
278297
nestedType,
@@ -300,8 +319,10 @@ static MethodHandle lookupMethod(PainlessLookup painlessLookup, FunctionTable fu
300319
* This is just like LambdaMetaFactory, only with a dynamic type. The interface type is known,
301320
* so we simply need to lookup the matching implementation method based on receiver type.
302321
*/
303-
static MethodHandle lookupReference(PainlessLookup painlessLookup, FunctionTable functions,
304-
MethodHandles.Lookup methodHandlesLookup, String interfaceClass, Class<?> receiverClass, String name) throws Throwable {
322+
static MethodHandle lookupReference(PainlessLookup painlessLookup, FunctionTable functions, Map<String, Object> constants,
323+
MethodHandles.Lookup methodHandlesLookup, String interfaceClass, Class<?> receiverClass, String name)
324+
throws Throwable {
325+
305326
Class<?> interfaceType = painlessLookup.canonicalTypeNameToType(interfaceClass);
306327
if (interfaceType == null) {
307328
throw new IllegalArgumentException("type [" + interfaceClass + "] not found");
@@ -317,25 +338,30 @@ static MethodHandle lookupReference(PainlessLookup painlessLookup, FunctionTable
317338
"dynamic method [" + typeToCanonicalTypeName(receiverClass) + ", " + name + "/" + arity + "] not found");
318339
}
319340

320-
return lookupReferenceInternal(painlessLookup, functions, methodHandlesLookup,
321-
interfaceType, PainlessLookupUtility.typeToCanonicalTypeName(implMethod.targetClass),
322-
implMethod.javaMethod.getName(), 1);
341+
return lookupReferenceInternal(painlessLookup, functions, constants,
342+
methodHandlesLookup, interfaceType, PainlessLookupUtility.typeToCanonicalTypeName(implMethod.targetClass),
343+
implMethod.javaMethod.getName(), 1);
323344
}
324345

325346
/** Returns a method handle to an implementation of clazz, given method reference signature. */
326-
private static MethodHandle lookupReferenceInternal(PainlessLookup painlessLookup, FunctionTable functions,
327-
MethodHandles.Lookup methodHandlesLookup, Class<?> clazz, String type, String call, int captures) throws Throwable {
328-
final FunctionRef ref = FunctionRef.create(painlessLookup, functions, null, clazz, type, call, captures);
347+
private static MethodHandle lookupReferenceInternal(
348+
PainlessLookup painlessLookup, FunctionTable functions, Map<String, Object> constants,
349+
MethodHandles.Lookup methodHandlesLookup, Class<?> clazz, String type, String call, int captures
350+
) throws Throwable {
351+
352+
final FunctionRef ref = FunctionRef.create(painlessLookup, functions, null, clazz, type, call, captures, constants);
329353
final CallSite callSite = LambdaBootstrap.lambdaBootstrap(
330-
methodHandlesLookup,
331-
ref.interfaceMethodName,
332-
ref.factoryMethodType,
333-
ref.interfaceMethodType,
334-
ref.delegateClassName,
335-
ref.delegateInvokeType,
336-
ref.delegateMethodName,
337-
ref.delegateMethodType,
338-
ref.isDelegateInterface ? 1 : 0
354+
methodHandlesLookup,
355+
ref.interfaceMethodName,
356+
ref.factoryMethodType,
357+
ref.interfaceMethodType,
358+
ref.delegateClassName,
359+
ref.delegateInvokeType,
360+
ref.delegateMethodName,
361+
ref.delegateMethodType,
362+
ref.isDelegateInterface ? 1 : 0,
363+
ref.isDelegateAugmented ? 1 : 0,
364+
ref.delegateInjections
339365
);
340366
return callSite.dynamicInvoker().asType(MethodType.methodType(clazz, ref.factoryMethodType.parameterArray()));
341367
}

0 commit comments

Comments
 (0)