Skip to content

Commit a684c4c

Browse files
Christoph BüscherGurkan Kaymak
Christoph Büscher
authored and
Gurkan Kaymak
committed
Make Fuzziness reject illegal values earlier (elastic#33511)
The current java implementation of Fuzziness leaves a lot of room for initializing it with illegal values that either cause errors later when the queries reach the shards where they are executed or values that are silently ignored in favour of defaults. We should instead tighten the java implementation of the class so that we only accept supported values. Currently those are numeric values representing the edit distances 0, 1 and 2, optionally also as float or string, and the "AUTO" fuzziness, which can come in a cusomizable variant that allows specifying two value that define the positions in a term where the AUTO option increases the allowed edit distance. This change removes several redundant ways of object construction and adds input validation to the remaining ones. Java users should either use one of the predefined constants or use the static factory methods `fromEdits(int)` or `fromString(String)` to create instances of the class, while other ctors are hidden. This allows for instance control, e.g. returning one of the constants when creating instances from an integer value. Previously the class would accept any positive integer value and any float value, while in effect the maximum allowed edit distance was capped at 2 in practice. These values while throw an error now, as will any other String value other than "AUTO" that where previously accepted but led to numeric exceptions when the query was executed.
1 parent 4071a73 commit a684c4c

File tree

17 files changed

+264
-183
lines changed

17 files changed

+264
-183
lines changed

docs/reference/migration/migrate_8_0.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ coming[8.0.0]
1616
* <<breaking_80_mappings_changes>>
1717
* <<breaking_80_snapshots_changes>>
1818
* <<breaking_80_security_changes>>
19+
* <<breaking_80_java_changes>>
1920

2021
//NOTE: The notable-breaking-changes tagged regions are re-used in the
2122
//Installation and Upgrade Guide
@@ -43,3 +44,4 @@ include::migrate_8_0/discovery.asciidoc[]
4344
include::migrate_8_0/mappings.asciidoc[]
4445
include::migrate_8_0/snapshots.asciidoc[]
4546
include::migrate_8_0/security.asciidoc[]
47+
include::migrate_8_0/java.asciidoc[]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
[float]
2+
[[breaking_80_java_changes]]
3+
=== Java API changes
4+
5+
[float]
6+
==== Changes to Fuzziness
7+
8+
To create `Fuzziness` instances, use the `fromString` and `fromEdits` method
9+
instead of the `build` method that used to accept both Strings and numeric
10+
values. Several fuzziness setters on query builders (e.g.
11+
MatchQueryBuilder#fuzziness) now accept only a `Fuzziness`instance instead of
12+
an Object. You should preferably use the available constants (e.g.
13+
Fuzziness.ONE, Fuzziness.AUTO) or build your own instance using the above
14+
mentioned factory methods.
15+
16+
Fuzziness used to be lenient when it comes to parsing arbitrary numeric values
17+
while silently truncating them to one of the three allowed edit distances 0, 1
18+
or 2. This leniency is now removed and the class will throw errors when trying
19+
to construct an instance with another value (e.g. floats like 1.3 used to get
20+
accepted but truncated to 1). You should use one of the allowed values.

server/src/main/java/org/elasticsearch/common/unit/Fuzziness.java

Lines changed: 74 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import org.elasticsearch.ElasticsearchParseException;
2222
import org.elasticsearch.common.ParseField;
23+
import org.elasticsearch.common.Strings;
2324
import org.elasticsearch.common.io.stream.StreamInput;
2425
import org.elasticsearch.common.io.stream.StreamOutput;
2526
import org.elasticsearch.common.io.stream.Writeable;
@@ -38,41 +39,73 @@
3839
*/
3940
public final class Fuzziness implements ToXContentFragment, Writeable {
4041

41-
public static final String X_FIELD_NAME = "fuzziness";
42-
public static final Fuzziness ZERO = new Fuzziness(0);
43-
public static final Fuzziness ONE = new Fuzziness(1);
44-
public static final Fuzziness TWO = new Fuzziness(2);
45-
public static final Fuzziness AUTO = new Fuzziness("AUTO");
42+
static final String X_FIELD_NAME = "fuzziness";
4643
public static final ParseField FIELD = new ParseField(X_FIELD_NAME);
47-
private static final int DEFAULT_LOW_DISTANCE = 3;
48-
private static final int DEFAULT_HIGH_DISTANCE = 6;
44+
45+
public static final Fuzziness ZERO = new Fuzziness("0");
46+
public static final Fuzziness ONE = new Fuzziness("1");
47+
public static final Fuzziness TWO = new Fuzziness("2");
48+
public static final Fuzziness AUTO = new Fuzziness("AUTO");
49+
50+
static final int DEFAULT_LOW_DISTANCE = 3;
51+
static final int DEFAULT_HIGH_DISTANCE = 6;
4952

5053
private final String fuzziness;
5154
private int lowDistance = DEFAULT_LOW_DISTANCE;
5255
private int highDistance = DEFAULT_HIGH_DISTANCE;
5356

54-
private Fuzziness(int fuzziness) {
55-
if (fuzziness != 0 && fuzziness != 1 && fuzziness != 2) {
56-
throw new IllegalArgumentException("Valid edit distances are [0, 1, 2] but was [" + fuzziness + "]");
57-
}
58-
this.fuzziness = Integer.toString(fuzziness);
57+
private Fuzziness(String fuzziness) {
58+
this.fuzziness = fuzziness;
5959
}
6060

61-
private Fuzziness(String fuzziness) {
62-
if (fuzziness == null || fuzziness.isEmpty()) {
63-
throw new IllegalArgumentException("fuzziness can't be null!");
61+
/**
62+
* Creates a {@link Fuzziness} instance from an edit distance. The value must be one of {@code [0, 1, 2]}
63+
* Note: Using this method only makes sense if the field you are applying Fuzziness to is some sort of string.
64+
* @throws IllegalArgumentException if the edit distance is not in [0, 1, 2]
65+
*/
66+
public static Fuzziness fromEdits(int edits) {
67+
switch (edits) {
68+
case 0: return Fuzziness.ZERO;
69+
case 1: return Fuzziness.ONE;
70+
case 2: return Fuzziness.TWO;
71+
default:
72+
throw new IllegalArgumentException("Valid edit distances are [0, 1, 2] but was [" + edits + "]");
6473
}
65-
this.fuzziness = fuzziness.toUpperCase(Locale.ROOT);
6674
}
6775

68-
private Fuzziness(String fuzziness, int lowDistance, int highDistance) {
69-
this(fuzziness);
70-
if (lowDistance < 0 || highDistance < 0 || lowDistance > highDistance) {
71-
throw new IllegalArgumentException("fuzziness wrongly configured, must be: lowDistance > 0, highDistance" +
72-
" > 0 and lowDistance <= highDistance ");
76+
/**
77+
* Creates a {@link Fuzziness} instance from a String representation. This can
78+
* either be an edit distance where the value must be one of
79+
* {@code ["0", "1", "2"]} or "AUTO" for a fuzziness that depends on the term
80+
* length. Using the "AUTO" fuzziness, you can optionally supply low and high
81+
* distance arguments in the format {@code "AUTO:[low],[high]"}. See the query
82+
* DSL documentation for more information about how these values affect the
83+
* fuzziness value.
84+
* Note: Using this method only makes sense if the field you
85+
* are applying Fuzziness to is some sort of string.
86+
*/
87+
public static Fuzziness fromString(String fuzzinessString) {
88+
if (Strings.isEmpty(fuzzinessString)) {
89+
throw new IllegalArgumentException("fuzziness cannot be null or empty.");
90+
}
91+
String upperCase = fuzzinessString.toUpperCase(Locale.ROOT);
92+
// check if it is one of the "AUTO" variants
93+
if (upperCase.equals("AUTO")) {
94+
return Fuzziness.AUTO;
95+
} else if (upperCase.startsWith("AUTO:")) {
96+
return parseCustomAuto(upperCase);
97+
} else {
98+
// should be a float or int representing a valid edit distance, otherwise throw error
99+
try {
100+
float parsedFloat = Float.parseFloat(upperCase);
101+
if (parsedFloat % 1 > 0) {
102+
throw new IllegalArgumentException("fuzziness needs to be one of 0.0, 1.0 or 2.0 but was " + parsedFloat);
103+
}
104+
return fromEdits((int) parsedFloat);
105+
} catch (NumberFormatException e) {
106+
throw new IllegalArgumentException("fuzziness cannot be [" + fuzzinessString + "].", e);
107+
}
73108
}
74-
this.lowDistance = lowDistance;
75-
this.highDistance = highDistance;
76109
}
77110

78111
/**
@@ -101,39 +134,23 @@ public void writeTo(StreamOutput out) throws IOException {
101134
}
102135
}
103136

104-
/**
105-
* Creates a {@link Fuzziness} instance from an edit distance. The value must be one of {@code [0, 1, 2]}
106-
*
107-
* Note: Using this method only makes sense if the field you are applying Fuzziness to is some sort of string.
108-
*/
109-
public static Fuzziness fromEdits(int edits) {
110-
return new Fuzziness(edits);
111-
}
112-
113-
public static Fuzziness build(Object fuzziness) {
114-
if (fuzziness instanceof Fuzziness) {
115-
return (Fuzziness) fuzziness;
116-
}
117-
String string = fuzziness.toString();
118-
if (AUTO.asString().equalsIgnoreCase(string)) {
119-
return AUTO;
120-
} else if (string.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":")) {
121-
return parseCustomAuto(string);
122-
}
123-
return new Fuzziness(string);
124-
}
125-
126-
private static Fuzziness parseCustomAuto( final String string) {
127-
assert string.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":");
128-
String[] fuzzinessLimit = string.substring(AUTO.asString().length() + 1).split(",");
137+
private static Fuzziness parseCustomAuto(final String fuzzinessString) {
138+
assert fuzzinessString.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":");
139+
String[] fuzzinessLimit = fuzzinessString.substring(AUTO.asString().length() + 1).split(",");
129140
if (fuzzinessLimit.length == 2) {
130141
try {
131142
int lowerLimit = Integer.parseInt(fuzzinessLimit[0]);
132143
int highLimit = Integer.parseInt(fuzzinessLimit[1]);
133-
return new Fuzziness("AUTO", lowerLimit, highLimit);
144+
if (lowerLimit < 0 || highLimit < 0 || lowerLimit > highLimit) {
145+
throw new ElasticsearchParseException("fuzziness wrongly configured [{}]. Must be 0 < lower value <= higher value.",
146+
fuzzinessString);
147+
}
148+
Fuzziness fuzziness = new Fuzziness("AUTO");
149+
fuzziness.lowDistance = lowerLimit;
150+
fuzziness.highDistance = highLimit;
151+
return fuzziness;
134152
} catch (NumberFormatException e) {
135-
throw new ElasticsearchParseException("failed to parse [{}] as a \"auto:int,int\"", e,
136-
string);
153+
throw new ElasticsearchParseException("failed to parse [{}] as a \"auto:int,int\"", e, fuzzinessString);
137154
}
138155
} else {
139156
throw new ElasticsearchParseException("failed to find low and high distance values");
@@ -144,29 +161,9 @@ public static Fuzziness parse(XContentParser parser) throws IOException {
144161
XContentParser.Token token = parser.currentToken();
145162
switch (token) {
146163
case VALUE_STRING:
164+
return fromString(parser.text());
147165
case VALUE_NUMBER:
148-
final String fuzziness = parser.text();
149-
if (AUTO.asString().equalsIgnoreCase(fuzziness)) {
150-
return AUTO;
151-
} else if (fuzziness.toUpperCase(Locale.ROOT).startsWith(AUTO.asString() + ":")) {
152-
return parseCustomAuto(fuzziness);
153-
}
154-
try {
155-
final int minimumSimilarity = Integer.parseInt(fuzziness);
156-
switch (minimumSimilarity) {
157-
case 0:
158-
return ZERO;
159-
case 1:
160-
return ONE;
161-
case 2:
162-
return TWO;
163-
default:
164-
return build(fuzziness);
165-
}
166-
} catch (NumberFormatException ex) {
167-
return build(fuzziness);
168-
}
169-
166+
return fromEdits(parser.intValue());
170167
default:
171168
throw new IllegalArgumentException("Can't parse fuzziness on token: [" + token + "]");
172169
}
@@ -200,7 +197,7 @@ public float asFloat() {
200197
if (this.equals(AUTO) || isAutoWithCustomValues()) {
201198
return 1f;
202199
}
203-
return Float.parseFloat(fuzziness.toString());
200+
return Float.parseFloat(fuzziness);
204201
}
205202

206203
private int termLen(String text) {
@@ -209,13 +206,13 @@ private int termLen(String text) {
209206

210207
public String asString() {
211208
if (isAutoWithCustomValues()) {
212-
return fuzziness.toString() + ":" + lowDistance + "," + highDistance;
209+
return fuzziness + ":" + lowDistance + "," + highDistance;
213210
}
214-
return fuzziness.toString();
211+
return fuzziness;
215212
}
216213

217214
private boolean isAutoWithCustomValues() {
218-
return fuzziness.startsWith("AUTO") && (lowDistance != DEFAULT_LOW_DISTANCE ||
215+
return fuzziness.equals("AUTO") && (lowDistance != DEFAULT_LOW_DISTANCE ||
219216
highDistance != DEFAULT_HIGH_DISTANCE);
220217
}
221218

server/src/main/java/org/elasticsearch/index/query/MatchBoolPrefixQueryBuilder.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,8 @@ public String minimumShouldMatch() {
161161
}
162162

163163
/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
164-
public MatchBoolPrefixQueryBuilder fuzziness(Object fuzziness) {
165-
this.fuzziness = Fuzziness.build(fuzziness);
164+
public MatchBoolPrefixQueryBuilder fuzziness(Fuzziness fuzziness) {
165+
this.fuzziness = fuzziness;
166166
return this;
167167
}
168168

server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ public String analyzer() {
183183
}
184184

185185
/** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
186-
public MatchQueryBuilder fuzziness(Object fuzziness) {
187-
this.fuzziness = Fuzziness.build(fuzziness);
186+
public MatchQueryBuilder fuzziness(Fuzziness fuzziness) {
187+
this.fuzziness = Objects.requireNonNull(fuzziness);
188188
return this;
189189
}
190190

server/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -376,10 +376,8 @@ public int slop() {
376376
/**
377377
* Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO".
378378
*/
379-
public MultiMatchQueryBuilder fuzziness(Object fuzziness) {
380-
if (fuzziness != null) {
381-
this.fuzziness = Fuzziness.build(fuzziness);
382-
}
379+
public MultiMatchQueryBuilder fuzziness(Fuzziness fuzziness) {
380+
this.fuzziness = Objects.requireNonNull(fuzziness);
383381
return this;
384382
}
385383

@@ -707,7 +705,6 @@ public static MultiMatchQueryBuilder fromXContent(XContentParser parser) throws
707705
.type(type)
708706
.analyzer(analyzer)
709707
.cutoffFrequency(cutoffFrequency)
710-
.fuzziness(fuzziness)
711708
.fuzzyRewrite(fuzzyRewrite)
712709
.maxExpansions(maxExpansions)
713710
.minimumShouldMatch(minimumShouldMatch)
@@ -723,6 +720,9 @@ public static MultiMatchQueryBuilder fromXContent(XContentParser parser) throws
723720
if (lenient != null) {
724721
builder.lenient(lenient);
725722
}
723+
if (fuzziness != null) {
724+
builder.fuzziness(fuzziness);
725+
}
726726
return builder;
727727
}
728728

server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ protected Query handleBareFuzzy(String field, Token fuzzySlop, String termImage)
423423
if (fuzzySlop.image.length() == 1) {
424424
return getFuzzyQuery(field, termImage, fuzziness.asDistance(termImage));
425425
}
426-
return getFuzzyQuery(field, termImage, Fuzziness.build(fuzzySlop.image.substring(1)).asFloat());
426+
return getFuzzyQuery(field, termImage, Fuzziness.fromString(fuzzySlop.image.substring(1)).asFloat());
427427
}
428428

429429
@Override
@@ -434,7 +434,7 @@ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
434434
}
435435
List<Query> queries = new ArrayList<>();
436436
for (Map.Entry<String, Float> entry : fields.entrySet()) {
437-
Query q = getFuzzyQuerySingle(entry.getKey(), termStr, minSimilarity);
437+
Query q = getFuzzyQuerySingle(entry.getKey(), termStr, (int) minSimilarity);
438438
assert q != null;
439439
queries.add(applyBoost(q, entry.getValue()));
440440
}
@@ -447,15 +447,15 @@ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
447447
}
448448
}
449449

450-
private Query getFuzzyQuerySingle(String field, String termStr, float minSimilarity) throws ParseException {
450+
private Query getFuzzyQuerySingle(String field, String termStr, int minSimilarity) throws ParseException {
451451
currentFieldType = context.fieldMapper(field);
452452
if (currentFieldType == null) {
453453
return newUnmappedFieldQuery(field);
454454
}
455455
try {
456456
Analyzer normalizer = forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer;
457457
BytesRef term = termStr == null ? null : normalizer.normalize(field, termStr);
458-
return currentFieldType.fuzzyQuery(term, Fuzziness.fromEdits((int) minSimilarity),
458+
return currentFieldType.fuzzyQuery(term, Fuzziness.fromEdits(minSimilarity),
459459
getFuzzyPrefixLength(), fuzzyMaxExpansions, fuzzyTranspositions);
460460
} catch (RuntimeException e) {
461461
if (lenient) {
@@ -467,7 +467,7 @@ private Query getFuzzyQuerySingle(String field, String termStr, float minSimilar
467467

468468
@Override
469469
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
470-
int numEdits = Fuzziness.build(minimumSimilarity).asDistance(term.text());
470+
int numEdits = Fuzziness.fromEdits((int) minimumSimilarity).asDistance(term.text());
471471
FuzzyQuery query = new FuzzyQuery(term, numEdits, prefixLength,
472472
fuzzyMaxExpansions, fuzzyTranspositions);
473473
QueryParsers.setRewriteMethod(query, fuzzyRewriteMethod);

0 commit comments

Comments
 (0)