Skip to content

Commit cf9c57a

Browse files
committed
Revert "Wildcard field - add normalizer support (elastic#53851) (elastic#54109)"
This reverts commit 6a60f85.
1 parent 8126ad0 commit cf9c57a

File tree

8 files changed

43 additions and 204 deletions

docs/reference/mapping/types/wildcard.asciidoc

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,23 +50,6 @@ POST my_index/_doc/_search
5050
--------------------------------------------------
5151

5252

53-
[[wildcard-params]]
54-
==== Parameters for wildcard fields
55-
56-
The following parameters are accepted by `wildcard` fields:
57-
58-
[horizontal]
59-
60-
<<ignore-above,`ignore_above`>>::
61-
62-
Do not index any string longer than this value. Defaults to `2147483647`
63-
so that all values would be accepted.
64-
65-
<<normalizer,`normalizer`>>::
66-
67-
How to pre-process the value prior to indexing. Defaults to `null`,
68-
meaning the value is kept as-is.
69-
7053
==== Limitations
7154

7255
* `wildcard` fields are untokenized like keyword fields, so do not support queries that rely on word positions such as phrase queries.

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,14 +358,14 @@ public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int
358358
}
359359

360360
public Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) {
361-
throw new QueryShardException(context, "Can only use prefix queries on keyword, text and wildcard fields - not on [" + name
361+
throw new QueryShardException(context, "Can only use prefix queries on keyword and text fields - not on [" + name
362362
+ "] which is of type [" + typeName() + "]");
363363
}
364364

365365
public Query wildcardQuery(String value,
366366
@Nullable MultiTermQuery.RewriteMethod method,
367367
QueryShardContext context) {
368-
throw new QueryShardException(context, "Can only use wildcard queries on keyword, text and wildcard fields - not on [" + name
368+
throw new QueryShardException(context, "Can only use wildcard queries on keyword and text fields - not on [" + name
369369
+ "] which is of type [" + typeName() + "]");
370370
}
371371

server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
package org.elasticsearch.index.mapper;
2121

22-
import org.apache.lucene.analysis.Analyzer;
2322
import org.apache.lucene.index.Term;
2423
import org.apache.lucene.search.FuzzyQuery;
2524
import org.apache.lucene.search.MultiTermQuery;
@@ -94,36 +93,6 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer
9493
return query;
9594
}
9695

97-
public static final String normalizeWildcardPattern(String fieldname, String value, Analyzer normalizer) {
98-
if (normalizer == null) {
99-
return value;
100-
}
101-
// we want to normalize everything except wildcard characters, e.g. F?o Ba* to f?o ba*, even if e.g there
102-
// is a char_filter that would otherwise remove them
103-
Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value);
104-
BytesRefBuilder sb = new BytesRefBuilder();
105-
int last = 0;
106-
107-
while (wildcardMatcher.find()) {
108-
if (wildcardMatcher.start() > 0) {
109-
String chunk = value.substring(last, wildcardMatcher.start());
110-
111-
BytesRef normalized = normalizer.normalize(fieldname, chunk);
112-
sb.append(normalized);
113-
}
114-
// append the matched group - without normalizing
115-
sb.append(new BytesRef(wildcardMatcher.group()));
116-
117-
last = wildcardMatcher.end();
118-
}
119-
if (last < value.length()) {
120-
String chunk = value.substring(last);
121-
BytesRef normalized = normalizer.normalize(fieldname, chunk);
122-
sb.append(normalized);
123-
}
124-
return sb.toBytesRef().utf8ToString();
125-
}
126-
12796
@Override
12897
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
12998
failIfNotIndexed();
@@ -134,8 +103,30 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, Qu
134103

135104
Term term;
136105
if (searchAnalyzer() != null) {
137-
value = normalizeWildcardPattern(name(), value, searchAnalyzer());
138-
term = new Term(name(), value);
106+
// we want to normalize everything except wildcard characters, e.g. F?o Ba* to f?o ba*, even if e.g there
107+
// is a char_filter that would otherwise remove them
108+
Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value);
109+
BytesRefBuilder sb = new BytesRefBuilder();
110+
int last = 0;
111+
112+
while (wildcardMatcher.find()) {
113+
if (wildcardMatcher.start() > 0) {
114+
String chunk = value.substring(last, wildcardMatcher.start());
115+
116+
BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
117+
sb.append(normalized);
118+
}
119+
// append the matched group - without normalizing
120+
sb.append(new BytesRef(wildcardMatcher.group()));
121+
122+
last = wildcardMatcher.end();
123+
}
124+
if (last < value.length()) {
125+
String chunk = value.substring(last);
126+
BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
127+
sb.append(normalized);
128+
}
129+
term = new Term(name(), sb.toBytesRef());
139130
} else {
140131
term = new Term(name(), indexedValueForSearch(value));
141132
}

server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ public static RangeQueryBuilder rangeQuery(String name) {
264264
* which matches any single character. Note this query can be slow, as it
265265
* needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
266266
* a Wildcard term should not start with one of the wildcards {@code *} or
267-
* {@code ?}. (The wildcard field type however, is optimised for leading wildcards)
267+
* {@code ?}.
268268
*
269269
* @param name The field name
270270
* @param query The wildcard query string

server/src/test/java/org/elasticsearch/index/query/PrefixQueryBuilderTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public void testNumeric() throws Exception {
116116
QueryShardContext context = createShardContext();
117117
QueryShardException e = expectThrows(QueryShardException.class,
118118
() -> query.toQuery(context));
119-
assertEquals("Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]",
119+
assertEquals("Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]",
120120
e.getMessage());
121121
}
122122

server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -816,7 +816,7 @@ public void testPrefixNumeric() throws Exception {
816816
QueryShardContext context = createShardContext();
817817
QueryShardException e = expectThrows(QueryShardException.class,
818818
() -> query.toQuery(context));
819-
assertEquals("Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]",
819+
assertEquals("Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]",
820820
e.getMessage());
821821
query.lenient(true);
822822
query.toQuery(context); // no exception

x-pack/plugin/src/test/resources/rest-api-spec/test/wildcard/10_wildcard_basic.yml

Lines changed: 10 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,10 @@ setup:
1010
body:
1111
settings:
1212
number_of_replicas: 0
13-
analysis:
14-
normalizer:
15-
lowercase:
16-
type: custom
17-
char_filter: []
18-
filter: ["lowercase"]
1913
mappings:
2014
properties:
2115
my_wildcard:
2216
type: wildcard
23-
normalizer: lowercase
24-
fields:
25-
case_sensitive:
26-
type: wildcard
2717
- do:
2818
index:
2919
index: test-index
@@ -36,12 +26,6 @@ setup:
3626
id: 2
3727
body:
3828
my_wildcard: goodbye world
39-
- do:
40-
index:
41-
index: test-index
42-
id: 3
43-
body:
44-
my_wildcard: cAsE iNsEnSiTiVe World
4529

4630
- do:
4731
indices.refresh: {}
@@ -96,31 +80,6 @@ setup:
9680
my_wildcard: {value: "*ello worl*" }
9781

9882

99-
- match: {hits.total.value: 1}
100-
---
101-
"Case insensitive query":
102-
- do:
103-
search:
104-
body:
105-
track_total_hits: true
106-
query:
107-
wildcard:
108-
my_wildcard: {value: "*Worl*" }
109-
110-
111-
- match: {hits.total.value: 3}
112-
113-
---
114-
"Case sensitive query":
115-
- do:
116-
search:
117-
body:
118-
track_total_hits: true
119-
query:
120-
wildcard:
121-
my_wildcard.case_sensitive: {value: "*Worl*" }
122-
123-
12483
- match: {hits.total.value: 1}
12584

12685
---
@@ -134,7 +93,7 @@ setup:
13493
my_wildcard: {value: "*ld" }
13594

13695

137-
- match: {hits.total.value: 3}
96+
- match: {hits.total.value: 2}
13897

13998
---
14099
"Long suffix query":
@@ -229,8 +188,8 @@ setup:
229188
terms: {field: "my_wildcard" }
230189

231190

232-
- match: {hits.total.value: 3}
233-
- length: { aggregations.top_vals.buckets: 3 }
191+
- match: {hits.total.value: 2}
192+
- length: { aggregations.top_vals.buckets: 2 }
234193

235194
---
236195
"Sort works":
@@ -240,21 +199,20 @@ setup:
240199
track_total_hits: true
241200
sort: [ { "my_wildcard": "desc" } ]
242201

243-
- match: { hits.total.value: 3 }
244-
- length: { hits.hits: 3 }
202+
- match: { hits.total.value: 2 }
203+
- length: { hits.hits: 2 }
245204
- match: { hits.hits.0._id: "1" }
246205
- match: { hits.hits.1._id: "2" }
247-
- match: { hits.hits.2._id: "3" }
248206

249207
- do:
250208
search:
251209
body:
252210
track_total_hits: true
253211
sort: [ { "my_wildcard": "asc" } ]
254212

255-
- match: { hits.total.value: 3 }
256-
- length: { hits.hits: 3 }
257-
- match: { hits.hits.0._id: "3" }
258-
- match: { hits.hits.1._id: "2" }
259-
- match: { hits.hits.2._id: "1" }
213+
- match: { hits.total.value: 2 }
214+
- length: { hits.hits: 2 }
215+
- match: { hits.hits.0._id: "2" }
216+
- match: { hits.hits.1._id: "1" }
217+
260218

0 commit comments

Comments
 (0)