Skip to content

Commit 6737bf5

Browse files
committed
Improve similarity integration.
This improves the way similarities are plugged in in order to: - reject the classic similarity on 7.x indices and emit a deprecation warning otherwise - reject unkwown parameters on 7.x indices and emit a deprecation warning otherwise Even though this breaks the plugin API, I'd like to backport to 7.x so that users can get deprecation warnings when they are doing something that will become unsupported in the future. Closes elastic#23208 Closes elastic#29035
1 parent 2f6c773 commit 6737bf5

26 files changed

+502
-806
lines changed

docs/reference/index-modules/similarity.asciidoc

+2-16
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,6 @@ This similarity has the following options:
8282

8383
Type name: `BM25`
8484

85-
[float]
86-
[[classic-similarity]]
87-
==== Classic similarity
88-
89-
The classic similarity that is based on the TF/IDF model. This
90-
similarity has the following option:
91-
92-
`discount_overlaps`::
93-
Determines whether overlap tokens (Tokens with
94-
0 position increment) are ignored when computing norm. By default this
95-
is true, meaning overlap tokens do not count when computing norms.
96-
97-
Type name: `classic`
98-
9985
[float]
10086
[[dfr]]
10187
==== DFR similarity
@@ -541,7 +527,7 @@ PUT /index
541527
"index": {
542528
"similarity": {
543529
"default": {
544-
"type": "classic"
530+
"type": "boolean"
545531
}
546532
}
547533
}
@@ -563,7 +549,7 @@ PUT /index/_settings
563549
"index": {
564550
"similarity": {
565551
"default": {
566-
"type": "classic"
552+
"type": "boolean"
567553
}
568554
}
569555
}

docs/reference/mapping/params/similarity.asciidoc

+2-7
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,9 @@ PUT my_index
4444
"default_field": { <1>
4545
"type": "text"
4646
},
47-
"classic_field": {
48-
"type": "text",
49-
"similarity": "classic" <2>
50-
},
5147
"boolean_sim_field": {
5248
"type": "text",
53-
"similarity": "boolean" <3>
49+
"similarity": "boolean" <2>
5450
}
5551
}
5652
}
@@ -59,5 +55,4 @@ PUT my_index
5955
--------------------------------------------------
6056
// CONSOLE
6157
<1> The `default_field` uses the `BM25` similarity.
62-
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
63-
<3> The `boolean_sim_field` uses the `boolean` similarity.
58+
<2> The `boolean_sim_field` uses the `boolean` similarity.

docs/reference/migration/migrate_7_0/mappings.asciidoc

+13
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,16 @@ the index setting `index.mapping.nested_objects.limit`.
2424
==== The `update_all_types` option has been removed
2525

2626
This option is useless now that all indices have at most one type.
27+
28+
=== The `classic` similarity has been removed
29+
30+
The `classic` similarity relied on coordination factors for scoring to be good
31+
in presence of stopwords in the query. This feature has been removed from
32+
Lucene, which means that the `classic` similarity now produces scores of lower
33+
quality. It is advised to switch to `BM25` instead, which is widely accepted
34+
as a better alternative.
35+
36+
=== Similarities fail when unsupported options are provided
37+
38+
An error will now be thrown when unknown configuration options are provided
39+
to similarities. Such unknown parameters were ignored before.

modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,7 @@ public void testNonDefaultSimilarity() throws Exception {
337337
hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
338338
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
339339
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
340-
.create(similarity, Settings.EMPTY,
341-
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
342-
.get();
340+
.apply(Settings.EMPTY, Version.CURRENT, null);
343341
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
344342
}
345343

modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -323,9 +323,7 @@ public void testNonDefaultSimilarity() throws Exception {
323323
hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
324324
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
325325
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
326-
.create(similarity, Settings.EMPTY,
327-
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
328-
.get();
326+
.apply(Settings.EMPTY, Version.CURRENT, null);
329327
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
330328
}
331329

server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java

+7-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
import org.apache.logging.log4j.message.ParameterizedMessage;
2222
import org.apache.logging.log4j.util.Supplier;
2323
import org.apache.lucene.analysis.Analyzer;
24+
import org.apache.lucene.search.similarities.Similarity;
2425
import org.elasticsearch.Version;
26+
import org.elasticsearch.common.TriFunction;
2527
import org.elasticsearch.common.component.AbstractComponent;
2628
import org.elasticsearch.common.settings.IndexScopedSettings;
2729
import org.elasticsearch.common.settings.Settings;
@@ -32,8 +34,8 @@
3234
import org.elasticsearch.index.analysis.NamedAnalyzer;
3335
import org.elasticsearch.index.mapper.MapperService;
3436
import org.elasticsearch.index.similarity.SimilarityService;
35-
import org.elasticsearch.index.similarity.SimilarityProvider;
3637
import org.elasticsearch.indices.mapper.MapperRegistry;
38+
import org.elasticsearch.script.ScriptService;
3739

3840
import java.util.AbstractMap;
3941
import java.util.Collection;
@@ -143,22 +145,23 @@ private void checkMappingsCompatibility(IndexMetaData indexMetaData) {
143145

144146
IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);
145147

146-
final Map<String, SimilarityProvider.Factory> similarityMap = new AbstractMap<String, SimilarityProvider.Factory>() {
148+
final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarityMap
149+
= new AbstractMap<String, TriFunction<Settings, Version, ScriptService, Similarity>>() {
147150
@Override
148151
public boolean containsKey(Object key) {
149152
return true;
150153
}
151154

152155
@Override
153-
public SimilarityProvider.Factory get(Object key) {
156+
public TriFunction<Settings, Version, ScriptService, Similarity> get(Object key) {
154157
assert key instanceof String : "key must be a string but was: " + key.getClass();
155158
return SimilarityService.BUILT_IN.get(SimilarityService.DEFAULT_SIMILARITY);
156159
}
157160

158161
// this entrySet impl isn't fully correct but necessary as SimilarityService will iterate
159162
// over all similarities
160163
@Override
161-
public Set<Entry<String, SimilarityProvider.Factory>> entrySet() {
164+
public Set<Entry<String, TriFunction<Settings, Version, ScriptService, Similarity>>> entrySet() {
162165
return Collections.emptySet();
163166
}
164167
};

server/src/main/java/org/elasticsearch/index/IndexModule.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919

2020
package org.elasticsearch.index;
2121

22+
import org.apache.lucene.search.similarities.Similarity;
2223
import org.apache.lucene.util.SetOnce;
24+
import org.elasticsearch.Version;
2325
import org.elasticsearch.client.Client;
2426
import org.elasticsearch.common.Strings;
27+
import org.elasticsearch.common.TriFunction;
2528
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
2629
import org.elasticsearch.common.settings.Setting;
2730
import org.elasticsearch.common.settings.Setting.Property;
@@ -40,7 +43,6 @@
4043
import org.elasticsearch.index.shard.IndexingOperationListener;
4144
import org.elasticsearch.index.shard.SearchOperationListener;
4245
import org.elasticsearch.index.shard.ShardId;
43-
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
4446
import org.elasticsearch.index.similarity.SimilarityProvider;
4547
import org.elasticsearch.index.similarity.SimilarityService;
4648
import org.elasticsearch.index.store.IndexStore;
@@ -107,7 +109,7 @@ public final class IndexModule {
107109
final SetOnce<EngineFactory> engineFactory = new SetOnce<>();
108110
private SetOnce<IndexSearcherWrapperFactory> indexSearcherWrapper = new SetOnce<>();
109111
private final Set<IndexEventListener> indexEventListeners = new HashSet<>();
110-
private final Map<String, SimilarityProvider.Factory> similarities = new HashMap<>();
112+
private final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarities = new HashMap<>();
111113
private final Map<String, Function<IndexSettings, IndexStore>> storeTypes = new HashMap<>();
112114
private final SetOnce<BiFunction<IndexSettings, IndicesQueryCache, QueryCache>> forceQueryCacheProvider = new SetOnce<>();
113115
private final List<SearchOperationListener> searchOperationListeners = new ArrayList<>();
@@ -251,7 +253,7 @@ public void addIndexStore(String type, Function<IndexSettings, IndexStore> provi
251253
* @param name Name of the SimilarityProvider
252254
* @param similarity SimilarityProvider to register
253255
*/
254-
public void addSimilarity(String name, SimilarityProvider.Factory similarity) {
256+
public void addSimilarity(String name, TriFunction<Settings, Version, ScriptService, Similarity> similarity) {
255257
ensureNotFrozen();
256258
if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) {
257259
throw new IllegalArgumentException("similarity for name: [" + name + " is already registered");

server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java

-82
This file was deleted.

server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java

-59
This file was deleted.

server/src/main/java/org/elasticsearch/index/similarity/BooleanSimilarityProvider.java

-48
This file was deleted.

0 commit comments

Comments
 (0)