Skip to content

Commit 9e61f6d

Browse files
committed
Improve similarity integration. (#29187)
This improves the way similarities are plugged in in order to: - reject the classic similarity on 7.x indices and emit a deprecation warning otherwise - reject unkwown parameters on 7.x indices and emit a deprecation warning otherwise Even though this breaks the plugin API, I'd like to backport to 7.x so that users can get deprecation warnings when they are doing something that will become unsupported in the future. Closes #23208 Closes #29035
1 parent 1fd448b commit 9e61f6d

25 files changed

+484
-819
lines changed

docs/reference/index-modules/similarity.asciidoc

+5-3
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ Type name: `BM25`
8686
[[classic-similarity]]
8787
==== Classic similarity
8888

89+
deprecated[6.3.0, The quality of the produced scores used to rely on coordination factors, which have been removed. It is advised to use BM25 instead.]
90+
8991
The classic similarity that is based on the TF/IDF model. This
9092
similarity has the following option:
9193

@@ -97,7 +99,7 @@ similarity has the following option:
9799
Type name: `classic`
98100

99101
[float]
100-
[[drf]]
102+
[[dfr]]
101103
==== DFR similarity
102104

103105
Similarity that implements the
@@ -510,7 +512,7 @@ PUT /index
510512
"index": {
511513
"similarity": {
512514
"default": {
513-
"type": "classic"
515+
"type": "boolean"
514516
}
515517
}
516518
}
@@ -532,7 +534,7 @@ PUT /index/_settings
532534
"index": {
533535
"similarity": {
534536
"default": {
535-
"type": "classic"
537+
"type": "boolean"
536538
}
537539
}
538540
}

docs/reference/mapping/params/similarity.asciidoc

+2-7
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,9 @@ PUT my_index
4444
"default_field": { <1>
4545
"type": "text"
4646
},
47-
"classic_field": {
48-
"type": "text",
49-
"similarity": "classic" <2>
50-
},
5147
"boolean_sim_field": {
5248
"type": "text",
53-
"similarity": "boolean" <3>
49+
"similarity": "boolean" <2>
5450
}
5551
}
5652
}
@@ -59,5 +55,4 @@ PUT my_index
5955
--------------------------------------------------
6056
// CONSOLE
6157
<1> The `default_field` uses the `BM25` similarity.
62-
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
63-
<3> The `boolean_sim_field` uses the `boolean` similarity.
58+
<2> The `boolean_sim_field` uses the `boolean` similarity.

modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java

+2-4
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ protected Settings indexSettings() {
9797

9898
@Override
9999
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
100-
similarity = randomFrom("classic", "BM25");
100+
similarity = randomFrom("boolean", "BM25");
101101
XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
102102
.startObject("join_field")
103103
.field("type", "join")
@@ -336,9 +336,7 @@ public void testNonDefaultSimilarity() throws Exception {
336336
hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
337337
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
338338
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
339-
.create(similarity, Settings.EMPTY,
340-
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
341-
.get();
339+
.apply(Settings.EMPTY, Version.CURRENT, null);
342340
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
343341
}
344342

modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java

+2-4
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {
8787

8888
@Override
8989
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
90-
similarity = randomFrom("classic", "BM25");
90+
similarity = randomFrom("boolean", "BM25");
9191
// TODO: use a single type when inner hits have been changed to work with join field,
9292
// this test randomly generates queries with inner hits
9393
mapperService.merge(PARENT_TYPE, new CompressedXContent(Strings.toString(PutMappingRequest.buildFromSimplifiedDef(PARENT_TYPE,
@@ -323,9 +323,7 @@ public void testNonDefaultSimilarity() throws Exception {
323323
hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
324324
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
325325
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
326-
.create(similarity, Settings.EMPTY,
327-
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
328-
.get();
326+
.apply(Settings.EMPTY, Version.CURRENT, null);
329327
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
330328
}
331329

server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java

+8-4
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020

2121
import org.apache.logging.log4j.message.ParameterizedMessage;
2222
import org.apache.lucene.analysis.Analyzer;
23+
import org.apache.lucene.search.similarities.Similarity;
2324
import org.elasticsearch.Version;
25+
import org.elasticsearch.common.TriFunction;
2426
import org.elasticsearch.common.component.AbstractComponent;
2527
import org.elasticsearch.common.settings.IndexScopedSettings;
2628
import org.elasticsearch.common.settings.Settings;
@@ -31,8 +33,8 @@
3133
import org.elasticsearch.index.analysis.NamedAnalyzer;
3234
import org.elasticsearch.index.mapper.MapperService;
3335
import org.elasticsearch.index.similarity.SimilarityService;
34-
import org.elasticsearch.index.similarity.SimilarityProvider;
3536
import org.elasticsearch.indices.mapper.MapperRegistry;
37+
import org.elasticsearch.script.ScriptService;
3638

3739
import java.util.AbstractMap;
3840
import java.util.Collection;
@@ -141,22 +143,24 @@ private void checkMappingsCompatibility(IndexMetaData indexMetaData) {
141143
// actual upgrade.
142144

143145
IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);
144-
final Map<String, SimilarityProvider.Factory> similarityMap = new AbstractMap<String, SimilarityProvider.Factory>() {
146+
147+
final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarityMap
148+
= new AbstractMap<String, TriFunction<Settings, Version, ScriptService, Similarity>>() {
145149
@Override
146150
public boolean containsKey(Object key) {
147151
return true;
148152
}
149153

150154
@Override
151-
public SimilarityProvider.Factory get(Object key) {
155+
public TriFunction<Settings, Version, ScriptService, Similarity> get(Object key) {
152156
assert key instanceof String : "key must be a string but was: " + key.getClass();
153157
return SimilarityService.BUILT_IN.get(SimilarityService.DEFAULT_SIMILARITY);
154158
}
155159

156160
// this entrySet impl isn't fully correct but necessary as SimilarityService will iterate
157161
// over all similarities
158162
@Override
159-
public Set<Entry<String, SimilarityProvider.Factory>> entrySet() {
163+
public Set<Entry<String, TriFunction<Settings, Version, ScriptService, Similarity>>> entrySet() {
160164
return Collections.emptySet();
161165
}
162166
};

server/src/main/java/org/elasticsearch/index/IndexModule.java

+15-9
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@
1919

2020
package org.elasticsearch.index;
2121

22+
import org.apache.lucene.search.similarities.Similarity;
23+
import org.apache.lucene.search.similarities.BM25Similarity;
2224
import org.apache.lucene.util.SetOnce;
25+
import org.elasticsearch.Version;
2326
import org.elasticsearch.client.Client;
2427
import org.elasticsearch.common.Strings;
28+
import org.elasticsearch.common.TriFunction;
2529
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
2630
import org.elasticsearch.common.settings.Setting;
2731
import org.elasticsearch.common.settings.Setting.Property;
@@ -39,9 +43,6 @@
3943
import org.elasticsearch.index.shard.IndexSearcherWrapper;
4044
import org.elasticsearch.index.shard.IndexingOperationListener;
4145
import org.elasticsearch.index.shard.SearchOperationListener;
42-
import org.elasticsearch.index.shard.ShardId;
43-
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
44-
import org.elasticsearch.index.similarity.SimilarityProvider;
4546
import org.elasticsearch.index.similarity.SimilarityService;
4647
import org.elasticsearch.index.store.IndexStore;
4748
import org.elasticsearch.indices.IndicesQueryCache;
@@ -68,10 +69,10 @@
6869
/**
6970
* IndexModule represents the central extension point for index level custom implementations like:
7071
* <ul>
71-
* <li>{@link SimilarityProvider} - New {@link SimilarityProvider} implementations can be registered through
72-
* {@link #addSimilarity(String, SimilarityProvider.Factory)} while existing Providers can be referenced through Settings under the
72+
* <li>{@link Similarity} - New {@link Similarity} implementations can be registered through
73+
* {@link #addSimilarity(String, TriFunction)} while existing Providers can be referenced through Settings under the
7374
* {@link IndexModule#SIMILARITY_SETTINGS_PREFIX} prefix along with the "type" value. For example, to reference the
74-
* {@link BM25SimilarityProvider}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
75+
* {@link BM25Similarity}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
7576
* <li>{@link IndexStore} - Custom {@link IndexStore} instances can be registered via {@link #addIndexStore(String, Function)}</li>
7677
* <li>{@link IndexEventListener} - Custom {@link IndexEventListener} instances can be registered via
7778
* {@link #addIndexEventListener(IndexEventListener)}</li>
@@ -107,7 +108,7 @@ public final class IndexModule {
107108
final SetOnce<EngineFactory> engineFactory = new SetOnce<>();
108109
private SetOnce<IndexSearcherWrapperFactory> indexSearcherWrapper = new SetOnce<>();
109110
private final Set<IndexEventListener> indexEventListeners = new HashSet<>();
110-
private final Map<String, SimilarityProvider.Factory> similarities = new HashMap<>();
111+
private final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarities = new HashMap<>();
111112
private final Map<String, Function<IndexSettings, IndexStore>> storeTypes = new HashMap<>();
112113
private final SetOnce<BiFunction<IndexSettings, IndicesQueryCache, QueryCache>> forceQueryCacheProvider = new SetOnce<>();
113114
private final List<SearchOperationListener> searchOperationListeners = new ArrayList<>();
@@ -246,12 +247,17 @@ public void addIndexStore(String type, Function<IndexSettings, IndexStore> provi
246247

247248

248249
/**
249-
* Registers the given {@link SimilarityProvider} with the given name
250+
* Registers the given {@link Similarity} with the given name.
251+
* The function takes as parameters:<ul>
252+
* <li>settings for this similarity
253+
* <li>version of Elasticsearch when the index was created
254+
* <li>ScriptService, for script-based similarities
255+
* </ul>
250256
*
251257
* @param name Name of the SimilarityProvider
252258
* @param similarity SimilarityProvider to register
253259
*/
254-
public void addSimilarity(String name, SimilarityProvider.Factory similarity) {
260+
public void addSimilarity(String name, TriFunction<Settings, Version, ScriptService, Similarity> similarity) {
255261
ensureNotFrozen();
256262
if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) {
257263
throw new IllegalArgumentException("similarity for name: [" + name + " is already registered");

server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java

-82
This file was deleted.

server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java

-64
This file was deleted.

0 commit comments

Comments
 (0)