Skip to content

Commit 939c242

Browse files
authored
Remove deprecated vector functions. (#48725)
Follow-up to #48604. This PR removes the deprecated vector function signatures of the form `cosineSimilarity(query, doc['field'])`.
1 parent 23a4e4a commit 939c242

File tree

7 files changed

+56
-154
lines changed

7 files changed

+56
-154
lines changed

docs/reference/migration/migrate_8_0/search.asciidoc

+9-1
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,12 @@ The `nested_filter` and `nested_path` options, deprecated in 6.x, have been remo
3434

3535
{es} will no longer prefer using shards in the same location (with the same awareness attribute values) to process
3636
`_search` and `_get` requests. Adaptive replica selection (activated by default in this version) will route requests
37-
more efficiently using the service time of prior inter-node communications.
37+
more efficiently using the service time of prior inter-node communications.
38+
39+
[float]
40+
==== Update to vector function signatures
41+
The vector functions of the form `function(query, doc['field'])` were
42+
deprecated in 7.6, and are now removed in 8.x. The form
43+
`function(query, 'field')` should be used instead. For example,
44+
`cosineSimilarity(query, doc['field'])` is replaced by
45+
`cosineSimilarity(query, 'field')`.

x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml

+1-24
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
setup:
22
- skip:
3-
features: [headers, warnings]
3+
features: headers
44
version: " - 7.2.99"
55
reason: "dense_vector functions were added from 7.3"
66

@@ -99,26 +99,3 @@ setup:
9999
- match: {hits.hits.2._id: "1"}
100100
- gte: {hits.hits.2._score: 0.78}
101101
- lte: {hits.hits.2._score: 0.791}
102-
103-
---
104-
"Deprecated function signature":
105-
- do:
106-
headers:
107-
Content-Type: application/json
108-
warnings:
109-
- The vector functions of the form function(query, doc['field']) are deprecated, and the form function(query, 'field') should be used instead. For example, cosineSimilarity(query, doc['field']) is replaced by cosineSimilarity(query, 'field').
110-
search:
111-
rest_total_hits_as_int: true
112-
body:
113-
query:
114-
script_score:
115-
query: {match_all: {} }
116-
script:
117-
source: "cosineSimilarity(params.query_vector, doc['my_dense_vector'])"
118-
params:
119-
query_vector: [0.5, 111.3, -13.0, 14.8, -156.0]
120-
121-
- match: {hits.total: 3}
122-
- match: {hits.hits.0._id: "3"}
123-
- match: {hits.hits.1._id: "2"}
124-
- match: {hits.hits.2._id: "1"}

x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/30_sparse_vector_basic.yml

-24
Original file line numberDiff line numberDiff line change
@@ -104,27 +104,3 @@ setup:
104104
- match: {hits.hits.2._id: "1"}
105105
- gte: {hits.hits.2._score: 0.78}
106106
- lte: {hits.hits.2._score: 0.791}
107-
108-
---
109-
"Deprecated function signature":
110-
- do:
111-
headers:
112-
Content-Type: application/json
113-
warnings:
114-
- The [sparse_vector] field type is deprecated and will be removed in 8.0.
115-
- The vector functions of the form function(query, doc['field']) are deprecated, and the form function(query, 'field') should be used instead. For example, cosineSimilarity(query, doc['field']) is replaced by cosineSimilarity(query, 'field').
116-
search:
117-
rest_total_hits_as_int: true
118-
body:
119-
query:
120-
script_score:
121-
query: {match_all: {} }
122-
script:
123-
source: "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector'])"
124-
params:
125-
query_vector: {"2": -0.5, "10" : 111.3, "50": -13.0, "113": 14.8, "4545": -156.0}
126-
127-
- match: {hits.total: 3}
128-
- match: {hits.hits.0._id: "3"}
129-
- match: {hits.hits.1._id: "2"}
130-
- match: {hits.hits.2._id: "1"}

x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java

+18-41
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,6 @@
2727

2828
public class ScoreScriptUtils {
2929
private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(ScoreScriptUtils.class));
30-
static final String DEPRECATION_MESSAGE = "The vector functions of the form function(query, doc['field']) are deprecated, and " +
31-
"the form function(query, 'field') should be used instead. For example, cosineSimilarity(query, doc['field']) is replaced by " +
32-
"cosineSimilarity(query, 'field').";
3330

3431
//**************FUNCTIONS FOR DENSE VECTORS
3532
// Functions are implemented as classes to accept a hidden parameter scoreScript that contains some index settings.
@@ -43,7 +40,7 @@ public static class DenseVectorFunction {
4340

4441
public DenseVectorFunction(ScoreScript scoreScript,
4542
List<Number> queryVector,
46-
Object field) {
43+
String field) {
4744
this(scoreScript, queryVector, field, false);
4845
}
4946

@@ -56,9 +53,10 @@ public DenseVectorFunction(ScoreScript scoreScript,
5653
*/
5754
public DenseVectorFunction(ScoreScript scoreScript,
5855
List<Number> queryVector,
59-
Object field,
56+
String field,
6057
boolean normalizeQuery) {
6158
this.scoreScript = scoreScript;
59+
this.docValues = (DenseVectorScriptDocValues) scoreScript.getDoc().get(field);
6260

6361
this.queryVector = new float[queryVector.size()];
6462
double queryMagnitude = 0.0;
@@ -74,17 +72,6 @@ public DenseVectorFunction(ScoreScript scoreScript,
7472
this.queryVector[dim] /= queryMagnitude;
7573
}
7674
}
77-
78-
if (field instanceof String) {
79-
String fieldName = (String) field;
80-
docValues = (DenseVectorScriptDocValues) scoreScript.getDoc().get(fieldName);
81-
} else if (field instanceof DenseVectorScriptDocValues) {
82-
docValues = (DenseVectorScriptDocValues) field;
83-
deprecationLogger.deprecatedAndMaybeLog("vector_function_signature", DEPRECATION_MESSAGE);
84-
} else {
85-
throw new IllegalArgumentException("For vector functions, the 'field' argument must be of type String or " +
86-
"VectorScriptDocValues");
87-
}
8875
}
8976

9077
BytesRef getEncodedVector() {
@@ -112,7 +99,7 @@ BytesRef getEncodedVector() {
11299
// Calculate l1 norm (Manhattan distance) between a query's dense vector and documents' dense vectors
113100
public static final class L1Norm extends DenseVectorFunction {
114101

115-
public L1Norm(ScoreScript scoreScript, List<Number> queryVector, Object field) {
102+
public L1Norm(ScoreScript scoreScript, List<Number> queryVector, String field) {
116103
super(scoreScript, queryVector, field);
117104
}
118105

@@ -132,7 +119,7 @@ public double l1norm() {
132119
// Calculate l2 norm (Euclidean distance) between a query's dense vector and documents' dense vectors
133120
public static final class L2Norm extends DenseVectorFunction {
134121

135-
public L2Norm(ScoreScript scoreScript, List<Number> queryVector, Object field) {
122+
public L2Norm(ScoreScript scoreScript, List<Number> queryVector, String field) {
136123
super(scoreScript, queryVector, field);
137124
}
138125

@@ -152,7 +139,7 @@ public double l2norm() {
152139
// Calculate a dot product between a query's dense vector and documents' dense vectors
153140
public static final class DotProduct extends DenseVectorFunction {
154141

155-
public DotProduct(ScoreScript scoreScript, List<Number> queryVector, Object field) {
142+
public DotProduct(ScoreScript scoreScript, List<Number> queryVector, String field) {
156143
super(scoreScript, queryVector, field);
157144
}
158145

@@ -171,7 +158,7 @@ public double dotProduct() {
171158
// Calculate cosine similarity between a query's dense vector and documents' dense vectors
172159
public static final class CosineSimilarity extends DenseVectorFunction {
173160

174-
public CosineSimilarity(ScoreScript scoreScript, List<Number> queryVector, Object field) {
161+
public CosineSimilarity(ScoreScript scoreScript, List<Number> queryVector, String field) {
175162
super(scoreScript, queryVector, field, true);
176163
}
177164

@@ -214,8 +201,10 @@ public static class SparseVectorFunction {
214201
// queryVector represents a map of dimensions to values
215202
public SparseVectorFunction(ScoreScript scoreScript,
216203
Map<String, Number> queryVector,
217-
Object field) {
204+
String field) {
218205
this.scoreScript = scoreScript;
206+
this.docValues = (SparseVectorScriptDocValues) scoreScript.getDoc().get(field);
207+
219208
//break vector into two arrays dims and values
220209
int n = queryVector.size();
221210
queryValues = new float[n];
@@ -232,18 +221,6 @@ public SparseVectorFunction(ScoreScript scoreScript,
232221
}
233222
// Sort dimensions in the ascending order and sort values in the same order as their corresponding dimensions
234223
sortSparseDimsFloatValues(queryDims, queryValues, n);
235-
236-
if (field instanceof String) {
237-
String fieldName = (String) field;
238-
docValues = (SparseVectorScriptDocValues) scoreScript.getDoc().get(fieldName);
239-
} else if (field instanceof SparseVectorScriptDocValues) {
240-
docValues = (SparseVectorScriptDocValues) field;
241-
deprecationLogger.deprecatedAndMaybeLog("vector_function_signature", DEPRECATION_MESSAGE);
242-
} else {
243-
throw new IllegalArgumentException("For vector functions, the 'field' argument must be of type String or " +
244-
"VectorScriptDocValues");
245-
}
246-
247224
deprecationLogger.deprecatedAndMaybeLog("sparse_vector_function", SparseVectorFieldMapper.DEPRECATION_MESSAGE);
248225
}
249226

@@ -264,8 +241,8 @@ BytesRef getEncodedVector() {
264241

265242
// Calculate l1 norm (Manhattan distance) between a query's sparse vector and documents' sparse vectors
266243
public static final class L1NormSparse extends SparseVectorFunction {
267-
public L1NormSparse(ScoreScript scoreScript,Map<String, Number> queryVector, Object docVector) {
268-
super(scoreScript, queryVector, docVector);
244+
public L1NormSparse(ScoreScript scoreScript,Map<String, Number> queryVector, String field) {
245+
super(scoreScript, queryVector, field);
269246
}
270247

271248
public double l1normSparse() {
@@ -303,8 +280,8 @@ public double l1normSparse() {
303280

304281
// Calculate l2 norm (Euclidean distance) between a query's sparse vector and documents' sparse vectors
305282
public static final class L2NormSparse extends SparseVectorFunction {
306-
public L2NormSparse(ScoreScript scoreScript, Map<String, Number> queryVector, Object docVector) {
307-
super(scoreScript, queryVector, docVector);
283+
public L2NormSparse(ScoreScript scoreScript, Map<String, Number> queryVector, String field) {
284+
super(scoreScript, queryVector, field);
308285
}
309286

310287
public double l2normSparse() {
@@ -345,8 +322,8 @@ public double l2normSparse() {
345322

346323
// Calculate a dot product between a query's sparse vector and documents' sparse vectors
347324
public static final class DotProductSparse extends SparseVectorFunction {
348-
public DotProductSparse(ScoreScript scoreScript, Map<String, Number> queryVector, Object docVector) {
349-
super(scoreScript, queryVector, docVector);
325+
public DotProductSparse(ScoreScript scoreScript, Map<String, Number> queryVector, String field) {
326+
super(scoreScript, queryVector, field);
350327
}
351328

352329
public double dotProductSparse() {
@@ -362,8 +339,8 @@ public double dotProductSparse() {
362339
public static final class CosineSimilaritySparse extends SparseVectorFunction {
363340
final double queryVectorMagnitude;
364341

365-
public CosineSimilaritySparse(ScoreScript scoreScript, Map<String, Number> queryVector, Object docVector) {
366-
super(scoreScript, queryVector, docVector);
342+
public CosineSimilaritySparse(ScoreScript scoreScript, Map<String, Number> queryVector, String field) {
343+
super(scoreScript, queryVector, field);
367344
double dotProduct = 0;
368345
for (int i = 0; i< queryDims.length; i++) {
369346
dotProduct += queryValues[i] * queryValues[i];

x-pack/plugin/vectors/src/main/resources/org/elasticsearch/xpack/vectors/query/whitelist.txt

+8-8
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ class org.elasticsearch.script.ScoreScript @no_import {
1313
}
1414

1515
static_import {
16-
double l1norm(org.elasticsearch.script.ScoreScript, List, Object) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L1Norm
17-
double l2norm(org.elasticsearch.script.ScoreScript, List, Object) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L2Norm
18-
double cosineSimilarity(org.elasticsearch.script.ScoreScript, List, Object) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilarity
19-
double dotProduct(org.elasticsearch.script.ScoreScript, List, Object) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DotProduct
20-
double l1normSparse(org.elasticsearch.script.ScoreScript, Map, Object) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L1NormSparse
21-
double l2normSparse(org.elasticsearch.script.ScoreScript, Map, Object) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L2NormSparse
22-
double dotProductSparse(org.elasticsearch.script.ScoreScript, Map, Object) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DotProductSparse
23-
double cosineSimilaritySparse(org.elasticsearch.script.ScoreScript, Map, Object) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilaritySparse
16+
double l1norm(org.elasticsearch.script.ScoreScript, List, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L1Norm
17+
double l2norm(org.elasticsearch.script.ScoreScript, List, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L2Norm
18+
double cosineSimilarity(org.elasticsearch.script.ScoreScript, List, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilarity
19+
double dotProduct(org.elasticsearch.script.ScoreScript, List, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DotProduct
20+
double l1normSparse(org.elasticsearch.script.ScoreScript, Map, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L1NormSparse
21+
double l2normSparse(org.elasticsearch.script.ScoreScript, Map, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$L2NormSparse
22+
double dotProductSparse(org.elasticsearch.script.ScoreScript, Map, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DotProductSparse
23+
double cosineSimilaritySparse(org.elasticsearch.script.ScoreScript, Map, String) bound_to org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilaritySparse
2424
}

x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/query/DenseVectorFunctionTests.java

+8-28
Original file line numberDiff line numberDiff line change
@@ -50,68 +50,48 @@ public void testDenseVectorFunctions() {
5050
when(scoreScript._getIndexVersion()).thenReturn(indexVersion);
5151
when(scoreScript.getDoc()).thenReturn(Collections.singletonMap(field, docValues));
5252

53-
testDotProduct(docValues, scoreScript);
54-
testCosineSimilarity(docValues, scoreScript);
55-
testL1Norm(docValues, scoreScript);
56-
testL2Norm(docValues, scoreScript);
53+
testDotProduct(scoreScript);
54+
testCosineSimilarity(scoreScript);
55+
testL1Norm(scoreScript);
56+
testL2Norm(scoreScript);
5757
}
5858
}
5959

60-
private void testDotProduct(DenseVectorScriptDocValues docValues, ScoreScript scoreScript) {
60+
private void testDotProduct(ScoreScript scoreScript) {
6161
DotProduct function = new DotProduct(scoreScript, queryVector, field);
6262
double result = function.dotProduct();
6363
assertEquals("dotProduct result is not equal to the expected value!", 65425.624, result, 0.001);
6464

65-
DotProduct deprecatedFunction = new DotProduct(scoreScript, queryVector, docValues);
66-
double deprecatedResult = deprecatedFunction.dotProduct();
67-
assertEquals("dotProduct result is not equal to the expected value!", 65425.624, deprecatedResult, 0.001);
68-
assertWarnings(ScoreScriptUtils.DEPRECATION_MESSAGE);
69-
7065
DotProduct invalidFunction = new DotProduct(scoreScript, invalidQueryVector, field);
7166
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, invalidFunction::dotProduct);
7267
assertThat(e.getMessage(), containsString("query vector has a different number of dimensions [2] than the document vectors [5]"));
7368
}
7469

75-
private void testCosineSimilarity(DenseVectorScriptDocValues docValues, ScoreScript scoreScript) {
70+
private void testCosineSimilarity(ScoreScript scoreScript) {
7671
CosineSimilarity function = new CosineSimilarity(scoreScript, queryVector, field);
7772
double result = function.cosineSimilarity();
7873
assertEquals("cosineSimilarity result is not equal to the expected value!", 0.790, result, 0.001);
7974

80-
CosineSimilarity deprecatedFunction = new CosineSimilarity(scoreScript, queryVector, docValues);
81-
double deprecatedResult = deprecatedFunction.cosineSimilarity();
82-
assertEquals("cosineSimilarity result is not equal to the expected value!", 0.790, deprecatedResult, 0.001);
83-
assertWarnings(ScoreScriptUtils.DEPRECATION_MESSAGE);
84-
8575
CosineSimilarity invalidFunction = new CosineSimilarity(scoreScript, invalidQueryVector, field);
8676
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, invalidFunction::cosineSimilarity);
8777
assertThat(e.getMessage(), containsString("query vector has a different number of dimensions [2] than the document vectors [5]"));
8878
}
8979

90-
private void testL1Norm(DenseVectorScriptDocValues docValues, ScoreScript scoreScript) {
80+
private void testL1Norm(ScoreScript scoreScript) {
9181
L1Norm function = new L1Norm(scoreScript, queryVector, field);
9282
double result = function.l1norm();
9383
assertEquals("l1norm result is not equal to the expected value!", 485.184, result, 0.001);
9484

95-
L1Norm deprecatedFunction = new L1Norm(scoreScript, queryVector, docValues);
96-
double deprecatedResult = deprecatedFunction.l1norm();
97-
assertEquals("l1norm result is not equal to the expected value!", 485.184, deprecatedResult, 0.001);
98-
assertWarnings(ScoreScriptUtils.DEPRECATION_MESSAGE);
99-
10085
L1Norm invalidFunction = new L1Norm(scoreScript, invalidQueryVector, field);
10186
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, invalidFunction::l1norm);
10287
assertThat(e.getMessage(), containsString("query vector has a different number of dimensions [2] than the document vectors [5]"));
10388
}
10489

105-
private void testL2Norm(DenseVectorScriptDocValues docValues, ScoreScript scoreScript) {
90+
private void testL2Norm(ScoreScript scoreScript) {
10691
L2Norm function = new L2Norm(scoreScript, queryVector, field);
10792
double result = function.l2norm();
10893
assertEquals("l2norm result is not equal to the expected value!", 301.361, result, 0.001);
10994

110-
L2Norm deprecatedFunction = new L2Norm(scoreScript, queryVector, docValues);
111-
double deprecatedResult = deprecatedFunction.l2norm();
112-
assertEquals("l2norm result is not equal to the expected value!", 301.361, deprecatedResult, 0.001);
113-
assertWarnings(ScoreScriptUtils.DEPRECATION_MESSAGE);
114-
11595
L2Norm invalidFunction = new L2Norm(scoreScript, invalidQueryVector, field);
11696
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, invalidFunction::l2norm);
11797
assertThat(e.getMessage(), containsString("query vector has a different number of dimensions [2] than the document vectors [5]"));

0 commit comments

Comments
 (0)