Skip to content

Commit f1ff7f2

Browse files
authored
Require a field when a seed is provided to the random_score function. (#25594)
We currently use fielddata on the `_id` field, which is an easy trap to fall into, especially as we do it implicitly. This changes the `random_score` function to use doc ids when no seed is provided and to suggest a field when a seed is provided. For now the change only emits a deprecation warning when a seed is supplied without a field, but this should be replaced by a strict check in 7.0. Closes #25240
1 parent f69decf commit f1ff7f2

File tree

12 files changed

+176
-73
lines changed

12 files changed

+176
-73
lines changed

client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/QueryDSLDocumentationTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ public void testFunctionScore() {
142142
FilterFunctionBuilder[] functions = {
143143
new FunctionScoreQueryBuilder.FilterFunctionBuilder(
144144
matchQuery("name", "kimchy"), // <1>
145-
randomFunction("ABCDEF")), // <2>
145+
randomFunction()), // <2>
146146
new FunctionScoreQueryBuilder.FilterFunctionBuilder(
147147
exponentialDecayFunction("age", 0L, 1L)) // <3>
148148
};

core/src/main/java/org/elasticsearch/common/lucene/search/function/RandomScoreFunction.java

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
*/
1919
package org.elasticsearch.common.lucene.search.function;
2020

21+
import com.carrotsearch.hppc.BitMixer;
22+
2123
import org.apache.lucene.index.LeafReaderContext;
2224
import org.apache.lucene.search.Explanation;
2325
import org.apache.lucene.util.StringHelper;
@@ -33,17 +35,9 @@
3335
*/
3436
public class RandomScoreFunction extends ScoreFunction {
3537

36-
private int originalSeed;
37-
private int saltedSeed;
38-
private final IndexFieldData<?> uidFieldData;
39-
40-
/**
41-
* Default constructor. Only useful for constructing as a placeholder, but should not be used for actual scoring.
42-
*/
43-
public RandomScoreFunction() {
44-
super(CombineFunction.MULTIPLY);
45-
uidFieldData = null;
46-
}
38+
private final int originalSeed;
39+
private final int saltedSeed;
40+
private final IndexFieldData<?> fieldData;
4741

4842
/**
4943
* Creates a RandomScoreFunction.
@@ -55,33 +49,43 @@ public RandomScoreFunction() {
5549
public RandomScoreFunction(int seed, int salt, IndexFieldData<?> uidFieldData) {
5650
super(CombineFunction.MULTIPLY);
5751
this.originalSeed = seed;
58-
this.saltedSeed = seed ^ salt;
59-
this.uidFieldData = uidFieldData;
60-
if (uidFieldData == null) throw new NullPointerException("uid missing");
52+
this.saltedSeed = BitMixer.mix(seed, salt);
53+
this.fieldData = uidFieldData;
6154
}
6255

6356
@Override
6457
public LeafScoreFunction getLeafScoreFunction(LeafReaderContext ctx) {
65-
AtomicFieldData leafData = uidFieldData.load(ctx);
66-
final SortedBinaryDocValues uidByteData = leafData.getBytesValues();
67-
if (uidByteData == null) throw new NullPointerException("failed to get uid byte data");
58+
final SortedBinaryDocValues values;
59+
if (fieldData != null) {
60+
AtomicFieldData leafData = fieldData.load(ctx);
61+
values = leafData.getBytesValues();
62+
if (values == null) throw new NullPointerException("failed to get fielddata");
63+
} else {
64+
values = null;
65+
}
6866

6967
return new LeafScoreFunction() {
7068

7169
@Override
7270
public double score(int docId, float subQueryScore) throws IOException {
73-
if (uidByteData.advanceExact(docId) == false) {
74-
throw new AssertionError("Document without a _uid");
71+
int hash;
72+
if (values == null) {
73+
hash = BitMixer.mix(ctx.docBase + docId);
74+
} else if (values.advanceExact(docId)) {
75+
hash = StringHelper.murmurhash3_x86_32(values.nextValue(), saltedSeed);
76+
} else {
77+
// field has no value
78+
hash = saltedSeed;
7579
}
76-
int hash = StringHelper.murmurhash3_x86_32(uidByteData.nextValue(), saltedSeed);
7780
return (hash & 0x00FFFFFF) / (float)(1 << 24); // only use the lower 24 bits to construct a float from 0.0-1.0
7881
}
7982

8083
@Override
8184
public Explanation explainScore(int docId, Explanation subQueryScore) throws IOException {
85+
String field = fieldData == null ? null : fieldData.getFieldName();
8286
return Explanation.match(
8387
CombineFunction.toFloat(score(docId, subQueryScore.getValue())),
84-
"random score function (seed: " + originalSeed + ")");
88+
"random score function (seed: " + originalSeed + ", field: " + field + ")");
8589
}
8690
};
8791
}
@@ -94,8 +98,8 @@ public boolean needsScores() {
9498
@Override
9599
protected boolean doEquals(ScoreFunction other) {
96100
RandomScoreFunction randomScoreFunction = (RandomScoreFunction) other;
97-
return this.originalSeed == randomScoreFunction.originalSeed &&
98-
this.saltedSeed == randomScoreFunction.saltedSeed;
101+
return this.originalSeed == randomScoreFunction.originalSeed
102+
&& this.saltedSeed == randomScoreFunction.saltedSeed;
99103
}
100104

101105
@Override

core/src/main/java/org/elasticsearch/index/query/QueryShardContext.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ private ParsedQuery toQuery(QueryBuilder queryBuilder, CheckedFunction<QueryBuil
315315
}
316316
}
317317

318-
public final Index index() {
318+
public Index index() {
319319
return indexSettings.getIndex();
320320
}
321321

core/src/main/java/org/elasticsearch/index/query/functionscore/RandomScoreFunctionBuilder.java

Lines changed: 67 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,16 @@
1818
*/
1919
package org.elasticsearch.index.query.functionscore;
2020

21+
import org.elasticsearch.Version;
2122
import org.elasticsearch.common.ParsingException;
2223
import org.elasticsearch.common.io.stream.StreamInput;
2324
import org.elasticsearch.common.io.stream.StreamOutput;
25+
import org.elasticsearch.common.logging.DeprecationLogger;
26+
import org.elasticsearch.common.logging.Loggers;
2427
import org.elasticsearch.common.lucene.search.function.RandomScoreFunction;
2528
import org.elasticsearch.common.lucene.search.function.ScoreFunction;
2629
import org.elasticsearch.common.xcontent.XContentBuilder;
2730
import org.elasticsearch.common.xcontent.XContentParser;
28-
import org.elasticsearch.index.fielddata.IndexFieldData;
2931
import org.elasticsearch.index.mapper.IdFieldMapper;
3032
import org.elasticsearch.index.mapper.MappedFieldType;
3133
import org.elasticsearch.index.mapper.UidFieldMapper;
@@ -38,7 +40,11 @@
3840
* A function that computes a random score for the matched documents
3941
*/
4042
public class RandomScoreFunctionBuilder extends ScoreFunctionBuilder<RandomScoreFunctionBuilder> {
43+
44+
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(RandomScoreFunctionBuilder.class));
45+
4146
public static final String NAME = "random_score";
47+
private String field;
4248
private Integer seed;
4349

4450
public RandomScoreFunctionBuilder() {
@@ -52,6 +58,9 @@ public RandomScoreFunctionBuilder(StreamInput in) throws IOException {
5258
if (in.readBoolean()) {
5359
seed = in.readInt();
5460
}
61+
if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
62+
field = in.readOptionalString();
63+
}
5564
}
5665

5766
@Override
@@ -62,6 +71,9 @@ protected void doWriteTo(StreamOutput out) throws IOException {
6271
} else {
6372
out.writeBoolean(false);
6473
}
74+
if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
75+
out.writeOptionalString(field);
76+
}
6577
}
6678

6779
@Override
@@ -105,12 +117,33 @@ public Integer getSeed() {
105117
return seed;
106118
}
107119

120+
/**
121+
* Set the field to be used for random number generation. This parameter is compulsory
122+
* when a {@link #seed(int) seed} is set and ignored otherwise. Note that documents that
123+
* have the same value for a field will get the same score.
124+
*/
125+
public RandomScoreFunctionBuilder setField(String field) {
126+
this.field = field;
127+
return this;
128+
}
129+
130+
/**
131+
* Get the field to use for random number generation.
132+
* @see #setField(String)
133+
*/
134+
public String getField() {
135+
return field;
136+
}
137+
108138
@Override
109139
public void doXContent(XContentBuilder builder, Params params) throws IOException {
110140
builder.startObject(getName());
111141
if (seed != null) {
112142
builder.field("seed", seed);
113143
}
144+
if (field != null) {
145+
builder.field("field", field);
146+
}
114147
builder.endObject();
115148
}
116149

@@ -126,19 +159,39 @@ protected int doHashCode() {
126159

127160
@Override
128161
protected ScoreFunction doToFunction(QueryShardContext context) {
129-
final MappedFieldType fieldType;
130-
if (context.getIndexSettings().isSingleType()) {
131-
fieldType = context.getMapperService().fullName(IdFieldMapper.NAME);
162+
final int salt = (context.index().getName().hashCode() << 10) | context.getShardId();
163+
if (seed == null) {
164+
// DocID-based random score generation
165+
return new RandomScoreFunction(hash(context.nowInMillis()), salt, null);
132166
} else {
133-
fieldType = context.getMapperService().fullName(UidFieldMapper.NAME);
134-
}
135-
if (fieldType == null) {
136-
// mapper could be null if we are on a shard with no docs yet, so this won't actually be used
137-
return new RandomScoreFunction();
167+
final MappedFieldType fieldType;
168+
if (field != null) {
169+
fieldType = context.getMapperService().fullName(field);
170+
} else {
171+
DEPRECATION_LOGGER.deprecated(
172+
"As of version 7.0 Elasticsearch will require that a [field] parameter is provided when a [seed] is set");
173+
if (context.getIndexSettings().isSingleType()) {
174+
fieldType = context.getMapperService().fullName(IdFieldMapper.NAME);
175+
} else {
176+
fieldType = context.getMapperService().fullName(UidFieldMapper.NAME);
177+
}
178+
}
179+
if (fieldType == null) {
180+
if (context.getMapperService().types().isEmpty()) {
181+
// no mappings: the index is empty anyway
182+
return new RandomScoreFunction(hash(context.nowInMillis()), salt, null);
183+
}
184+
throw new IllegalArgumentException("Field [" + field + "] is not mapped on [" + context.index() +
185+
"] and cannot be used as a source of random numbers.");
186+
}
187+
int seed;
188+
if (this.seed != null) {
189+
seed = this.seed;
190+
} else {
191+
seed = hash(context.nowInMillis());
192+
}
193+
return new RandomScoreFunction(seed, salt, context.getForField(fieldType));
138194
}
139-
final int salt = (context.index().getName().hashCode() << 10) | context.getShardId();
140-
final IndexFieldData<?> uidFieldData = context.getForField(fieldType);
141-
return new RandomScoreFunction(this.seed == null ? hash(context.nowInMillis()) : seed, salt, uidFieldData);
142195
}
143196

144197
private static int hash(long value) {
@@ -170,6 +223,8 @@ public static RandomScoreFunctionBuilder fromXContent(XContentParser parser)
170223
throw new ParsingException(parser.getTokenLocation(), "random_score seed must be an int/long or string, not '"
171224
+ token.toString() + "'");
172225
}
226+
} else if ("field".equals(currentFieldName)) {
227+
randomScoreFunctionBuilder.setField(parser.text());
173228
} else {
174229
throw new ParsingException(parser.getTokenLocation(), NAME + " query does not support [" + currentFieldName + "]");
175230
}

core/src/main/java/org/elasticsearch/index/query/functionscore/ScoreFunctionBuilders.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,16 +75,8 @@ public static ScriptScoreFunctionBuilder scriptFunction(String script) {
7575
return (new ScriptScoreFunctionBuilder(new Script(ScriptType.INLINE, Script.DEFAULT_SCRIPT_LANG, script, emptyMap())));
7676
}
7777

78-
public static RandomScoreFunctionBuilder randomFunction(int seed) {
79-
return (new RandomScoreFunctionBuilder()).seed(seed);
80-
}
81-
82-
public static RandomScoreFunctionBuilder randomFunction(long seed) {
83-
return (new RandomScoreFunctionBuilder()).seed(seed);
84-
}
85-
86-
public static RandomScoreFunctionBuilder randomFunction(String seed) {
87-
return (new RandomScoreFunctionBuilder()).seed(seed);
78+
public static RandomScoreFunctionBuilder randomFunction() {
79+
return new RandomScoreFunctionBuilder();
8880
}
8981

9082
public static WeightBuilder weightFactorFunction(float weight) {

core/src/test/java/org/elasticsearch/index/query/functionscore/FunctionScoreQueryBuilderTests.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.elasticsearch.common.xcontent.XContent;
3939
import org.elasticsearch.common.xcontent.XContentParser;
4040
import org.elasticsearch.common.xcontent.XContentType;
41+
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
4142
import org.elasticsearch.index.query.AbstractQueryBuilder;
4243
import org.elasticsearch.index.query.MatchAllQueryBuilder;
4344
import org.elasticsearch.index.query.QueryBuilder;
@@ -191,6 +192,7 @@ private static ScoreFunctionBuilder<?> randomScoreFunction() {
191192
} else {
192193
randomScoreFunctionBuilder.seed(randomAlphaOfLengthBetween(1, 10));
193194
}
195+
randomScoreFunctionBuilder.setField(SeqNoFieldMapper.NAME); // guaranteed to exist
194196
}
195197
functionBuilder = randomScoreFunctionBuilder;
196198
break;
@@ -270,14 +272,14 @@ public void testIllegalArguments() {
270272
expectThrows(IllegalArgumentException.class, () -> new FunctionScoreQueryBuilder((QueryBuilder) null));
271273
expectThrows(IllegalArgumentException.class, () -> new FunctionScoreQueryBuilder((ScoreFunctionBuilder<?>) null));
272274
expectThrows(IllegalArgumentException.class, () -> new FunctionScoreQueryBuilder((FilterFunctionBuilder[]) null));
273-
expectThrows(IllegalArgumentException.class, () -> new FunctionScoreQueryBuilder(null, randomFunction(123)));
275+
expectThrows(IllegalArgumentException.class, () -> new FunctionScoreQueryBuilder(null, randomFunction()));
274276
expectThrows(IllegalArgumentException.class, () -> new FunctionScoreQueryBuilder(matchAllQuery(), (ScoreFunctionBuilder<?>) null));
275277
expectThrows(IllegalArgumentException.class, () -> new FunctionScoreQueryBuilder(matchAllQuery(), (FilterFunctionBuilder[]) null));
276278
expectThrows(IllegalArgumentException.class, () -> new FunctionScoreQueryBuilder(null, new FilterFunctionBuilder[0]));
277279
expectThrows(IllegalArgumentException.class,
278280
() -> new FunctionScoreQueryBuilder(matchAllQuery(), new FilterFunctionBuilder[] { null }));
279281
expectThrows(IllegalArgumentException.class, () -> new FilterFunctionBuilder((ScoreFunctionBuilder<?>) null));
280-
expectThrows(IllegalArgumentException.class, () -> new FilterFunctionBuilder(null, randomFunction(123)));
282+
expectThrows(IllegalArgumentException.class, () -> new FilterFunctionBuilder(null, randomFunction()));
281283
expectThrows(IllegalArgumentException.class, () -> new FilterFunctionBuilder(matchAllQuery(), null));
282284
FunctionScoreQueryBuilder builder = new FunctionScoreQueryBuilder(matchAllQuery());
283285
expectThrows(IllegalArgumentException.class, () -> builder.scoreMode(null));

core/src/test/java/org/elasticsearch/index/query/functionscore/FunctionScoreTests.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ public void closeAllTheReaders() throws IOException {
283283
public void testExplainFunctionScoreQuery() throws IOException {
284284

285285
Explanation functionExplanation = getFunctionScoreExplanation(searcher, RANDOM_SCORE_FUNCTION);
286-
checkFunctionScoreExplanation(functionExplanation, "random score function (seed: 0)");
286+
checkFunctionScoreExplanation(functionExplanation, "random score function (seed: 0, field: test)");
287287
assertThat(functionExplanation.getDetails()[0].getDetails().length, equalTo(0));
288288

289289
functionExplanation = getFunctionScoreExplanation(searcher, FIELD_VALUE_FACTOR_FUNCTION);
@@ -331,7 +331,7 @@ public void checkFunctionScoreExplanation(Explanation randomExplanation, String
331331

332332
public void testExplainFiltersFunctionScoreQuery() throws IOException {
333333
Explanation functionExplanation = getFiltersFunctionScoreExplanation(searcher, RANDOM_SCORE_FUNCTION);
334-
checkFiltersFunctionScoreExplanation(functionExplanation, "random score function (seed: 0)", 0);
334+
checkFiltersFunctionScoreExplanation(functionExplanation, "random score function (seed: 0, field: test)", 0);
335335
assertThat(functionExplanation.getDetails()[0].getDetails()[0].getDetails()[1].getDetails().length, equalTo(0));
336336

337337
functionExplanation = getFiltersFunctionScoreExplanation(searcher, FIELD_VALUE_FACTOR_FUNCTION);
@@ -366,7 +366,7 @@ public void testExplainFiltersFunctionScoreQuery() throws IOException {
366366
, LIN_DECAY_FUNCTION
367367
);
368368

369-
checkFiltersFunctionScoreExplanation(functionExplanation, "random score function (seed: 0)", 0);
369+
checkFiltersFunctionScoreExplanation(functionExplanation, "random score function (seed: 0, field: test)", 0);
370370
assertThat(functionExplanation.getDetails()[0].getDetails()[0].getDetails()[1].getDetails().length, equalTo(0));
371371

372372
checkFiltersFunctionScoreExplanation(functionExplanation, "field value function: ln(doc['test'].value?:1.0 * factor=1.0)", 1);

core/src/test/java/org/elasticsearch/index/query/functionscore/ScoreFunctionBuilderTests.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,18 @@
1919

2020
package org.elasticsearch.index.query.functionscore;
2121

22+
import org.elasticsearch.Version;
23+
import org.elasticsearch.cluster.metadata.IndexMetaData;
24+
import org.elasticsearch.common.settings.Settings;
25+
import org.elasticsearch.index.IndexSettings;
26+
import org.elasticsearch.index.mapper.MappedFieldType;
27+
import org.elasticsearch.index.mapper.MapperService;
28+
import org.elasticsearch.index.mapper.NumberFieldMapper;
29+
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
30+
import org.elasticsearch.index.query.QueryShardContext;
2231
import org.elasticsearch.script.Script;
2332
import org.elasticsearch.test.ESTestCase;
33+
import org.mockito.Mockito;
2434

2535
public class ScoreFunctionBuilderTests extends ESTestCase {
2636

@@ -39,4 +49,23 @@ public void testIllegalArguments() {
3949
expectThrows(IllegalArgumentException.class, () -> new ExponentialDecayFunctionBuilder("", "", null, ""));
4050
expectThrows(IllegalArgumentException.class, () -> new ExponentialDecayFunctionBuilder("", "", null, "", randomDouble()));
4151
}
52+
53+
public void testRandomScoreFunctionWithSeed() throws Exception {
54+
RandomScoreFunctionBuilder builder = new RandomScoreFunctionBuilder();
55+
builder.seed(42);
56+
QueryShardContext context = Mockito.mock(QueryShardContext.class);
57+
Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
58+
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1).build();
59+
IndexSettings settings = new IndexSettings(IndexMetaData.builder("index").settings(indexSettings).build(), Settings.EMPTY);
60+
Mockito.when(context.index()).thenReturn(settings.getIndex());
61+
Mockito.when(context.getShardId()).thenReturn(0);
62+
Mockito.when(context.getIndexSettings()).thenReturn(settings);
63+
MapperService mapperService = Mockito.mock(MapperService.class);
64+
MappedFieldType ft = new NumberFieldMapper.NumberFieldType(NumberType.LONG);
65+
ft.setName("foo");
66+
Mockito.when(mapperService.fullName(Mockito.anyString())).thenReturn(ft);
67+
Mockito.when(context.getMapperService()).thenReturn(mapperService);
68+
builder.toFunction(context);
69+
assertWarnings("As of version 7.0 Elasticsearch will require that a [field] parameter is provided when a [seed] is set");
70+
}
4271
}

0 commit comments

Comments
 (0)