Skip to content

Commit 4ae4ac0

Browse files
author
Christoph Büscher
authored
Add Expected Reciprocal Rank metric (#31891)
This change adds Expected Reciprocal Rank (ERR) as a ranking evaluation metric as described in: Chapelle, O., Metzler, D., Zhang, Y., & Grinspan, P. (2009). Expected reciprocal rank for graded relevance. Proceedings of the 18th ACM Conference on Information and Knowledge Management. https://doi.org/10.1145/1645953.1646033 ERR is an extension of the classical reciprocal rank to the graded relevance case and assumes a cascade browsing model. It quantifies the usefulness of a document at rank `i` conditioned on the degree of relevance of the items at ranks less than `i`. ERR seems to be gaining traction as an alternative to (n)DCG, so it seems like a good metric to support. Also ERR seems to be the default optimization metric used for training in RankLib, a widely used learning to rank library. Relates to #29653
1 parent 6fcd606 commit 4ae4ac0

File tree

5 files changed

+522
-9
lines changed

5 files changed

+522
-9
lines changed

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java

+1
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ public <T> void declareNamedObjects(BiConsumer<Value, List<T>> consumer, NamedOb
294294
}
295295
}
296296

297+
/**
 * Returns the name of this parser, obtained by delegating to {@code objectParser.getName()}.
 */
@Override
297298
public String getName() {
298299
return objectParser.getName();
299300
}

modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -326,9 +326,9 @@ public boolean equals(Object obj) {
326326
return false;
327327
}
328328
DiscountedCumulativeGain.Detail other = (DiscountedCumulativeGain.Detail) obj;
329-
return (this.dcg == other.dcg &&
330-
this.idcg == other.idcg &&
331-
this.unratedDocs == other.unratedDocs);
329+
return Double.compare(this.dcg, other.dcg) == 0 &&
330+
Double.compare(this.idcg, other.idcg) == 0 &&
331+
this.unratedDocs == other.unratedDocs;
332332
}
333333

334334
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.rankeval;
21+
22+
import org.elasticsearch.common.Nullable;
23+
import org.elasticsearch.common.ParseField;
24+
import org.elasticsearch.common.io.stream.StreamInput;
25+
import org.elasticsearch.common.io.stream.StreamOutput;
26+
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
27+
import org.elasticsearch.common.xcontent.XContentBuilder;
28+
import org.elasticsearch.common.xcontent.XContentParser;
29+
import org.elasticsearch.search.SearchHit;
30+
31+
import java.io.IOException;
32+
import java.util.ArrayList;
33+
import java.util.List;
34+
import java.util.Objects;
35+
import java.util.Optional;
36+
37+
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
38+
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
39+
import static org.elasticsearch.index.rankeval.EvaluationMetric.joinHitsWithRatings;
40+
41+
/**
42+
* Implementation of the Expected Reciprocal Rank metric described in:<p>
43+
*
44+
* Chapelle, O., Metlzer, D., Zhang, Y., &amp; Grinspan, P. (2009).<br>
45+
* Expected reciprocal rank for graded relevance.<br>
46+
* Proceeding of the 18th ACM Conference on Information and Knowledge Management - CIKM ’09, 621.<br>
47+
* https://doi.org/10.1145/1645953.1646033
48+
*/
49+
public class ExpectedReciprocalRank implements EvaluationMetric {
50+
51+
/** the default search window size */
52+
private static final int DEFAULT_K = 10;
53+
54+
/** the search window size */
55+
private final int k;
56+
57+
/**
58+
* Optional. If set, this will be the rating for docs that are unrated in the ranking evaluation request
59+
*/
60+
private final Integer unknownDocRating;
61+
62+
private final int maxRelevance;
63+
64+
private final double two_pow_maxRelevance;
65+
66+
public static final String NAME = "expected_reciprocal_rank";
67+
68+
public ExpectedReciprocalRank(int maxRelevance) {
69+
this(maxRelevance, null, DEFAULT_K);
70+
}
71+
72+
/**
73+
* @param maxRelevance
74+
* the maximal relevance judgment in the evaluation dataset
75+
* @param unknownDocRating
76+
* the rating for documents the user hasn't supplied an explicit
77+
* rating for. Can be {@code null}, in which case document is
78+
* skipped.
79+
* @param k
80+
* the search window size all request use.
81+
*/
82+
public ExpectedReciprocalRank(int maxRelevance, @Nullable Integer unknownDocRating, int k) {
83+
this.maxRelevance = maxRelevance;
84+
this.unknownDocRating = unknownDocRating;
85+
this.k = k;
86+
// we can pre-calculate the constant used in metric calculation
87+
this.two_pow_maxRelevance = Math.pow(2, this.maxRelevance);
88+
}
89+
90+
ExpectedReciprocalRank(StreamInput in) throws IOException {
91+
this.maxRelevance = in.readVInt();
92+
this.unknownDocRating = in.readOptionalVInt();
93+
this.k = in.readVInt();
94+
this.two_pow_maxRelevance = Math.pow(2, this.maxRelevance);
95+
}
96+
97+
@Override
98+
public void writeTo(StreamOutput out) throws IOException {
99+
out.writeVInt(maxRelevance);
100+
out.writeOptionalVInt(unknownDocRating);
101+
out.writeVInt(k);
102+
}
103+
104+
@Override
105+
public String getWriteableName() {
106+
return NAME;
107+
}
108+
109+
int getK() {
110+
return this.k;
111+
}
112+
113+
int getMaxRelevance() {
114+
return this.maxRelevance;
115+
}
116+
117+
/**
118+
* get the rating used for unrated documents
119+
*/
120+
public Integer getUnknownDocRating() {
121+
return this.unknownDocRating;
122+
}
123+
124+
125+
@Override
126+
public Optional<Integer> forcedSearchSize() {
127+
return Optional.of(k);
128+
}
129+
130+
@Override
131+
public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List<RatedDocument> ratedDocs) {
132+
List<RatedSearchHit> ratedHits = joinHitsWithRatings(hits, ratedDocs);
133+
if (ratedHits.size() > this.k) {
134+
ratedHits = ratedHits.subList(0, k);
135+
}
136+
List<Integer> ratingsInSearchHits = new ArrayList<>(ratedHits.size());
137+
int unratedResults = 0;
138+
for (RatedSearchHit hit : ratedHits) {
139+
// unknownDocRating might be null, in which case unrated will be ignored in the calculation.
140+
// we still need to add them as a placeholder so the rank of the subsequent ratings is correct
141+
ratingsInSearchHits.add(hit.getRating().orElse(unknownDocRating));
142+
if (hit.getRating().isPresent() == false) {
143+
unratedResults++;
144+
}
145+
}
146+
147+
double p = 1;
148+
double err = 0;
149+
int rank = 1;
150+
for (Integer rating : ratingsInSearchHits) {
151+
if (rating != null) {
152+
double probR = probabilityOfRelevance(rating);
153+
err = err + (p * probR / rank);
154+
p = p * (1 - probR);
155+
}
156+
rank++;
157+
}
158+
159+
EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, err);
160+
evalQueryQuality.addHitsAndRatings(ratedHits);
161+
evalQueryQuality.setMetricDetails(new Detail(unratedResults));
162+
return evalQueryQuality;
163+
}
164+
165+
double probabilityOfRelevance(Integer rating) {
166+
return (Math.pow(2, rating) - 1) / this.two_pow_maxRelevance;
167+
}
168+
169+
private static final ParseField K_FIELD = new ParseField("k");
170+
private static final ParseField UNKNOWN_DOC_RATING_FIELD = new ParseField("unknown_doc_rating");
171+
private static final ParseField MAX_RELEVANCE_FIELD = new ParseField("maximum_relevance");
172+
private static final ConstructingObjectParser<ExpectedReciprocalRank, Void> PARSER = new ConstructingObjectParser<>("dcg", false,
173+
args -> {
174+
int maxRelevance = (Integer) args[0];
175+
Integer optK = (Integer) args[2];
176+
return new ExpectedReciprocalRank(maxRelevance, (Integer) args[1],
177+
optK == null ? DEFAULT_K : optK);
178+
});
179+
180+
181+
static {
182+
PARSER.declareInt(constructorArg(), MAX_RELEVANCE_FIELD);
183+
PARSER.declareInt(optionalConstructorArg(), UNKNOWN_DOC_RATING_FIELD);
184+
PARSER.declareInt(optionalConstructorArg(), K_FIELD);
185+
}
186+
187+
public static ExpectedReciprocalRank fromXContent(XContentParser parser) {
188+
return PARSER.apply(parser, null);
189+
}
190+
191+
@Override
192+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
193+
builder.startObject();
194+
builder.startObject(NAME);
195+
builder.field(MAX_RELEVANCE_FIELD.getPreferredName(), this.maxRelevance);
196+
if (unknownDocRating != null) {
197+
builder.field(UNKNOWN_DOC_RATING_FIELD.getPreferredName(), this.unknownDocRating);
198+
}
199+
builder.field(K_FIELD.getPreferredName(), this.k);
200+
builder.endObject();
201+
builder.endObject();
202+
return builder;
203+
}
204+
205+
@Override
206+
public final boolean equals(Object obj) {
207+
if (this == obj) {
208+
return true;
209+
}
210+
if (obj == null || getClass() != obj.getClass()) {
211+
return false;
212+
}
213+
ExpectedReciprocalRank other = (ExpectedReciprocalRank) obj;
214+
return this.k == other.k &&
215+
this.maxRelevance == other.maxRelevance
216+
&& Objects.equals(unknownDocRating, other.unknownDocRating);
217+
}
218+
219+
@Override
220+
public final int hashCode() {
221+
return Objects.hash(unknownDocRating, k, maxRelevance);
222+
}
223+
224+
public static final class Detail implements MetricDetail {
225+
226+
private static ParseField UNRATED_FIELD = new ParseField("unrated_docs");
227+
private final int unratedDocs;
228+
229+
Detail(int unratedDocs) {
230+
this.unratedDocs = unratedDocs;
231+
}
232+
233+
Detail(StreamInput in) throws IOException {
234+
this.unratedDocs = in.readVInt();
235+
}
236+
237+
@Override
238+
public
239+
String getMetricName() {
240+
return NAME;
241+
}
242+
243+
@Override
244+
public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
245+
return builder.field(UNRATED_FIELD.getPreferredName(), this.unratedDocs);
246+
}
247+
248+
private static final ConstructingObjectParser<Detail, Void> PARSER = new ConstructingObjectParser<>(NAME, true, args -> {
249+
return new Detail((Integer) args[0]);
250+
});
251+
252+
static {
253+
PARSER.declareInt(constructorArg(), UNRATED_FIELD);
254+
}
255+
256+
public static Detail fromXContent(XContentParser parser) {
257+
return PARSER.apply(parser, null);
258+
}
259+
260+
@Override
261+
public void writeTo(StreamOutput out) throws IOException {
262+
out.writeVInt(this.unratedDocs);
263+
}
264+
265+
@Override
266+
public String getWriteableName() {
267+
return NAME;
268+
}
269+
270+
/**
271+
* @return the number of unrated documents in the search results
272+
*/
273+
public Object getUnratedDocs() {
274+
return this.unratedDocs;
275+
}
276+
277+
@Override
278+
public boolean equals(Object obj) {
279+
if (this == obj) {
280+
return true;
281+
}
282+
if (obj == null || getClass() != obj.getClass()) {
283+
return false;
284+
}
285+
ExpectedReciprocalRank.Detail other = (ExpectedReciprocalRank.Detail) obj;
286+
return this.unratedDocs == other.unratedDocs;
287+
}
288+
289+
@Override
290+
public int hashCode() {
291+
return Objects.hash(this.unratedDocs);
292+
}
293+
}
294+
}
295+

modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java

+6-6
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase {
5555
/**
5656
* Assuming the docs are ranked in the following order:
5757
*
58-
* rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1)
58+
* rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1)
5959
* -------------------------------------------------------------------------------------------
6060
* 1 | 3 | 7.0 | 1.0 | 7.0
6161
* 2 | 2 | 3.0 | 1.5849625007211563 | 1.8927892607143721
@@ -82,7 +82,7 @@ public void testDCGAt() {
8282
* Check with normalization: to get the maximal possible dcg, sort documents by
8383
* relevance in descending order
8484
*
85-
* rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1)
85+
* rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1)
8686
* ---------------------------------------------------------------------------------------
8787
* 1 | 3 | 7.0 | 1.0  | 7.0
8888
* 2 | 3 | 7.0 | 1.5849625007211563 | 4.416508275000202
@@ -101,7 +101,7 @@ public void testDCGAt() {
101101
* This tests metric when some documents in the search result don't have a
102102
* rating provided by the user.
103103
*
104-
* rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1)
104+
* rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1)
105105
* -------------------------------------------------------------------------------------------
106106
* 1 | 3 | 7.0 | 1.0 | 7.0
107107
* 2 | 2 | 3.0 | 1.5849625007211563 | 1.8927892607143721
@@ -134,7 +134,7 @@ public void testDCGAtSixMissingRatings() {
134134
* Check with normalization: to get the maximal possible dcg, sort documents by
135135
* relevance in descending order
136136
*
137-
* rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1)
137+
* rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1)
138138
* ----------------------------------------------------------------------------------------
139139
* 1 | 3 | 7.0 | 1.0  | 7.0
140140
* 2 | 3 | 7.0 | 1.5849625007211563 | 4.416508275000202
@@ -154,7 +154,7 @@ public void testDCGAtSixMissingRatings() {
154154
* documents than search hits because we restrict DCG to be calculated at the
155155
* fourth position
156156
*
157-
* rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1)
157+
* rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1)
158158
* -------------------------------------------------------------------------------------------
159159
* 1 | 3 | 7.0 | 1.0 | 7.0
160160
* 2 | 2 | 3.0 | 1.5849625007211563 | 1.8927892607143721
@@ -191,7 +191,7 @@ public void testDCGAtFourMoreRatings() {
191191
* Check with normalization: to get the maximal possible dcg, sort documents by
192192
* relevance in descending order
193193
*
194-
* rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1)
194+
* rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1)
195195
* ---------------------------------------------------------------------------------------
196196
* 1 | 3 | 7.0 | 1.0  | 7.0
197197
* 2 | 3 | 7.0 | 1.5849625007211563 | 4.416508275000202

0 commit comments

Comments
 (0)