|
| 1 | +/* |
| 2 | + * Licensed to Elasticsearch under one or more contributor |
| 3 | + * license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright |
| 5 | + * ownership. Elasticsearch licenses this file to you under |
| 6 | + * the Apache License, Version 2.0 (the "License"); you may |
| 7 | + * not use this file except in compliance with the License. |
| 8 | + * You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, |
| 13 | + * software distributed under the License is distributed on an |
| 14 | + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + * KIND, either express or implied. See the License for the |
| 16 | + * specific language governing permissions and limitations |
| 17 | + * under the License. |
| 18 | + */ |
| 19 | + |
| 20 | +package org.elasticsearch.index.rankeval; |
| 21 | + |
| 22 | +import org.elasticsearch.common.Nullable; |
| 23 | +import org.elasticsearch.common.ParseField; |
| 24 | +import org.elasticsearch.common.io.stream.StreamInput; |
| 25 | +import org.elasticsearch.common.io.stream.StreamOutput; |
| 26 | +import org.elasticsearch.common.xcontent.ConstructingObjectParser; |
| 27 | +import org.elasticsearch.common.xcontent.XContentBuilder; |
| 28 | +import org.elasticsearch.common.xcontent.XContentParser; |
| 29 | +import org.elasticsearch.search.SearchHit; |
| 30 | + |
| 31 | +import java.io.IOException; |
| 32 | +import java.util.ArrayList; |
| 33 | +import java.util.List; |
| 34 | +import java.util.Objects; |
| 35 | +import java.util.Optional; |
| 36 | + |
| 37 | +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; |
| 38 | +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; |
| 39 | +import static org.elasticsearch.index.rankeval.EvaluationMetric.joinHitsWithRatings; |
| 40 | + |
| 41 | +/** |
| 42 | + * Implementation of the Expected Reciprocal Rank metric described in:<p> |
| 43 | + * |
| 44 | + * Chapelle, O., Metlzer, D., Zhang, Y., & Grinspan, P. (2009).<br> |
| 45 | + * Expected reciprocal rank for graded relevance.<br> |
| 46 | + * Proceeding of the 18th ACM Conference on Information and Knowledge Management - CIKM ’09, 621.<br> |
| 47 | + * https://doi.org/10.1145/1645953.1646033 |
| 48 | + */ |
| 49 | +public class ExpectedReciprocalRank implements EvaluationMetric { |
| 50 | + |
| 51 | + /** the default search window size */ |
| 52 | + private static final int DEFAULT_K = 10; |
| 53 | + |
| 54 | + /** the search window size */ |
| 55 | + private final int k; |
| 56 | + |
| 57 | + /** |
| 58 | + * Optional. If set, this will be the rating for docs that are unrated in the ranking evaluation request |
| 59 | + */ |
| 60 | + private final Integer unknownDocRating; |
| 61 | + |
| 62 | + private final int maxRelevance; |
| 63 | + |
| 64 | + private final double two_pow_maxRelevance; |
| 65 | + |
| 66 | + public static final String NAME = "expected_reciprocal_rank"; |
| 67 | + |
| 68 | + public ExpectedReciprocalRank(int maxRelevance) { |
| 69 | + this(maxRelevance, null, DEFAULT_K); |
| 70 | + } |
| 71 | + |
| 72 | + /** |
| 73 | + * @param maxRelevance |
| 74 | + * the maximal relevance judgment in the evaluation dataset |
| 75 | + * @param unknownDocRating |
| 76 | + * the rating for documents the user hasn't supplied an explicit |
| 77 | + * rating for. Can be {@code null}, in which case document is |
| 78 | + * skipped. |
| 79 | + * @param k |
| 80 | + * the search window size all request use. |
| 81 | + */ |
| 82 | + public ExpectedReciprocalRank(int maxRelevance, @Nullable Integer unknownDocRating, int k) { |
| 83 | + this.maxRelevance = maxRelevance; |
| 84 | + this.unknownDocRating = unknownDocRating; |
| 85 | + this.k = k; |
| 86 | + // we can pre-calculate the constant used in metric calculation |
| 87 | + this.two_pow_maxRelevance = Math.pow(2, this.maxRelevance); |
| 88 | + } |
| 89 | + |
| 90 | + ExpectedReciprocalRank(StreamInput in) throws IOException { |
| 91 | + this.maxRelevance = in.readVInt(); |
| 92 | + this.unknownDocRating = in.readOptionalVInt(); |
| 93 | + this.k = in.readVInt(); |
| 94 | + this.two_pow_maxRelevance = Math.pow(2, this.maxRelevance); |
| 95 | + } |
| 96 | + |
| 97 | + @Override |
| 98 | + public void writeTo(StreamOutput out) throws IOException { |
| 99 | + out.writeVInt(maxRelevance); |
| 100 | + out.writeOptionalVInt(unknownDocRating); |
| 101 | + out.writeVInt(k); |
| 102 | + } |
| 103 | + |
| 104 | + @Override |
| 105 | + public String getWriteableName() { |
| 106 | + return NAME; |
| 107 | + } |
| 108 | + |
| 109 | + int getK() { |
| 110 | + return this.k; |
| 111 | + } |
| 112 | + |
| 113 | + int getMaxRelevance() { |
| 114 | + return this.maxRelevance; |
| 115 | + } |
| 116 | + |
| 117 | + /** |
| 118 | + * get the rating used for unrated documents |
| 119 | + */ |
| 120 | + public Integer getUnknownDocRating() { |
| 121 | + return this.unknownDocRating; |
| 122 | + } |
| 123 | + |
| 124 | + |
| 125 | + @Override |
| 126 | + public Optional<Integer> forcedSearchSize() { |
| 127 | + return Optional.of(k); |
| 128 | + } |
| 129 | + |
| 130 | + @Override |
| 131 | + public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List<RatedDocument> ratedDocs) { |
| 132 | + List<RatedSearchHit> ratedHits = joinHitsWithRatings(hits, ratedDocs); |
| 133 | + if (ratedHits.size() > this.k) { |
| 134 | + ratedHits = ratedHits.subList(0, k); |
| 135 | + } |
| 136 | + List<Integer> ratingsInSearchHits = new ArrayList<>(ratedHits.size()); |
| 137 | + int unratedResults = 0; |
| 138 | + for (RatedSearchHit hit : ratedHits) { |
| 139 | + // unknownDocRating might be null, in which case unrated will be ignored in the calculation. |
| 140 | + // we still need to add them as a placeholder so the rank of the subsequent ratings is correct |
| 141 | + ratingsInSearchHits.add(hit.getRating().orElse(unknownDocRating)); |
| 142 | + if (hit.getRating().isPresent() == false) { |
| 143 | + unratedResults++; |
| 144 | + } |
| 145 | + } |
| 146 | + |
| 147 | + double p = 1; |
| 148 | + double err = 0; |
| 149 | + int rank = 1; |
| 150 | + for (Integer rating : ratingsInSearchHits) { |
| 151 | + if (rating != null) { |
| 152 | + double probR = probabilityOfRelevance(rating); |
| 153 | + err = err + (p * probR / rank); |
| 154 | + p = p * (1 - probR); |
| 155 | + } |
| 156 | + rank++; |
| 157 | + } |
| 158 | + |
| 159 | + EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, err); |
| 160 | + evalQueryQuality.addHitsAndRatings(ratedHits); |
| 161 | + evalQueryQuality.setMetricDetails(new Detail(unratedResults)); |
| 162 | + return evalQueryQuality; |
| 163 | + } |
| 164 | + |
| 165 | + double probabilityOfRelevance(Integer rating) { |
| 166 | + return (Math.pow(2, rating) - 1) / this.two_pow_maxRelevance; |
| 167 | + } |
| 168 | + |
| 169 | + private static final ParseField K_FIELD = new ParseField("k"); |
| 170 | + private static final ParseField UNKNOWN_DOC_RATING_FIELD = new ParseField("unknown_doc_rating"); |
| 171 | + private static final ParseField MAX_RELEVANCE_FIELD = new ParseField("maximum_relevance"); |
| 172 | + private static final ConstructingObjectParser<ExpectedReciprocalRank, Void> PARSER = new ConstructingObjectParser<>("dcg", false, |
| 173 | + args -> { |
| 174 | + int maxRelevance = (Integer) args[0]; |
| 175 | + Integer optK = (Integer) args[2]; |
| 176 | + return new ExpectedReciprocalRank(maxRelevance, (Integer) args[1], |
| 177 | + optK == null ? DEFAULT_K : optK); |
| 178 | + }); |
| 179 | + |
| 180 | + |
| 181 | + static { |
| 182 | + PARSER.declareInt(constructorArg(), MAX_RELEVANCE_FIELD); |
| 183 | + PARSER.declareInt(optionalConstructorArg(), UNKNOWN_DOC_RATING_FIELD); |
| 184 | + PARSER.declareInt(optionalConstructorArg(), K_FIELD); |
| 185 | + } |
| 186 | + |
| 187 | + public static ExpectedReciprocalRank fromXContent(XContentParser parser) { |
| 188 | + return PARSER.apply(parser, null); |
| 189 | + } |
| 190 | + |
| 191 | + @Override |
| 192 | + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { |
| 193 | + builder.startObject(); |
| 194 | + builder.startObject(NAME); |
| 195 | + builder.field(MAX_RELEVANCE_FIELD.getPreferredName(), this.maxRelevance); |
| 196 | + if (unknownDocRating != null) { |
| 197 | + builder.field(UNKNOWN_DOC_RATING_FIELD.getPreferredName(), this.unknownDocRating); |
| 198 | + } |
| 199 | + builder.field(K_FIELD.getPreferredName(), this.k); |
| 200 | + builder.endObject(); |
| 201 | + builder.endObject(); |
| 202 | + return builder; |
| 203 | + } |
| 204 | + |
| 205 | + @Override |
| 206 | + public final boolean equals(Object obj) { |
| 207 | + if (this == obj) { |
| 208 | + return true; |
| 209 | + } |
| 210 | + if (obj == null || getClass() != obj.getClass()) { |
| 211 | + return false; |
| 212 | + } |
| 213 | + ExpectedReciprocalRank other = (ExpectedReciprocalRank) obj; |
| 214 | + return this.k == other.k && |
| 215 | + this.maxRelevance == other.maxRelevance |
| 216 | + && Objects.equals(unknownDocRating, other.unknownDocRating); |
| 217 | + } |
| 218 | + |
| 219 | + @Override |
| 220 | + public final int hashCode() { |
| 221 | + return Objects.hash(unknownDocRating, k, maxRelevance); |
| 222 | + } |
| 223 | + |
| 224 | + public static final class Detail implements MetricDetail { |
| 225 | + |
| 226 | + private static ParseField UNRATED_FIELD = new ParseField("unrated_docs"); |
| 227 | + private final int unratedDocs; |
| 228 | + |
| 229 | + Detail(int unratedDocs) { |
| 230 | + this.unratedDocs = unratedDocs; |
| 231 | + } |
| 232 | + |
| 233 | + Detail(StreamInput in) throws IOException { |
| 234 | + this.unratedDocs = in.readVInt(); |
| 235 | + } |
| 236 | + |
| 237 | + @Override |
| 238 | + public |
| 239 | + String getMetricName() { |
| 240 | + return NAME; |
| 241 | + } |
| 242 | + |
| 243 | + @Override |
| 244 | + public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { |
| 245 | + return builder.field(UNRATED_FIELD.getPreferredName(), this.unratedDocs); |
| 246 | + } |
| 247 | + |
| 248 | + private static final ConstructingObjectParser<Detail, Void> PARSER = new ConstructingObjectParser<>(NAME, true, args -> { |
| 249 | + return new Detail((Integer) args[0]); |
| 250 | + }); |
| 251 | + |
| 252 | + static { |
| 253 | + PARSER.declareInt(constructorArg(), UNRATED_FIELD); |
| 254 | + } |
| 255 | + |
| 256 | + public static Detail fromXContent(XContentParser parser) { |
| 257 | + return PARSER.apply(parser, null); |
| 258 | + } |
| 259 | + |
| 260 | + @Override |
| 261 | + public void writeTo(StreamOutput out) throws IOException { |
| 262 | + out.writeVInt(this.unratedDocs); |
| 263 | + } |
| 264 | + |
| 265 | + @Override |
| 266 | + public String getWriteableName() { |
| 267 | + return NAME; |
| 268 | + } |
| 269 | + |
| 270 | + /** |
| 271 | + * @return the number of unrated documents in the search results |
| 272 | + */ |
| 273 | + public Object getUnratedDocs() { |
| 274 | + return this.unratedDocs; |
| 275 | + } |
| 276 | + |
| 277 | + @Override |
| 278 | + public boolean equals(Object obj) { |
| 279 | + if (this == obj) { |
| 280 | + return true; |
| 281 | + } |
| 282 | + if (obj == null || getClass() != obj.getClass()) { |
| 283 | + return false; |
| 284 | + } |
| 285 | + ExpectedReciprocalRank.Detail other = (ExpectedReciprocalRank.Detail) obj; |
| 286 | + return this.unratedDocs == other.unratedDocs; |
| 287 | + } |
| 288 | + |
| 289 | + @Override |
| 290 | + public int hashCode() { |
| 291 | + return Objects.hash(this.unratedDocs); |
| 292 | + } |
| 293 | + } |
| 294 | +} |
| 295 | + |
0 commit comments