Skip to content

Commit 640fda3

Browse files
author
Christoph Büscher
committed
BlendedTermQuery's equals method should consider boosts (#48193)
This changes the query's equals() method so that the boost factor of each term is considered in the equality calculation. This means two queries are only equal if both their terms and the associated boosts match. As before, the ordering of the terms doesn't matter, which is why we still need to sort the terms (now together with their boosts) internally for comparison on the first equals() call. Boosts that are `null` are considered equal to boosts of 1.0f because topLevelQuery() will only wrap into a BoostQuery if the boost is not null and different from 1f. Closes #48184
1 parent aefebb2 commit 640fda3

File tree

2 files changed

+132
-16
lines changed

2 files changed

+132
-16
lines changed

server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
6060
* which is the minimum number of documents the terms occurs in.
6161
* </p>
6262
*/
63-
// TODO maybe contribute to Lucene
6463
public abstract class BlendedTermQuery extends Query {
6564

6665
private final Term[] terms;
@@ -246,36 +245,82 @@ public String toString(String field) {
246245
return builder.toString();
247246
}
248247

249-
private volatile Term[] equalTerms = null;
248+
private class TermAndBoost implements Comparable<TermAndBoost> {
249+
protected final Term term;
250+
protected float boost;
250251

251-
private Term[] equalsTerms() {
252-
if (terms.length == 1) {
253-
return terms;
252+
protected TermAndBoost(Term term, float boost) {
253+
this.term = term;
254+
this.boost = boost;
255+
}
256+
257+
@Override
258+
public int compareTo(TermAndBoost other) {
259+
int compareTo = term.compareTo(other.term);
260+
if (compareTo == 0) {
261+
compareTo = Float.compare(boost, other.boost);
262+
}
263+
return compareTo;
264+
}
265+
266+
@Override
267+
public boolean equals(Object o) {
268+
if (this == o) {
269+
return true;
270+
}
271+
if (o instanceof TermAndBoost == false) {
272+
return false;
273+
}
274+
275+
TermAndBoost that = (TermAndBoost) o;
276+
return term.equals(that.term) && (Float.compare(boost, that.boost) == 0);
277+
}
278+
279+
@Override
280+
public int hashCode() {
281+
return 31 * term.hashCode() + Float.hashCode(boost);
254282
}
255-
if (equalTerms == null) {
283+
}
284+
285+
private volatile TermAndBoost[] equalTermsAndBoosts = null;
286+
287+
private TermAndBoost[] equalsTermsAndBoosts() {
288+
if (equalTermsAndBoosts != null) {
289+
return equalTermsAndBoosts;
290+
}
291+
if (terms.length == 1) {
292+
float boost = (boosts != null ? boosts[0] : 1f);
293+
equalTermsAndBoosts = new TermAndBoost[] {new TermAndBoost(terms[0], boost)};
294+
} else {
256295
// sort the terms to make sure equals and hashCode are consistent
257296
// this should be a very small cost and equivalent to a HashSet but less object creation
258-
final Term[] t = new Term[terms.length];
259-
System.arraycopy(terms, 0, t, 0, terms.length);
260-
ArrayUtil.timSort(t);
261-
equalTerms = t;
297+
equalTermsAndBoosts = new TermAndBoost[terms.length];
298+
for (int i = 0; i < terms.length; i++) {
299+
float boost = (boosts != null ? boosts[i] : 1f);
300+
equalTermsAndBoosts[i] = new TermAndBoost(terms[i], boost);
301+
}
302+
ArrayUtil.timSort(equalTermsAndBoosts);
262303
}
263-
return equalTerms;
264-
304+
return equalTermsAndBoosts;
265305
}
266306

267307
@Override
268308
public boolean equals(Object o) {
269-
if (this == o) return true;
270-
if (sameClassAs(o) == false) return false;
309+
if (this == o) {
310+
return true;
311+
}
312+
if (sameClassAs(o) == false) {
313+
return false;
314+
}
271315

272316
BlendedTermQuery that = (BlendedTermQuery) o;
273-
return Arrays.equals(equalsTerms(), that.equalsTerms());
317+
return Arrays.equals(equalsTermsAndBoosts(), that.equalsTermsAndBoosts());
318+
274319
}
275320

276321
@Override
277322
public int hashCode() {
278-
return Objects.hash(classHash(), Arrays.hashCode(equalsTerms()));
323+
return Objects.hash(classHash(), Arrays.hashCode(equalsTermsAndBoosts()));
279324
}
280325

281326
/**

server/src/test/java/org/apache/lucene/queries/BlendedTermQueryTests.java

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444
import org.apache.lucene.search.similarities.Similarity;
4545
import org.apache.lucene.store.Directory;
4646
import org.elasticsearch.test.ESTestCase;
47+
import org.elasticsearch.test.EqualsHashCodeTestUtils;
48+
import org.elasticsearch.test.EqualsHashCodeTestUtils.CopyFunction;
49+
import org.elasticsearch.test.EqualsHashCodeTestUtils.MutateFunction;
4750

4851
import java.io.IOException;
4952
import java.util.Arrays;
@@ -257,4 +260,72 @@ public void testMinTTF() throws IOException {
257260
w.close();
258261
dir.close();
259262
}
263+
264+
public void testEqualsAndHash() {
265+
String[] fields = new String[1 + random().nextInt(10)];
266+
for (int i = 0; i < fields.length; i++) {
267+
fields[i] = randomRealisticUnicodeOfLengthBetween(1, 10);
268+
}
269+
String term = randomRealisticUnicodeOfLengthBetween(1, 10);
270+
Term[] terms = toTerms(fields, term);
271+
float tieBreaker = randomFloat();
272+
final float[] boosts;
273+
if (randomBoolean()) {
274+
boosts = new float[terms.length];
275+
for (int i = 0; i < terms.length; i++) {
276+
boosts[i] = randomFloat();
277+
}
278+
} else {
279+
boosts = null;
280+
}
281+
282+
BlendedTermQuery original = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, tieBreaker);
283+
CopyFunction<BlendedTermQuery> copyFunction = org -> {
284+
Term[] termsCopy = new Term[terms.length];
285+
System.arraycopy(terms, 0, termsCopy, 0, terms.length);
286+
287+
float[] boostsCopy = null;
288+
if (boosts != null) {
289+
boostsCopy = new float[boosts.length];
290+
System.arraycopy(boosts, 0, boostsCopy, 0, terms.length);
291+
}
292+
if (randomBoolean() && terms.length > 1) {
293+
// if we swap two elements, the resulting query should still be regarded as equal
294+
int swapPos = randomIntBetween(1, terms.length - 1);
295+
296+
Term swpTerm = termsCopy[0];
297+
termsCopy[0] = termsCopy[swapPos];
298+
termsCopy[swapPos] = swpTerm;
299+
300+
if (boosts != null) {
301+
float swpBoost = boostsCopy[0];
302+
boostsCopy[0] = boostsCopy[swapPos];
303+
boostsCopy[swapPos] = swpBoost;
304+
}
305+
}
306+
return BlendedTermQuery.dismaxBlendedQuery(termsCopy, boostsCopy, tieBreaker);
307+
};
308+
MutateFunction<BlendedTermQuery> mutateFunction = org -> {
309+
if (randomBoolean()) {
310+
Term[] termsCopy = new Term[terms.length];
311+
System.arraycopy(terms, 0, termsCopy, 0, terms.length);
312+
termsCopy[randomIntBetween(0, terms.length - 1)] = new Term(randomAlphaOfLength(10), randomAlphaOfLength(10));
313+
return BlendedTermQuery.dismaxBlendedQuery(termsCopy, boosts, tieBreaker);
314+
} else {
315+
float[] boostsCopy = null;
316+
if (boosts != null) {
317+
boostsCopy = new float[boosts.length];
318+
System.arraycopy(boosts, 0, boostsCopy, 0, terms.length);
319+
boostsCopy[randomIntBetween(0, terms.length - 1)] = randomFloat();
320+
} else {
321+
boostsCopy = new float[terms.length];
322+
for (int i = 0; i < terms.length; i++) {
323+
boostsCopy[i] = randomFloat();
324+
}
325+
}
326+
return BlendedTermQuery.dismaxBlendedQuery(terms, boostsCopy, tieBreaker);
327+
}
328+
};
329+
EqualsHashCodeTestUtils.checkEqualsAndHashCode(original, copyFunction, mutateFunction );
330+
}
260331
}

0 commit comments

Comments
 (0)