Skip to content

Commit 4407fe5

Browse files
committed
Align XCombinedFieldQuery with latest changes (elastic#75483)
In elastic#74678 we released an early fix for a Lucene bug around `combined_fields` queries with missing fields. This PR brings our fix up-to-date with what was actually committed to Lucene.
1 parent 5643c4f commit 4407fe5

File tree

3 files changed

+55
-19
lines changed

3 files changed

+55
-19
lines changed

server/src/main/java/org/apache/lucene/search/XCombinedFieldQuery.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
*/
1919
package org.apache.lucene.search;
2020

21+
import org.apache.lucene.index.FieldInfo;
22+
import org.apache.lucene.index.FieldInfos;
2123
import org.apache.lucene.index.IndexReader;
2224
import org.apache.lucene.index.LeafReaderContext;
2325
import org.apache.lucene.index.PostingsEnum;
@@ -69,6 +71,9 @@
6971
* compute norms the same way as {@link SimilarityBase#computeNorm}, which includes {@link
7072
* BM25Similarity} and {@link DFRSimilarity}. Per-field similarities are not supported.
7173
*
74+
* <p>The query also requires that either all fields or no fields have norms enabled. Having only
75+
* some fields with norms enabled can result in errors.
76+
*
7277
* <p>The scoring is based on BM25F's simple formula described in:
7378
* http://www.staff.city.ac.uk/~sb317/papers/foundations_bm25_review.pdf. This query implements the
7479
* same approach but allows other similarities besides {@link
@@ -264,6 +269,7 @@ private BooleanQuery rewriteToBoolean() {
264269
@Override
265270
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
266271
throws IOException {
272+
validateConsistentNorms(searcher.getIndexReader());
267273
if (scoreMode.needsScores()) {
268274
return new CombinedFieldWeight(this, searcher, scoreMode, boost);
269275
} else {
@@ -273,6 +279,29 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
273279
}
274280
}
275281

282+
private void validateConsistentNorms(IndexReader reader) {
283+
boolean allFieldsHaveNorms = true;
284+
boolean noFieldsHaveNorms = true;
285+
286+
for (LeafReaderContext context : reader.leaves()) {
287+
FieldInfos fieldInfos = context.reader().getFieldInfos();
288+
for (String field : fieldAndWeights.keySet()) {
289+
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
290+
if (fieldInfo != null) {
291+
allFieldsHaveNorms &= fieldInfo.hasNorms();
292+
noFieldsHaveNorms &= fieldInfo.omitsNorms();
293+
}
294+
}
295+
}
296+
297+
if (allFieldsHaveNorms == false && noFieldsHaveNorms == false) {
298+
throw new IllegalArgumentException(
299+
getClass().getSimpleName()
300+
+ " requires norms to be consistent across fields: some fields cannot "
301+
+ " have norms enabled, while others have norms disabled");
302+
}
303+
}
304+
276305
class CombinedFieldWeight extends Weight {
277306
private final IndexSearcher searcher;
278307
private final TermStates termStates[];

server/src/main/java/org/apache/lucene/search/XMultiNormsLeafSimScorer.java

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@
3434
* Copy of {@link MultiNormsLeafSimScorer} that contains a fix for LUCENE-9999.
3535
* TODO: remove once LUCENE-9999 is fixed and integrated
3636
*
37-
* <p>This scorer requires that either all fields or no fields have norms enabled. It will throw an
38-
* error if some fields have norms enabled, while others have norms disabled.
37+
* <p>For all fields, norms must be encoded using {@link SmallFloat#intToByte4}. This scorer also
38+
* requires that either all fields or no fields have norms enabled. Having only some fields with
39+
* norms enabled can result in errors or undefined behavior.
3940
*/
4041
final class XMultiNormsLeafSimScorer {
4142
/** Cache of decoded norms. */
@@ -69,13 +70,6 @@ final class XMultiNormsLeafSimScorer {
6970
}
7071
}
7172

72-
if (normsList.isEmpty() == false && normsList.size() != normFields.size()) {
73-
throw new IllegalArgumentException(
74-
getClass().getSimpleName()
75-
+ " requires norms to be consistent across fields: some fields cannot"
76-
+ " have norms enabled, while others have norms disabled");
77-
}
78-
7973
if (normsList.isEmpty()) {
8074
norms = null;
8175
} else if (normsList.size() == 1) {

server/src/test/java/org/apache/lucene/search/XCombinedFieldQueryTests.java

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ public void testNormsDisabled() throws IOException {
9797
doc.add(new StringField("b", "value", Store.NO));
9898
doc.add(new TextField("c", "value", Store.NO));
9999
w.addDocument(doc);
100+
w.commit();
101+
102+
doc = new Document();
103+
doc.add(new StringField("a", "value", Store.NO));
104+
doc.add(new TextField("c", "value", Store.NO));
105+
w.addDocument(doc);
100106

101107
IndexReader reader = w.getReader();
102108
IndexSearcher searcher = newSearcher(reader);
@@ -105,19 +111,26 @@ public void testNormsDisabled() throws IOException {
105111
searcher.setSimilarity(searchSimilarity);
106112
TopScoreDocCollector collector = TopScoreDocCollector.create(10, null, 10);
107113

108-
XCombinedFieldQuery query = new XCombinedFieldQuery.Builder().addField("a", 1.0f)
109-
.addField("b", 1.0f)
110-
.addTerm(new BytesRef("value"))
111-
.build();
114+
XCombinedFieldQuery query =
115+
new XCombinedFieldQuery.Builder()
116+
.addField("a", 1.0f)
117+
.addField("b", 1.0f)
118+
.addTerm(new BytesRef("value"))
119+
.build();
112120
searcher.search(query, collector);
113121
TopDocs topDocs = collector.topDocs();
114-
assertEquals(new TotalHits(1, TotalHits.Relation.EQUAL_TO), topDocs.totalHits);
122+
assertEquals(new TotalHits(2, TotalHits.Relation.EQUAL_TO), topDocs.totalHits);
115123

116-
XCombinedFieldQuery invalidQuery = new XCombinedFieldQuery.Builder().addField("b", 1.0f)
117-
.addField("c", 1.0f)
118-
.addTerm(new BytesRef("value"))
119-
.build();
120-
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> searcher.search(invalidQuery, collector));
124+
TopScoreDocCollector invalidCollector = TopScoreDocCollector.create(10, null, 10);
125+
XCombinedFieldQuery invalidQuery =
126+
new XCombinedFieldQuery.Builder()
127+
.addField("b", 1.0f)
128+
.addField("c", 1.0f)
129+
.addTerm(new BytesRef("value"))
130+
.build();
131+
IllegalArgumentException e =
132+
expectThrows(
133+
IllegalArgumentException.class, () -> searcher.search(invalidQuery, invalidCollector));
121134
assertTrue(e.getMessage().contains("requires norms to be consistent across fields"));
122135

123136
reader.close();

0 commit comments

Comments
 (0)