Commit 123a074

Fix division by zero in phrase suggester that causes assertion to fail
1 parent ba167f7 commit 123a074

2 files changed: +31 −4 lines changed

core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java

+4 −1
@@ -57,7 +57,10 @@ public WordScorer(IndexReader reader, Terms terms, String field, double realWord
         final long vocSize = terms.getSumTotalTermFreq();
         this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
         this.useTotalTermFreq = vocSize != -1;
-        this.numTerms = terms.size();
+        long numTerms = terms.size();
+        // -1 cannot be used as value, because scoreUnigram(...) can then divide by 0 if vocabluarySize is 1.
+        // -1 is returned when terms is a MultiTerms instance.
+        this.numTerms = vocabluarySize + numTerms > 1 ? numTerms : 0;
         this.termsEnum = new FreqTermsEnum(reader, field, !useTotalTermFreq, useTotalTermFreq, null, BigArrays.NON_RECYCLING_INSTANCE); // non recycling for now
         this.reader = reader;
         this.realWordLikelyhood = realWordLikelyHood;
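
Why the clamp above matters, as a rough sketch rather than the actual Elasticsearch source: the unigram score divides by a denominator built from vocabluarySize + numTerms, Terms.size() returns -1 when the count is unknown (for example when terms is a MultiTerms over several segments), and with a vocabulary size of 1 that denominator collapses to 0. The standalone Java sketch below uses a simplified stand-in for the real scoreUnigram(...) to show the arithmetic before and after the fix; the real scorer keeps these values as fields and reads the term frequency from the index.

// Hedged sketch of the division-by-zero fixed by this commit (not the actual WordScorer code).
public class UnigramScoreSketch {

    // Add-one style unigram score: (1 + termFreq) / (vocabularySize + numTerms).
    static double scoreUnigram(long termFreq, long vocabularySize, long numTerms) {
        return (1.0 + termFreq) / (vocabularySize + numTerms);
    }

    public static void main(String[] args) {
        long vocabularySize = 1;  // e.g. a field whose sumTotalTermFreq is 1
        long numTerms = -1;       // Terms.size() for a MultiTerms instance

        // Before the fix: denominator is 1 + (-1) = 0, so the double division yields Infinity
        // and downstream score assertions fail.
        System.out.println(scoreUnigram(0, vocabularySize, numTerms));

        // After the fix: numTerms is clamped to 0 whenever vocabularySize + numTerms <= 1.
        long clampedNumTerms = vocabularySize + numTerms > 1 ? numTerms : 0;
        System.out.println(scoreUnigram(0, vocabularySize, clampedNumTerms));  // 1.0
    }
}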

core/src/test/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellCheckerTests.java

+27 −3
@@ -26,6 +26,7 @@
 import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
 import org.apache.lucene.analysis.reverse.ReverseStringFilter;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.synonym.SolrSynonymParser;
 import org.apache.lucene.analysis.synonym.SynonymFilter;
@@ -38,16 +39,14 @@
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.SuggestMode;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.Result;
 import org.elasticsearch.test.ESTestCase;

-import java.io.BufferedReader;
 import java.io.IOException;
-import java.io.InputStreamReader;
 import java.io.StringReader;
-import java.nio.charset.StandardCharsets;
 import java.util.HashMap;
 import java.util.Map;
@@ -439,4 +438,29 @@ protected TokenStreamComponents createComponents(String fieldName) {
         assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
         assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
     }
+
+    public void testFewDocsEgdeCase() throws Exception {
+        try (Directory dir = newDirectory()) {
+            try (IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig())) {
+                Document document = new Document();
+                document.add(new TextField("field", "value", Field.Store.NO));
+                iw.addDocument(document);
+                iw.commit();
+                document = new Document();
+                document.add(new TextField("other_field", "value", Field.Store.NO));
+                iw.addDocument(document);
+            }
+
+            try (DirectoryReader ir = DirectoryReader.open(dir)) {
+                WordScorer wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.95d, new BytesRef(" "), 0.4f);
+                NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
+                DirectSpellChecker spellchecker = new DirectSpellChecker();
+                DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "field", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
+                Result result = suggester.getCorrections(new StandardAnalyzer(), new BytesRef("valeu"), generator, 1, 1, ir, "field", wordScorer, 1, 2);
+                assertThat(result.corrections.length, equalTo(1));
+                assertThat(result.corrections[0].join(space).utf8ToString(), equalTo("value"));
+            }
+        }
+    }
+
 }
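
Reading the new test: the two documents are written to different fields with an explicit iw.commit() in between, so the reader presumably ends up with two segments and MultiFields.getTerms(ir, "field") is backed by a MultiTerms whose size() is -1, while the single occurrence of "value" keeps vocabluarySize at 1, which is the combination the comment in WordScorer calls out. The assertions then simply check that the misspelled "valeu" is still corrected to "value" instead of hitting the failing assertion inside the scorer; space in the final assertion is presumably a BytesRef field defined elsewhere in the test class.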
