Skip to content

Commit 35cba50

Browse files
committed
More Like This Query: creates only one MLT query per field for all queried items.
Previously, one MLT query per field was created for each item. One issue with this method is that the maximum number of selected terms was equal to the number of items times 'max_query_terms'. Instead, users should have direct control over the maximum number of selected terms, regardless of the number of queried items. Another issue with the previous method is that it could lead to the selection of rather uninteresting terms, merely because they were found in one particular queried item. Instead, this new procedure enforces the selection of interesting terms across ALL items, not within each item. This could lead to search results where the best matching items share commonalities among the best characteristics of all the items. Closes #6404
1 parent c41e63c commit 35cba50

File tree

4 files changed

+33
-14
lines changed

4 files changed

+33
-14
lines changed

src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@
2727
import org.apache.lucene.search.similarities.DefaultSimilarity;
2828
import org.apache.lucene.search.similarities.Similarity;
2929
import org.apache.lucene.search.similarities.TFIDFSimilarity;
30+
import org.elasticsearch.common.Strings;
3031
import org.elasticsearch.common.io.FastStringReader;
3132

3233
import java.io.IOException;
3334
import java.io.Reader;
3435
import java.util.Arrays;
36+
import java.util.List;
3537
import java.util.Set;
3638

3739
/**
@@ -174,13 +176,17 @@ public String[] getLikeTexts() {
174176
}
175177

176178
/**
 * Sets a single like text.
 * <p>
 * After this change the value is wrapped in a one-element array and passed to
 * {@code setLikeText(String...)} instead of being assigned to the field directly.
 *
 * @param likeText the text to use as the only like text
 */
public void setLikeText(String likeText) {
177-
this.likeText = new String[]{likeText};
179+
setLikeText(new String[]{likeText});
178180
}
179181

180182
/**
 * Replaces the like texts with the given values.
 * <p>
 * The array reference is stored as-is; no copy is made here.
 *
 * @param likeText the texts to store in the {@code likeText} field
 */
public void setLikeText(String... likeText) {
181183
this.likeText = likeText;
182184
}
183185

186+
/**
 * Sets the like texts from a list by converting it to a {@code String[]} and
 * delegating to {@code setLikeText(String...)}.
 *
 * @param likeText the texts to use; converted with {@code toArray}
 */
public void setLikeText(List<String> likeText) {
187+
// NOTE(review): Strings.EMPTY_ARRAY is presumably a shared zero-length String[] used only as the type token - confirm.
setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
188+
}
189+
184190
/**
 * Returns the {@code moreLikeFields} array held by this query.
 * <p>
 * The stored reference is returned directly, without copying.
 *
 * @return the current value of the {@code moreLikeFields} field
 */
public String[] getMoreLikeFields() {
185191
return moreLikeFields;
186192
}

src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.elasticsearch.index.query;
2121

2222
import com.google.common.collect.Lists;
23+
import com.google.common.collect.ObjectArrays;
2324
import com.google.common.collect.Sets;
2425
import org.apache.lucene.analysis.Analyzer;
2526
import org.apache.lucene.queries.TermsFilter;
@@ -207,9 +208,11 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
207208
}
208209
// fetching the items with multi-get
209210
List<LikeText> likeTexts = fetchService.fetch(items);
211+
// collapse the text onto the same field name
212+
Collection<LikeText> likeTextsCollapsed = collapseTextOnField(likeTexts);
210213
// right now we are just building a boolean query
211214
BooleanQuery boolQuery = new BooleanQuery();
212-
for (LikeText likeText : likeTexts) {
215+
for (LikeText likeText : likeTextsCollapsed) {
213216
addMoreLikeThis(boolQuery, mltQuery, likeText);
214217
}
215218
// exclude the items from the search
@@ -260,6 +263,19 @@ private List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyz
260263
return moreLikeFields;
261264
}
262265

266+
public static Collection<LikeText> collapseTextOnField (Collection<LikeText> likeTexts) {
267+
Map<String, LikeText> collapsedTexts = new HashMap<>();
268+
for (LikeText likeText : likeTexts) {
269+
String field = likeText.field;
270+
String[] text = likeText.text;
271+
if (collapsedTexts.containsKey(field)) {
272+
text = ObjectArrays.concat(collapsedTexts.get(field).text, text, String.class);
273+
}
274+
collapsedTexts.put(field, new LikeText(field, text));
275+
}
276+
return collapsedTexts.values();
277+
}
278+
263279
private void removeUnsupportedFields(MultiGetRequest.Item item, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
264280
item.fields((String[]) removeUnsupportedFields(Arrays.asList(item.fields()), analyzer, failOnUnsupportedField).toArray());
265281
}

src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.action.get.MultiGetRequest;
3434
import org.elasticsearch.cache.recycler.CacheRecyclerModule;
3535
import org.elasticsearch.cluster.ClusterService;
36+
import org.elasticsearch.common.Strings;
3637
import org.elasticsearch.common.bytes.BytesArray;
3738
import org.elasticsearch.common.compress.CompressedString;
3839
import org.elasticsearch.common.inject.AbstractModule;
@@ -65,6 +66,7 @@
6566
import org.elasticsearch.index.search.geo.GeoPolygonFilter;
6667
import org.elasticsearch.index.search.geo.InMemoryGeoBoundingBoxFilter;
6768
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
69+
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService.LikeText;
6870
import org.elasticsearch.index.settings.IndexSettingsModule;
6971
import org.elasticsearch.index.similarity.SimilarityModule;
7072
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
@@ -1680,27 +1682,22 @@ public void testMoreLikeThisIds() throws Exception {
16801682
MoreLikeThisQueryParser parser = (MoreLikeThisQueryParser) queryParser.queryParser("more_like_this");
16811683
parser.setFetchService(new MockMoreLikeThisFetchService());
16821684

1683-
List<MoreLikeThisFetchService.LikeText> likeTexts = new ArrayList<>();
1684-
String index = "test";
1685-
String type = "person";
1686-
for (int i = 1; i < 5; i++) {
1687-
for (String field : new String[]{"name.first", "name.last"}) {
1688-
MoreLikeThisFetchService.LikeText likeText = new MoreLikeThisFetchService.LikeText(
1689-
field, index + " " + type + " " + i + " " + field);
1690-
likeTexts.add(likeText);
1691-
}
1692-
}
1685+
List<LikeText> likeTexts = new ArrayList<>();
1686+
likeTexts.add(new LikeText("name.first", new String[]{
1687+
"test person 1 name.first", "test person 2 name.first", "test person 3 name.first", "test person 4 name.first"}));
1688+
likeTexts.add(new LikeText("name.last", new String[]{
1689+
"test person 1 name.last", "test person 2 name.last", "test person 3 name.last", "test person 4 name.last"}));
16931690

16941691
IndexQueryParserService queryParser = queryParser();
1695-
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-ids.json");
1692+
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-items.json");
16961693
Query parsedQuery = queryParser.parse(query).query();
16971694
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
16981695
BooleanQuery booleanQuery = (BooleanQuery) parsedQuery;
16991696
assertThat(booleanQuery.getClauses().length, is(likeTexts.size() + 1));
17001697

17011698
// check each clause is for each item
17021699
BooleanClause[] boolClauses = booleanQuery.getClauses();
1703-
for (int i=0; i<likeTexts.size(); i++) {
1700+
for (int i = 0; i < likeTexts.size(); i++) {
17041701
BooleanClause booleanClause = booleanQuery.getClauses()[i];
17051702
assertThat(booleanClause.getOccur(), is(BooleanClause.Occur.SHOULD));
17061703
assertThat(booleanClause.getQuery(), instanceOf(MoreLikeThisQuery.class));

0 commit comments

Comments
 (0)