Skip to content

Commit 7128bf4

Browse files
Add second level of field collapsing (#31808)
Put second level collapse under inner_hits Closes #24855
1 parent 63e45c8 commit 7128bf4

File tree

4 files changed

+296
-4
lines changed

4 files changed

+296
-4
lines changed

docs/reference/search/request/collapse.asciidoc

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,105 @@ The default is based on the number of data nodes and the default search thread p
116116

117117
WARNING: `collapse` cannot be used in conjunction with <<search-request-scroll, scroll>>,
118118
<<search-request-rescore, rescore>> or <<search-request-search-after, search after>>.
119+
120+
==== Second level of collapsing
121+
122+
Second level of collapsing is also supported and is applied to `inner_hits`.
123+
For example, the following request finds the top scored tweets for
124+
each country, and within each country finds the top scored tweets
125+
for each user.
126+
127+
[source,js]
128+
--------------------------------------------------
129+
GET /twitter/_search
130+
{
131+
"query": {
132+
"match": {
133+
"message": "elasticsearch"
134+
}
135+
},
136+
"collapse" : {
137+
"field" : "country",
138+
"inner_hits" : {
139+
"name": "by_location",
140+
"collapse" : {"field" : "user"},
141+
"size": 3
142+
}
143+
}
144+
}
145+
--------------------------------------------------
146+
// NOTCONSOLE
147+
148+
149+
Response:
150+
[source,js]
151+
--------------------------------------------------
152+
{
153+
...
154+
"hits": [
155+
{
156+
"_index": "twitter",
157+
"_type": "_doc",
158+
"_id": "9",
159+
"_score": ...,
160+
"_source": {...},
161+
"fields": {"country": ["UK"]},
162+
"inner_hits":{
163+
"by_location": {
164+
"hits": {
165+
...,
166+
"hits": [
167+
{
168+
...
169+
"fields": {"user" : ["user124"]}
170+
},
171+
{
172+
...
173+
"fields": {"user" : ["user589"]}
174+
},
175+
{
176+
...
177+
"fields": {"user" : ["user001"]}
178+
}
179+
]
180+
}
181+
}
182+
}
183+
},
184+
{
185+
"_index": "twitter",
186+
"_type": "_doc",
187+
"_id": "1",
188+
"_score": ..,
189+
"_source": {...},
190+
"fields": {"country": ["Canada"]},
191+
"inner_hits":{
192+
"by_location": {
193+
"hits": {
194+
...,
195+
"hits": [
196+
{
197+
...
198+
"fields": {"user" : ["user444"]}
199+
},
200+
{
201+
...
202+
"fields": {"user" : ["user1111"]}
203+
},
204+
{
205+
...
206+
"fields": {"user" : ["user999"]}
207+
}
208+
]
209+
}
210+
}
211+
}
212+
213+
},
214+
...
215+
]
216+
}
217+
--------------------------------------------------
218+
// NOTCONSOLE
219+
220+
NOTE: Second level of collapsing doesn't allow `inner_hits`.
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
---
2+
"two levels fields collapsing":
3+
- skip:
4+
version: " - 6.3.99"
5+
reason: using multiple field collapsing from 6.4 on
6+
- do:
7+
indices.create:
8+
index: addresses
9+
body:
10+
settings:
11+
number_of_shards: 1
12+
number_of_replicas: 1
13+
mappings:
14+
_doc:
15+
properties:
16+
country: {"type": "keyword"}
17+
city: {"type": "keyword"}
18+
address: {"type": "text"}
19+
20+
- do:
21+
bulk:
22+
refresh: true
23+
body:
24+
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "1" } }'
25+
- '{"country" : "Canada", "city" : "Saskatoon", "address" : "701 Victoria Avenue" }'
26+
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "2" } }'
27+
- '{"country" : "Canada", "city" : "Toronto", "address" : "74 Victoria Street, Suite, 74 Victoria Street, Suite 300" }'
28+
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "3" } }'
29+
- '{"country" : "Canada", "city" : "Toronto", "address" : "350 Victoria St" }'
30+
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "4" } }'
31+
- '{"country" : "Canada", "city" : "Toronto", "address" : "20 Victoria Street" }'
32+
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "5" } }'
33+
- '{"country" : "UK", "city" : "London", "address" : "58 Victoria Street" }'
34+
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "6" } }'
35+
- '{"country" : "UK", "city" : "London", "address" : "Victoria Street Victoria Palace Theatre" }'
36+
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "7" } }'
37+
- '{"country" : "UK", "city" : "Manchester", "address" : "75 Victoria street Westminster" }'
38+
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "8" } }'
39+
- '{"country" : "UK", "city" : "London", "address" : "Victoria Station Victoria Arcade" }'
40+
41+
42+
# ************* error if internal collapse contains inner_hits
43+
- do:
44+
catch: /parse_exception/
45+
search:
46+
index: addresses
47+
body:
48+
query: { "match" : { "address" : "victoria" }}
49+
collapse:
50+
field: country
51+
inner_hits:
52+
collapse:
53+
field : city
54+
inner_hits: {}
55+
56+
57+
# ************* error if internal collapse contains another collapse
58+
- do:
59+
catch: /parse_exception/
60+
search:
61+
index: addresses
62+
body:
63+
query: { "match" : { "address" : "victoria" }}
64+
collapse:
65+
field: country
66+
inner_hits:
67+
collapse:
68+
field : city
69+
collapse: { field: city }
70+
71+
72+
73+
# ************* top scored
74+
- do:
75+
search:
76+
index: addresses
77+
body:
78+
query: { "match" : { "address" : "victoria" }}
79+
collapse:
80+
field: country
81+
inner_hits:
82+
name: by_location
83+
size: 3
84+
collapse:
85+
field : city
86+
87+
- match: { hits.total: 8 }
88+
- length: { hits.hits: 2 }
89+
- match: { hits.hits.0.fields.country: ["UK"] }
90+
- match: { hits.hits.0.inner_hits.by_location.hits.total: 4 }
91+
# 2 inner hits returned instead of requested 3 as they are collapsed by city
92+
- length: { hits.hits.0.inner_hits.by_location.hits.hits : 2}
93+
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0._id: "8" }
94+
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0.fields.city: ["London"] }
95+
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1._id: "7" }
96+
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1.fields.city: ["Manchester"] }
97+
98+
- match: { hits.hits.1.fields.country: ["Canada"] }
99+
- match: { hits.hits.1.inner_hits.by_location.hits.total: 4 }
100+
# 2 inner hits returned instead of requested 3 as they are collapsed by city
101+
- length: { hits.hits.1.inner_hits.by_location.hits.hits : 2 }
102+
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0._id: "1" }
103+
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0.fields.city: ["Saskatoon"] }
104+
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1._id: "3" }
105+
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1.fields.city: ["Toronto"] }
106+
107+
108+
# ************* sorted
109+
- do:
110+
search:
111+
index: addresses
112+
body:
113+
query: { "match" : { "address" : "victoria" }}
114+
collapse:
115+
field: country
116+
inner_hits:
117+
name: by_location
118+
size: 3
119+
sort: [{ "city": "desc" }]
120+
collapse:
121+
field : city
122+
123+
- match: { hits.total: 8 }
124+
- length: { hits.hits: 2 }
125+
- match: { hits.hits.0.fields.country: ["UK"] }
126+
- match: { hits.hits.0.inner_hits.by_location.hits.total: 4 }
127+
# 2 inner hits returned instead of requested 3 as they are collapsed by city
128+
- length: { hits.hits.0.inner_hits.by_location.hits.hits : 2}
129+
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0._id: "7" }
130+
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0.fields.city: ["Manchester"] }
131+
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1._id: "5" }
132+
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1.fields.city: ["London"] }
133+
134+
- match: { hits.hits.1.fields.country: ["Canada"] }
135+
- match: { hits.hits.1.inner_hits.by_location.hits.total: 4 }
136+
# 2 inner hits returned instead of requested 3 as they are collapsed by city
137+
- length: { hits.hits.1.inner_hits.by_location.hits.hits : 2 }
138+
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0._id: "2" }
139+
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0.fields.city: ["Toronto"] }
140+
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1._id: "1" }
141+
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1.fields.city: ["Saskatoon"] }

server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ public void run() throws IOException {
8787
groupQuery.must(origQuery);
8888
}
8989
for (InnerHitBuilder innerHitBuilder : innerHitBuilders) {
90-
SearchSourceBuilder sourceBuilder = buildExpandSearchSourceBuilder(innerHitBuilder)
90+
CollapseBuilder innerCollapseBuilder = innerHitBuilder.getInnerCollapseBuilder();
91+
SearchSourceBuilder sourceBuilder = buildExpandSearchSourceBuilder(innerHitBuilder, innerCollapseBuilder)
9192
.query(groupQuery)
9293
.postFilter(searchRequest.source().postFilter());
9394
SearchRequest groupRequest = buildExpandSearchRequest(searchRequest, sourceBuilder);
@@ -137,7 +138,7 @@ private SearchRequest buildExpandSearchRequest(SearchRequest orig, SearchSourceB
137138
return groupRequest;
138139
}
139140

140-
private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder options) {
141+
private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder options, CollapseBuilder innerCollapseBuilder) {
141142
SearchSourceBuilder groupSource = new SearchSourceBuilder();
142143
groupSource.from(options.getFrom());
143144
groupSource.size(options.getSize());
@@ -169,6 +170,9 @@ private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder optio
169170
groupSource.explain(options.isExplain());
170171
groupSource.trackScores(options.isTrackScores());
171172
groupSource.version(options.isVersion());
173+
if (innerCollapseBuilder != null) {
174+
groupSource.collapse(innerCollapseBuilder);
175+
}
172176
return groupSource;
173177
}
174178
}

server/src/main/java/org/elasticsearch/index/query/InnerHitBuilder.java

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
3838
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
3939
import org.elasticsearch.search.sort.SortBuilder;
40+
import org.elasticsearch.search.collapse.CollapseBuilder;
4041

4142
import java.io.IOException;
4243
import java.util.ArrayList;
@@ -55,6 +56,8 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
5556
public static final ParseField NAME_FIELD = new ParseField("name");
5657
public static final ParseField IGNORE_UNMAPPED = new ParseField("ignore_unmapped");
5758
public static final QueryBuilder DEFAULT_INNER_HIT_QUERY = new MatchAllQueryBuilder();
59+
public static final ParseField COLLAPSE_FIELD = new ParseField("collapse");
60+
public static final ParseField FIELD_FIELD = new ParseField("field");
5861

5962
private static final ObjectParser<InnerHitBuilder, Void> PARSER = new ObjectParser<>("inner_hits", InnerHitBuilder::new);
6063

@@ -91,6 +94,28 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
9194
}, SearchSourceBuilder._SOURCE_FIELD, ObjectParser.ValueType.OBJECT_ARRAY_BOOLEAN_OR_STRING);
9295
PARSER.declareObject(InnerHitBuilder::setHighlightBuilder, (p, c) -> HighlightBuilder.fromXContent(p),
9396
SearchSourceBuilder.HIGHLIGHT_FIELD);
97+
PARSER.declareField((parser, builder, context) -> {
98+
Boolean isParsedCorrectly = false;
99+
String field;
100+
if (parser.currentToken() == XContentParser.Token.START_OBJECT) {
101+
if (parser.nextToken() == XContentParser.Token.FIELD_NAME) {
102+
if (FIELD_FIELD.match(parser.currentName(), parser.getDeprecationHandler())) {
103+
if (parser.nextToken() == XContentParser.Token.VALUE_STRING){
104+
field = parser.text();
105+
if (parser.nextToken() == XContentParser.Token.END_OBJECT){
106+
isParsedCorrectly = true;
107+
CollapseBuilder cb = new CollapseBuilder(field);
108+
builder.setInnerCollapse(cb);
109+
}
110+
}
111+
}
112+
}
113+
}
114+
if (isParsedCorrectly == false) {
115+
throw new ParsingException(parser.getTokenLocation(), "Invalid token in the inner collapse");
116+
}
117+
118+
}, COLLAPSE_FIELD, ObjectParser.ValueType.OBJECT);
94119
}
95120

96121
private String name;
@@ -109,6 +134,7 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
109134
private Set<ScriptField> scriptFields;
110135
private HighlightBuilder highlightBuilder;
111136
private FetchSourceContext fetchSourceContext;
137+
private CollapseBuilder innerCollapseBuilder = null;
112138

113139
public InnerHitBuilder() {
114140
this.name = null;
@@ -173,6 +199,9 @@ public InnerHitBuilder(StreamInput in) throws IOException {
173199
boolean hasChildren = in.readBoolean();
174200
assert hasChildren == false;
175201
}
202+
if (in.getVersion().onOrAfter(Version.V_6_4_0)) {
203+
this.innerCollapseBuilder = in.readOptionalWriteable(CollapseBuilder::new);
204+
}
176205
}
177206

178207
@Override
@@ -218,6 +247,9 @@ public void writeTo(StreamOutput out) throws IOException {
218247
}
219248
}
220249
out.writeOptionalWriteable(highlightBuilder);
250+
if (out.getVersion().onOrAfter(Version.V_6_4_0)) {
251+
out.writeOptionalWriteable(innerCollapseBuilder);
252+
}
221253
}
222254

223255
/**
@@ -501,6 +533,15 @@ QueryBuilder getQuery() {
501533
return query;
502534
}
503535

536+
public InnerHitBuilder setInnerCollapse(CollapseBuilder innerCollapseBuilder) {
537+
this.innerCollapseBuilder = innerCollapseBuilder;
538+
return this;
539+
}
540+
541+
public CollapseBuilder getInnerCollapseBuilder() {
542+
return innerCollapseBuilder;
543+
}
544+
504545
@Override
505546
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
506547
builder.startObject();
@@ -550,6 +591,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
550591
if (highlightBuilder != null) {
551592
builder.field(SearchSourceBuilder.HIGHLIGHT_FIELD.getPreferredName(), highlightBuilder, params);
552593
}
594+
if (innerCollapseBuilder != null) {
595+
builder.field(COLLAPSE_FIELD.getPreferredName(), innerCollapseBuilder);
596+
}
553597
builder.endObject();
554598
return builder;
555599
}
@@ -572,13 +616,14 @@ public boolean equals(Object o) {
572616
Objects.equals(scriptFields, that.scriptFields) &&
573617
Objects.equals(fetchSourceContext, that.fetchSourceContext) &&
574618
Objects.equals(sorts, that.sorts) &&
575-
Objects.equals(highlightBuilder, that.highlightBuilder);
619+
Objects.equals(highlightBuilder, that.highlightBuilder) &&
620+
Objects.equals(innerCollapseBuilder, that.innerCollapseBuilder);
576621
}
577622

578623
@Override
579624
public int hashCode() {
580625
return Objects.hash(name, ignoreUnmapped, from, size, explain, version, trackScores,
581-
storedFieldsContext, docValueFields, scriptFields, fetchSourceContext, sorts, highlightBuilder);
626+
storedFieldsContext, docValueFields, scriptFields, fetchSourceContext, sorts, highlightBuilder, innerCollapseBuilder);
582627
}
583628

584629
public static InnerHitBuilder fromXContent(XContentParser parser) throws IOException {

0 commit comments

Comments
 (0)