Skip to content

Commit be7d304

Browse files
committed
Fix format problem in composite of unmapped
When a composite aggregation is reduced using the results from an index that has one of the fields unmapped we were throwing away the formatter. This is mildly annoying, except in the case of IP addresses which were coming out as non-utf-8-characters. And tripping assertions. This carefully preserves the formatter from the working bucket. Closes #50600
1 parent dfccbf0 commit be7d304

File tree

4 files changed

+117
-5
lines changed

4 files changed

+117
-5
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml

+44
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,7 @@ setup:
694694
- match: { aggregations.test.buckets.3.key.geo: "12/2048/0" }
695695
- match: { aggregations.test.buckets.3.key.kw: "bar" }
696696
- match: { aggregations.test.buckets.3.doc_count: 1 }
697+
697698
---
698699
"Simple Composite aggregation with geotile grid add aggregate after":
699700
- skip:
@@ -735,3 +736,46 @@ setup:
735736
- match: { aggregations.test.buckets.2.key.geo: "12/2048/0" }
736737
- match: { aggregations.test.buckets.2.key.kw: "bar" }
737738
- match: { aggregations.test.buckets.2.doc_count: 1 }
739+
740+
---
741+
"Mixed ip and unmapped fields":
742+
# It is important that the index *without* the ip field be sorted *before*
743+
# the index *with* the ip field because that has caused bugs in the past.
744+
- do:
745+
indices.create:
746+
index: test_1
747+
- do:
748+
indices.create:
749+
index: test_2
750+
body:
751+
mappings:
752+
properties:
753+
f:
754+
type: ip
755+
- do:
756+
index:
757+
index: test_2
758+
id: 1
759+
body: { "f": "192.168.0.1" }
760+
refresh: true
761+
762+
- do:
763+
search:
764+
index: test_*
765+
body:
766+
aggregations:
767+
test:
768+
composite:
769+
sources: [
770+
"f": {
771+
"terms": {
772+
"field": "f"
773+
}
774+
}
775+
]
776+
777+
- match: { hits.total.value: 1 }
778+
- match: { hits.total.relation: eq }
779+
- length: { aggregations.test.buckets: 1 }
780+
- match: { aggregations.test.buckets.0.key.f: "192.168.0.1" }
781+
- match: { aggregations.test.buckets.0.doc_count: 1 }

server/src/main/java/org/elasticsearch/search/DocValueFormat.java

+10
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,11 @@ public double parseDouble(String value, boolean roundUp, LongSupplier now) {
139139
public BytesRef parseBytesRef(String value) {
140140
return new BytesRef(value);
141141
}
142+
143+
@Override
144+
public String toString() {
145+
return "raw";
146+
}
142147
};
143148

144149
DocValueFormat BINARY = new DocValueFormat() {
@@ -335,6 +340,11 @@ public String format(BytesRef value) {
335340
public BytesRef parseBytesRef(String value) {
336341
return new BytesRef(InetAddressPoint.encode(InetAddresses.forString(value)));
337342
}
343+
344+
@Override
345+
public String toString() {
346+
return "ip";
347+
}
338348
};
339349

340350
final class Decimal implements DocValueFormat {

server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java

+31-3
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,13 @@ public List<InternalBucket> getBuckets() {
134134
return buckets;
135135
}
136136

137+
/**
138+
* The formats used when writing the keys. Package private for testing.
139+
*/
140+
List<DocValueFormat> getFormats() {
141+
return formats;
142+
}
143+
137144
@Override
138145
public Map<String, Object> afterKey() {
139146
if (afterKey != null) {
@@ -189,8 +196,17 @@ public InternalAggregation reduce(List<InternalAggregation> aggregations, Reduce
189196
reduceContext.consumeBucketsAndMaybeBreak(1);
190197
result.add(reduceBucket);
191198
}
192-
final CompositeKey lastKey = result.size() > 0 ? result.get(result.size()-1).getRawKey() : null;
193-
return new InternalComposite(name, size, sourceNames, formats, result, lastKey, reverseMuls,
199+
200+
List<DocValueFormat> reducedFormats = formats;
201+
CompositeKey lastKey = null;
202+
if (result.size() > 0) {
203+
lastBucket = result.get(result.size() - 1);
204+
/* Attach the formats from the last bucket to the reduced composite
205+
* so that we can properly format the after key. */
206+
reducedFormats = lastBucket.formats;
207+
lastKey = lastBucket.getRawKey();
208+
}
209+
return new InternalComposite(name, size, sourceNames, reducedFormats, result, lastKey, reverseMuls,
194210
earlyTerminated, pipelineAggregators(), metaData);
195211
}
196212

@@ -204,7 +220,12 @@ protected InternalBucket reduceBucket(List<InternalBucket> buckets, ReduceContex
204220
aggregations.add(bucket.aggregations);
205221
}
206222
InternalAggregations aggs = InternalAggregations.reduce(aggregations, context);
207-
return new InternalBucket(sourceNames, formats, buckets.get(0).key, reverseMuls, docCount, aggs);
223+
/* Use the formats from the bucket because they'll be right to format
224+
* the key. The formats on the InternalComposite doing the reducing are
225+
* just whatever formats make sense for *its* index. This can be real
226+
* trouble when the index doing the reducing is unmapped. */
227+
var reducedFormats = buckets.get(0).formats;
228+
return new InternalBucket(sourceNames, reducedFormats, buckets.get(0).key, reverseMuls, docCount, aggs);
208229
}
209230

210231
@Override
@@ -334,6 +355,13 @@ public Aggregations getAggregations() {
334355
return aggregations;
335356
}
336357

358+
/**
359+
* The formats used when writing the keys. Package private for testing.
360+
*/
361+
List<DocValueFormat> getFormats() {
362+
return formats;
363+
}
364+
337365
@Override
338366
public int compareKey(InternalBucket other) {
339367
for (int i = 0; i < key.size(); i++) {

server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/InternalCompositeTests.java

+32-2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@
4747
import java.util.stream.IntStream;
4848

4949
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiLettersOfLengthBetween;
50+
import static java.util.Collections.emptyList;
51+
import static java.util.Collections.emptyMap;
52+
import static java.util.stream.Collectors.toList;
5053
import static org.hamcrest.Matchers.containsString;
5154
import static org.hamcrest.Matchers.equalTo;
5255
import static org.hamcrest.Matchers.greaterThan;
@@ -238,8 +241,7 @@ public void testReduceSame() throws IOException {
238241
for (int i = 0; i < numSame; i++) {
239242
toReduce.add(result);
240243
}
241-
InternalComposite finalReduce = (InternalComposite) result.reduce(toReduce,
242-
new InternalAggregation.ReduceContext(BigArrays.NON_RECYCLING_INSTANCE, null, true));
244+
InternalComposite finalReduce = (InternalComposite) result.reduce(toReduce, reduceContext());
243245
assertThat(finalReduce.getBuckets().size(), equalTo(result.getBuckets().size()));
244246
Iterator<InternalComposite.InternalBucket> expectedIt = result.getBuckets().iterator();
245247
for (InternalComposite.InternalBucket bucket : finalReduce.getBuckets()) {
@@ -249,6 +251,30 @@ public void testReduceSame() throws IOException {
249251
}
250252
}
251253

254+
/**
255+
* Check that reducing with an unmapped index produces useful formats.
256+
*/
257+
public void testReduceUnmapped() throws IOException {
258+
var mapped = createTestInstance(randomAlphaOfLength(10), emptyList(), emptyMap(), InternalAggregations.EMPTY);
259+
var rawFormats = formats.stream().map(f -> DocValueFormat.RAW).collect(toList());
260+
var unmapped = new InternalComposite(mapped.getName(), mapped.getSize(), sourceNames,
261+
rawFormats, emptyList(), null, reverseMuls, true, emptyList(), emptyMap());
262+
List<InternalAggregation> toReduce = Arrays.asList(unmapped, mapped);
263+
Collections.shuffle(toReduce, random());
264+
InternalComposite finalReduce = (InternalComposite) unmapped.reduce(toReduce, reduceContext());
265+
assertThat(finalReduce.getBuckets().size(), equalTo(mapped.getBuckets().size()));
266+
if (false == mapped.getBuckets().isEmpty()) {
267+
assertThat(finalReduce.getFormats(), equalTo(mapped.getFormats()));
268+
}
269+
var expectedIt = mapped.getBuckets().iterator();
270+
for (var bucket : finalReduce.getBuckets()) {
271+
InternalComposite.InternalBucket expectedBucket = expectedIt.next();
272+
assertThat(bucket.getRawKey(), equalTo(expectedBucket.getRawKey()));
273+
assertThat(bucket.getDocCount(), equalTo(expectedBucket.getDocCount()));
274+
assertThat(bucket.getFormats(), equalTo(expectedBucket.getFormats()));
275+
}
276+
}
277+
252278
public void testCompareCompositeKeyBiggerFieldName() {
253279
InternalComposite.ArrayMap key1 = createMap(
254280
Arrays.asList("field1", "field2"),
@@ -381,4 +407,8 @@ private InternalComposite.ArrayMap createMap(List<String> fields, Comparable[] v
381407
values
382408
);
383409
}
410+
411+
private InternalAggregation.ReduceContext reduceContext() {
412+
return new InternalAggregation.ReduceContext(BigArrays.NON_RECYCLING_INSTANCE, null, true);
413+
}
384414
}

0 commit comments

Comments
 (0)