Skip to content

Commit e84a79a

Browse files
authored
Enable validating user-supplied missing values on unmapped fields (#43718)
Provides a hook for aggregations to introspect the `ValuesSourceType` for a user supplied Missing value on an unmapped field, when the type would otherwise be `ANY`. Mapped field behavior is unchanged, and still applies the `ValuesSourceType` of the field. This PR just provides the hook for doing this, no existing aggregations have their behavior changed.
1 parent 0f894b6 commit e84a79a

File tree

8 files changed

+319
-17
lines changed

8 files changed

+319
-17
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator pare
189189
}
190190

191191
throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field()
192-
+ "]. It can only be applied to numeric or string fields.");
192+
+ "]. It can only be applied to numeric or string fields.");
193193
}
194194

195195
// return the SubAggCollectionMode that this aggregation should use based on the expected size

server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceAggregatorFactory.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,28 @@ public ValuesSourceAggregatorFactory(String name, ValuesSourceConfig<VS> config,
4343
@Override
4444
public Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBucket,
4545
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
46-
VS vs = config.toValuesSource(context.getQueryShardContext());
46+
VS vs = config.toValuesSource(context.getQueryShardContext(), this::resolveMissingAny);
4747
if (vs == null) {
4848
return createUnmapped(parent, pipelineAggregators, metaData);
4949
}
5050
return doCreateInternal(vs, parent, collectsFromSingleBucket, pipelineAggregators, metaData);
5151
}
5252

53+
/**
54+
* This method provides a hook for aggregations that need finer grained control over the ValuesSource selected when the user supplies a
55+
* missing value and there is no mapped field to infer the type from. This will only be called for aggregations that specify the
56+
* ValuesSourceType.ANY in their constructors (On the builder class). The user supplied object is passed as a parameter, so its type
57+
* may be inspected as needed.
58+
*
59+
* Generally, only the type of the returned ValuesSource is used, so returning the EMPTY instance of the chosen type is recommended.
60+
*
61+
* @param missing The user supplied missing value
62+
* @return A ValuesSource instance compatible with the supplied parameter
63+
*/
64+
protected ValuesSource resolveMissingAny(Object missing) {
65+
return ValuesSource.Bytes.WithOrdinals.EMPTY;
66+
}
67+
5368
protected abstract Aggregator createUnmapped(Aggregator parent,
5469
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException;
5570

server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
import java.time.ZoneId;
3838
import java.time.ZoneOffset;
39+
import java.util.function.Function;
3940

4041
/**
4142
* A configuration that tells aggregations how to retrieve data from the index
@@ -223,10 +224,15 @@ public DocValueFormat format() {
223224
return format;
224225
}
225226

227+
@Nullable
228+
public VS toValuesSource(QueryShardContext context) {
229+
return toValuesSource(context, value -> ValuesSource.Bytes.WithOrdinals.EMPTY);
230+
}
231+
226232
/** Get a value source given its configuration. A return value of null indicates that
227233
* no value source could be built. */
228234
@Nullable
229-
public VS toValuesSource(QueryShardContext context) {
235+
public VS toValuesSource(QueryShardContext context, Function<Object, ValuesSource> resolveMissingAny) {
230236
if (!valid()) {
231237
throw new IllegalStateException(
232238
"value source config is invalid; must have either a field context or a script or marked as unwrapped");
@@ -241,8 +247,10 @@ public VS toValuesSource(QueryShardContext context) {
241247
vs = (VS) ValuesSource.Numeric.EMPTY;
242248
} else if (valueSourceType() == ValuesSourceType.GEOPOINT) {
243249
vs = (VS) ValuesSource.GeoPoint.EMPTY;
244-
} else if (valueSourceType() == ValuesSourceType.ANY || valueSourceType() == ValuesSourceType.BYTES) {
250+
} else if (valueSourceType() == ValuesSourceType.BYTES) {
245251
vs = (VS) ValuesSource.Bytes.WithOrdinals.EMPTY;
252+
} else if (valueSourceType() == ValuesSourceType.ANY) {
253+
vs = (VS) resolveMissingAny.apply(missing());
246254
} else {
247255
throw new IllegalArgumentException("Can't deal with unmapped ValuesSource type " + valueSourceType());
248256
}

server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorTests.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,22 @@
2121

2222
import org.apache.lucene.document.Document;
2323
import org.apache.lucene.document.SortedNumericDocValuesField;
24+
import org.apache.lucene.document.SortedSetDocValuesField;
2425
import org.apache.lucene.index.IndexReader;
2526
import org.apache.lucene.index.RandomIndexWriter;
2627
import org.apache.lucene.search.IndexSearcher;
2728
import org.apache.lucene.search.MatchAllDocsQuery;
2829
import org.apache.lucene.store.Directory;
30+
import org.apache.lucene.util.BytesRef;
2931
import org.apache.lucene.util.NumericUtils;
32+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3033
import org.elasticsearch.index.mapper.MappedFieldType;
3134
import org.elasticsearch.index.mapper.NumberFieldMapper;
3235
import org.elasticsearch.search.aggregations.AggregatorTestCase;
3336
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
3437

38+
import static org.hamcrest.Matchers.containsString;
39+
3540
public class HistogramAggregatorTests extends AggregatorTestCase {
3641

3742
public void testLongs() throws Exception {
@@ -188,6 +193,83 @@ public void testMissing() throws Exception {
188193
}
189194
}
190195

196+
public void testMissingUnmappedField() throws Exception {
197+
try (Directory dir = newDirectory();
198+
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
199+
for (int i = 0; i < 7; i ++) {
200+
Document doc = new Document();
201+
w.addDocument(doc);
202+
}
203+
204+
HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
205+
.field("field")
206+
.interval(5)
207+
.missing(2d);
208+
MappedFieldType type = null;
209+
try (IndexReader reader = w.getReader()) {
210+
IndexSearcher searcher = new IndexSearcher(reader);
211+
InternalHistogram histogram = search(searcher, new MatchAllDocsQuery(), aggBuilder, type);
212+
213+
assertEquals(1, histogram.getBuckets().size());
214+
215+
assertEquals(0d, histogram.getBuckets().get(0).getKey());
216+
assertEquals(7, histogram.getBuckets().get(0).getDocCount());
217+
218+
assertTrue(AggregationInspectionHelper.hasValue(histogram));
219+
}
220+
}
221+
}
222+
223+
public void testMissingUnmappedFieldBadType() throws Exception {
224+
try (Directory dir = newDirectory();
225+
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
226+
for (int i = 0; i < 7; i ++) {
227+
w.addDocument(new Document());
228+
}
229+
230+
String missingValue = "🍌🍌🍌";
231+
HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
232+
.field("field")
233+
.interval(5)
234+
.missing(missingValue);
235+
MappedFieldType type = null;
236+
try (IndexReader reader = w.getReader()) {
237+
IndexSearcher searcher = new IndexSearcher(reader);
238+
Throwable t = expectThrows(IllegalArgumentException.class, () -> {
239+
search(searcher, new MatchAllDocsQuery(), aggBuilder, type);
240+
});
241+
// This throws a number format exception (which is a subclass of IllegalArgumentException) and might be ok?
242+
assertThat(t.getMessage(), containsString(missingValue));
243+
}
244+
}
245+
}
246+
247+
public void testIncorrectFieldType() throws Exception {
248+
try (Directory dir = newDirectory();
249+
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
250+
for (String value : new String[] {"foo", "bar", "baz", "quux"}) {
251+
Document doc = new Document();
252+
doc.add(new SortedSetDocValuesField("field", new BytesRef(value)));
253+
w.addDocument(doc);
254+
}
255+
256+
HistogramAggregationBuilder aggBuilder = new HistogramAggregationBuilder("my_agg")
257+
.field("field")
258+
.interval(5);
259+
MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType();
260+
fieldType.setName("field");
261+
fieldType.setHasDocValues(true);
262+
try (IndexReader reader = w.getReader()) {
263+
IndexSearcher searcher = new IndexSearcher(reader);
264+
265+
expectThrows(IllegalArgumentException.class, () -> {
266+
search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
267+
});
268+
}
269+
}
270+
271+
}
272+
191273
public void testOffset() throws Exception {
192274
try (Directory dir = newDirectory();
193275
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {

server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/IpRangeAggregatorTests.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,4 +152,45 @@ public void testRanges() throws Exception {
152152
}
153153
}
154154
}
155+
156+
public void testMissingUnmapped() throws Exception {
157+
try (Directory dir = newDirectory();
158+
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
159+
for (int i = 0; i < 7; i++) {
160+
Document doc = new Document();
161+
w.addDocument(doc);
162+
}
163+
164+
IpRangeAggregationBuilder builder = new IpRangeAggregationBuilder("test_agg")
165+
.field("field")
166+
.addRange(new IpRangeAggregationBuilder.Range("foo", "192.168.100.0", "192.168.100.255"))
167+
.missing("192.168.100.42"); // Apparently we expect a string here
168+
try (IndexReader reader = w.getReader()) {
169+
IndexSearcher searcher = new IndexSearcher(reader);
170+
InternalBinaryRange range = search(searcher, new MatchAllDocsQuery(), builder, (MappedFieldType) null);
171+
assertEquals(1, range.getBuckets().size());
172+
}
173+
}
174+
}
175+
176+
public void testMissingUnmappedBadType() throws Exception {
177+
try (Directory dir = newDirectory();
178+
RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
179+
for (int i = 0; i < 7; i++) {
180+
Document doc = new Document();
181+
w.addDocument(doc);
182+
}
183+
184+
IpRangeAggregationBuilder builder = new IpRangeAggregationBuilder("test_agg")
185+
.field("field")
186+
.addRange(new IpRangeAggregationBuilder.Range("foo", "192.168.100.0", "192.168.100.255"))
187+
.missing(1234);
188+
try (IndexReader reader = w.getReader()) {
189+
IndexSearcher searcher = new IndexSearcher(reader);
190+
expectThrows(IllegalArgumentException.class, () -> {
191+
search(searcher, new MatchAllDocsQuery(), builder, (MappedFieldType) null);
192+
});
193+
}
194+
}
195+
}
155196
}

server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.lucene.document.Document;
2222
import org.apache.lucene.document.Field;
2323
import org.apache.lucene.document.InetAddressPoint;
24+
import org.apache.lucene.document.LatLonDocValuesField;
2425
import org.apache.lucene.document.NumericDocValuesField;
2526
import org.apache.lucene.document.SortedDocValuesField;
2627
import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -37,10 +38,12 @@
3738
import org.apache.lucene.store.Directory;
3839
import org.apache.lucene.util.BytesRef;
3940
import org.apache.lucene.util.NumericUtils;
41+
import org.elasticsearch.common.geo.GeoPoint;
4042
import org.elasticsearch.common.network.InetAddresses;
4143
import org.elasticsearch.common.settings.Settings;
4244
import org.elasticsearch.common.util.MockBigArrays;
4345
import org.elasticsearch.common.util.MockPageCacheRecycler;
46+
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
4447
import org.elasticsearch.index.mapper.IdFieldMapper;
4548
import org.elasticsearch.index.mapper.IpFieldMapper;
4649
import org.elasticsearch.index.mapper.KeywordFieldMapper;
@@ -77,6 +80,7 @@
7780
import org.elasticsearch.search.aggregations.support.ValueType;
7881
import org.elasticsearch.search.sort.FieldSortBuilder;
7982
import org.elasticsearch.search.sort.ScoreSortBuilder;
83+
import org.elasticsearch.test.geo.RandomGeoGenerator;
8084

8185
import java.io.IOException;
8286
import java.net.InetAddress;
@@ -884,6 +888,60 @@ public void testUnmappedWithMissing() throws Exception {
884888
}
885889
}
886890

891+
public void testGeoPointField() throws Exception {
892+
try (Directory directory = newDirectory()) {
893+
GeoPoint point = RandomGeoGenerator.randomPoint(random());
894+
final String field = "field";
895+
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
896+
Document document = new Document();
897+
document.add(new LatLonDocValuesField(field, point.getLat(), point.getLon()));
898+
indexWriter.addDocument(document);
899+
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
900+
MappedFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType();
901+
fieldType.setHasDocValues(true);
902+
fieldType.setName("field");
903+
904+
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
905+
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", null) .field(field);
906+
// Note - other places we throw IllegalArgumentException
907+
expectThrows(AggregationExecutionException.class, () -> {
908+
createAggregator(aggregationBuilder, indexSearcher, fieldType);
909+
});
910+
}
911+
}
912+
}
913+
}
914+
915+
public void testIpField() throws Exception {
916+
try (Directory directory = newDirectory()) {
917+
final String field = "field";
918+
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
919+
Document document = new Document();
920+
document.add(new SortedSetDocValuesField("field",
921+
new BytesRef(InetAddressPoint.encode(InetAddresses.forString("192.168.100.42")))));
922+
indexWriter.addDocument(document);
923+
try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) {
924+
MappedFieldType fieldType = new IpFieldMapper.IpFieldType();
925+
fieldType.setHasDocValues(true);
926+
fieldType.setName("field");
927+
928+
IndexSearcher indexSearcher = newIndexSearcher(indexReader);
929+
TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name", null) .field(field);
930+
// Note - other places we throw IllegalArgumentException
931+
Aggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType);
932+
aggregator.preCollection();
933+
indexSearcher.search(new MatchAllDocsQuery(), aggregator);
934+
aggregator.postCollection();
935+
Terms result = (Terms) aggregator.buildAggregation(0L);
936+
assertEquals("_name", result.getName());
937+
assertEquals(1, result.getBuckets().size());
938+
assertEquals("192.168.100.42", result.getBuckets().get(0).getKey());
939+
assertEquals(1, result.getBuckets().get(0).getDocCount());
940+
}
941+
}
942+
}
943+
}
944+
887945
public void testNestedTermsAgg() throws Exception {
888946
try (Directory directory = newDirectory()) {
889947
try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {

0 commit comments

Comments
 (0)