Skip to content

Commit d7647c8

Browse files
authored
SQL: Improve null handling in Geo Functions (#40708)
ST_Distance function returns null now instead of throwing an error when one of the arguments in null. It also brings the handling of arrays when a single item is expected in line with the rest of data types and fixes the handling of geo_points when docvalues are not available. Relates to #29872
1 parent ee64cc9 commit d7647c8

File tree

7 files changed

+226
-70
lines changed

7 files changed

+226
-70
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,33 @@
11
{"index":{"_id": "1"}}
2-
{"region": "Americas", "city": "Mountain View", "location": {"lat":"37.386483", "lon":"-122.083843"}, "shape": "POINT (-122.083843 37.386483)", "region_point": "POINT(-105.2551 54.5260)"}
2+
{"region": "Americas", "city": "Mountain View", "location": {"lat":"37.386483", "lon":"-122.083843"}, "location_no_dv": {"lat":"37.386483", "lon":"-122.083843"}, "shape": "POINT (-122.083843 37.386483)", "region_point": "POINT(-105.2551 54.5260)"}
33
{"index":{"_id": "2"}}
4-
{"region": "Americas", "city": "Chicago", "location": [-87.637874, 41.888783], "shape": {"type" : "point", "coordinates" : [-87.637874, 41.888783]}, "region_point": "POINT(-105.2551 54.5260)"}
4+
{"region": "Americas", "city": "Chicago", "location": [-87.637874, 41.888783], "location_no_dv": [-87.637874, 41.888783], "shape": {"type" : "point", "coordinates" : [-87.637874, 41.888783]}, "region_point": "POINT(-105.2551 54.5260)"}
55
{"index":{"_id": "3"}}
6-
{"region": "Americas", "city": "New York", "location": "40.745171,-73.990027", "shape": "POINT (-73.990027 40.745171)", "region_point": "POINT(-105.2551 54.5260)"}
6+
{"region": "Americas", "city": "New York", "location": "40.745171,-73.990027", "location_no_dv": "40.745171,-73.990027", "shape": "POINT (-73.990027 40.745171)", "region_point": "POINT(-105.2551 54.5260)"}
77
{"index":{"_id": "4"}}
8-
{"region": "Americas", "city": "San Francisco", "location": "37.789541,-122.394228", "shape": "POINT (-122.394228 37.789541)", "region_point": "POINT(-105.2551 54.5260)"}
8+
{"region": "Americas", "city": "San Francisco", "location": "37.789541,-122.394228", "location_no_dv": "37.789541,-122.394228", "shape": "POINT (-122.394228 37.789541)", "region_point": "POINT(-105.2551 54.5260)"}
99
{"index":{"_id": "5"}}
10-
{"region": "Americas", "city": "Phoenix", "location": "33.376242,-111.973505", "shape": "POINT (-111.973505 33.376242)", "region_point": "POINT(-105.2551 54.5260)"}
10+
{"region": "Americas", "city": "Phoenix", "location": "33.376242,-111.973505", "location_no_dv": "33.376242,-111.973505", "shape": "POINT (-111.973505 33.376242)", "region_point": "POINT(-105.2551 54.5260)"}
1111
{"index":{"_id": "6"}}
12-
{"region": "Europe", "city": "Amsterdam", "location": "52.347557,4.850312", "shape": "POINT (4.850312 52.347557)", "region_point": "POINT(15.2551 54.5260)"}
12+
{"region": "Europe", "city": "Amsterdam", "location": "52.347557,4.850312", "location_no_dv": "52.347557,4.850312", "shape": "POINT (4.850312 52.347557)", "region_point": "POINT(15.2551 54.5260)"}
1313
{"index":{"_id": "7"}}
14-
{"region": "Europe", "city": "Berlin", "location": "52.486701,13.390889", "shape": "POINT (13.390889 52.486701)", "region_point": "POINT(15.2551 54.5260)"}
14+
{"region": "Europe", "city": "Berlin", "location": "52.486701,13.390889", "location_no_dv": "52.486701,13.390889", "shape": "POINT (13.390889 52.486701)", "region_point": "POINT(15.2551 54.5260)"}
1515
{"index":{"_id": "8"}}
16-
{"region": "Europe", "city": "Munich", "location": "48.146321,11.537505", "shape": "POINT (11.537505 48.146321)", "region_point": "POINT(15.2551 54.5260)"}
16+
{"region": "Europe", "city": "Munich", "location": "48.146321,11.537505", "location_no_dv": "48.146321,11.537505", "shape": "POINT (11.537505 48.146321)", "region_point": "POINT(15.2551 54.5260)"}
1717
{"index":{"_id": "9"}}
18-
{"region": "Europe", "city": "London", "location": "51.510871,-0.121672", "shape": "POINT (-0.121672 51.510871)", "region_point": "POINT(15.2551 54.5260)"}
18+
{"region": "Europe", "city": "London", "location": "51.510871,-0.121672", "location_no_dv": "51.510871,-0.121672", "shape": "POINT (-0.121672 51.510871)", "region_point": "POINT(15.2551 54.5260)"}
1919
{"index":{"_id": "10"}}
20-
{"region": "Europe", "city": "Paris", "location": "48.845538,2.351773", "shape": "POINT (2.351773 48.845538)", "region_point": "POINT(15.2551 54.5260)"}
20+
{"region": "Europe", "city": "Paris", "location": "48.845538,2.351773", "location_no_dv": "48.845538,2.351773", "shape": "POINT (2.351773 48.845538)", "region_point": "POINT(15.2551 54.5260)"}
2121
{"index":{"_id": "11"}}
22-
{"region": "Asia", "city": "Singapore", "location": "1.295868,103.855535", "shape": "POINT (103.855535 1.295868)", "region_point": "POINT(100.6197 34.0479)"}
22+
{"region": "Asia", "city": "Singapore", "location": "1.295868,103.855535", "location_no_dv": "1.295868,103.855535", "shape": "POINT (103.855535 1.295868)", "region_point": "POINT(100.6197 34.0479)"}
2323
{"index":{"_id": "12"}}
24-
{"region": "Asia", "city": "Hong Kong", "location": "22.281397,114.183925", "shape": "POINT (114.183925 22.281397)", "region_point": "POINT(100.6197 34.0479)"}
24+
{"region": "Asia", "city": "Hong Kong", "location": "22.281397,114.183925", "location_no_dv": "22.281397,114.183925", "shape": "POINT (114.183925 22.281397)", "region_point": "POINT(100.6197 34.0479)"}
2525
{"index":{"_id": "13"}}
26-
{"region": "Asia", "city": "Seoul", "location": "37.509132,127.060851", "shape": "POINT (127.060851 37.509132)", "region_point": "POINT(100.6197 34.0479)"}
26+
{"region": "Asia", "city": "Seoul", "location": "37.509132,127.060851", "location_no_dv": "37.509132,127.060851", "shape": "POINT (127.060851 37.509132)", "region_point": "POINT(100.6197 34.0479)"}
2727
{"index":{"_id": "14"}}
28-
{"region": "Asia", "city": "Tokyo", "location": "35.669616,139.76402225", "shape": "POINT (139.76402225 35.669616)", "region_point": "POINT(100.6197 34.0479)"}
28+
{"region": "Asia", "city": "Tokyo", "location": "35.669616,139.76402225", "location_no_dv": "35.669616,139.76402225", "shape": "POINT (139.76402225 35.669616)", "region_point": "POINT(100.6197 34.0479)"}
2929
{"index":{"_id": "15"}}
30-
{"region": "Asia", "city": "Sydney", "location": "-33.863385,151.208629", "shape": "POINT (151.208629 -33.863385)", "region_point": "POINT(100.6197 34.0479)"}
30+
{"region": "Asia", "city": "Sydney", "location": "-33.863385,151.208629", "location_no_dv": "-33.863385,151.208629", "shape": "POINT (151.208629 -33.863385)", "region_point": "POINT(100.6197 34.0479)"}
3131

3232

3333

x-pack/plugin/sql/qa/src/main/resources/geo/geosql.csv-spec

+31-18
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ DESCRIBE "geo";
1717
column:s | type:s | mapping:s
1818
city | VARCHAR | keyword
1919
location | GEOMETRY | geo_point
20+
location_no_dv | GEOMETRY | geo_point
2021
region | VARCHAR | keyword
2122
region_point | VARCHAR | keyword
2223
shape | GEOMETRY | geo_shape
@@ -26,24 +27,24 @@ shape | GEOMETRY | geo_shape
2627
// TODO: For now we just get geopoint formatted as is and we also need to convert it to STRING to work with CSV
2728

2829
selectAllPointsAsStrings
29-
SELECT city, CAST(location AS STRING) location, CAST(shape AS STRING) shape, region FROM "geo" ORDER BY "city";
30-
31-
city:s | location:s | shape:s | region:s
32-
Amsterdam |point (4.850311987102032 52.347556999884546) |point (4.850312 52.347557) |Europe
33-
Berlin |point (13.390888944268227 52.48670099303126) |point (13.390889 52.486701) |Europe
34-
Chicago |point (-87.63787407428026 41.888782968744636) |point (-87.637874 41.888783) |Americas
35-
Hong Kong |point (114.18392493389547 22.28139698971063) |point (114.183925 22.281397) |Asia
36-
London |point (-0.12167204171419144 51.51087098289281)|point (-0.121672 51.510871) |Europe
37-
Mountain View |point (-122.08384302444756 37.38648299127817) |point (-122.083843 37.386483) |Americas
38-
Munich |point (11.537504978477955 48.14632098656148) |point (11.537505 48.146321) |Europe
39-
New York |point (-73.9900270756334 40.74517097789794) |point (-73.990027 40.745171) |Americas
40-
Paris |point (2.3517729341983795 48.84553796611726) |point (2.351773 48.845538) |Europe
41-
Phoenix |point (-111.97350500151515 33.37624196894467) |point (-111.973505 33.376242) |Americas
42-
San Francisco |point (-122.39422800019383 37.789540970698) |point (-122.394228 37.789541) |Americas
43-
Seoul |point (127.06085099838674 37.50913198571652) |point (127.060851 37.509132) |Asia
44-
Singapore |point (103.8555349688977 1.2958679627627134) |point (103.855535 1.295868) |Asia
45-
Sydney |point (151.20862897485495 -33.863385021686554)|point (151.208629 -33.863385) |Asia
46-
Tokyo |point (139.76402222178876 35.66961596254259) |point (139.76402225 35.669616)|Asia
30+
SELECT city, CAST(location AS STRING) location, CAST(location_no_dv AS STRING) location_no_dv, CAST(shape AS STRING) shape, region FROM "geo" ORDER BY "city";
31+
32+
city:s | location:s | location_no_dv:s | shape:s | region:s
33+
Amsterdam |point (4.850311987102032 52.347556999884546) |point (4.850312 52.347557) |point (4.850312 52.347557) |Europe
34+
Berlin |point (13.390888944268227 52.48670099303126) |point (13.390889 52.486701) |point (13.390889 52.486701) |Europe
35+
Chicago |point (-87.63787407428026 41.888782968744636) |point (-87.637874 41.888783) |point (-87.637874 41.888783) |Americas
36+
Hong Kong |point (114.18392493389547 22.28139698971063) |point (114.183925 22.281397) |point (114.183925 22.281397) |Asia
37+
London |point (-0.12167204171419144 51.51087098289281)|point (-0.121672 51.510871) |point (-0.121672 51.510871) |Europe
38+
Mountain View |point (-122.08384302444756 37.38648299127817) |point (-122.083843 37.386483) |point (-122.083843 37.386483) |Americas
39+
Munich |point (11.537504978477955 48.14632098656148) |point (11.537505 48.146321) |point (11.537505 48.146321) |Europe
40+
New York |point (-73.9900270756334 40.74517097789794) |point (-73.990027 40.745171) |point (-73.990027 40.745171) |Americas
41+
Paris |point (2.3517729341983795 48.84553796611726) |point (2.351773 48.845538) |point (2.351773 48.845538) |Europe
42+
Phoenix |point (-111.97350500151515 33.37624196894467) |point (-111.973505 33.376242) |point (-111.973505 33.376242) |Americas
43+
San Francisco |point (-122.39422800019383 37.789540970698) |point (-122.394228 37.789541) |point (-122.394228 37.789541) |Americas
44+
Seoul |point (127.06085099838674 37.50913198571652) |point (127.060851 37.509132) |point (127.060851 37.509132) |Asia
45+
Singapore |point (103.8555349688977 1.2958679627627134) |point (103.855535 1.295868) |point (103.855535 1.295868) |Asia
46+
Sydney |point (151.20862897485495 -33.863385021686554)|point (151.208629 -33.863385) |point (151.208629 -33.863385) |Asia
47+
Tokyo |point (139.76402222178876 35.66961596254259) |point (139.76402225 35.669616)|point (139.76402225 35.669616)|Asia
4748
;
4849

4950
// TODO: Both shape and location contain the same data for now, we should change it later to make things more interesting
@@ -201,3 +202,15 @@ SELECT COUNT(*) count, FIRST(region) region FROM geo GROUP BY FLOOR(ST_Distance(
201202
3 |Asia
202203
2 |Asia
203204
;
205+
206+
selectWktToSqlOfNull
207+
SELECT ST_ASWKT(ST_WktToSql(NULL)) shape;
208+
shape:s
209+
null
210+
;
211+
212+
selectWktToSqlOfNull
213+
SELECT ST_Distance(ST_WktToSql(NULL), ST_WktToSQL('POINT (-71 42)')) shape;
214+
shape:d
215+
null
216+
;

x-pack/plugin/sql/qa/src/main/resources/geo/geosql.json

+4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
"location": {
1414
"type": "geo_point"
1515
},
16+
"location_no_dv": {
17+
"type": "geo_point",
18+
"doc_values": "false"
19+
},
1620
"shape": {
1721
"type": "geo_shape"
1822
},

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/FieldHitExtractor.java

+32-28
Original file line numberDiff line numberDiff line change
@@ -127,43 +127,34 @@ private Object unwrapMultiValue(Object values) {
127127
if (values == null) {
128128
return null;
129129
}
130-
if (dataType == DataType.GEO_POINT) {
131-
Object value;
132-
if (values instanceof List && ((List<?>) values).size() == 1) {
133-
value = ((List<?>) values).get(0);
130+
if (values instanceof List) {
131+
List<?> list = (List<?>) values;
132+
if (list.isEmpty()) {
133+
return null;
134134
} else {
135-
value = values;
135+
// let's make sure first that we are not dealing with an geo_point represented as an array
136+
if (isGeoPointArray(list) == false) {
137+
if (list.size() == 1 || arrayLeniency) {
138+
return unwrapMultiValue(list.get(0));
139+
} else {
140+
throw new SqlIllegalArgumentException("Arrays (returned by [{}]) are not supported", fieldName);
141+
}
142+
}
136143
}
144+
}
145+
if (dataType == DataType.GEO_POINT) {
137146
try {
138-
GeoPoint geoPoint = GeoUtils.parseGeoPoint(value, true);
147+
GeoPoint geoPoint = GeoUtils.parseGeoPoint(values, true);
139148
return new GeoShape(geoPoint.lon(), geoPoint.lat());
140149
} catch (ElasticsearchParseException ex) {
141-
throw new SqlIllegalArgumentException("Cannot parse geo_point value (returned by [{}])", fieldName);
150+
throw new SqlIllegalArgumentException("Cannot parse geo_point value [{}] (returned by [{}])", values, fieldName);
142151
}
143152
}
144153
if (dataType == DataType.GEO_SHAPE) {
145-
Object value;
146-
if (values instanceof List && ((List<?>) values).size() == 1) {
147-
value = ((List<?>) values).get(0);
148-
} else {
149-
value = values;
150-
}
151154
try {
152-
return new GeoShape(value);
155+
return new GeoShape(values);
153156
} catch (IOException ex) {
154-
throw new SqlIllegalArgumentException("Cannot read geo_shape value (returned by [{}])", fieldName);
155-
}
156-
}
157-
if (values instanceof List) {
158-
List<?> list = (List<?>) values;
159-
if (list.isEmpty()) {
160-
return null;
161-
} else {
162-
if (arrayLeniency || list.size() == 1) {
163-
return unwrapMultiValue(list.get(0));
164-
} else {
165-
throw new SqlIllegalArgumentException("Arrays (returned by [{}]) are not supported", fieldName);
166-
}
157+
throw new SqlIllegalArgumentException("Cannot read geo_shape value [{}] (returned by [{}])", values, fieldName);
167158
}
168159
}
169160
if (values instanceof Map) {
@@ -180,6 +171,17 @@ private Object unwrapMultiValue(Object values) {
180171
throw new SqlIllegalArgumentException("Type {} (returned by [{}]) is not supported", values.getClass().getSimpleName(), fieldName);
181172
}
182173

174+
private boolean isGeoPointArray(List<?> list) {
175+
if (dataType != DataType.GEO_POINT) {
176+
return false;
177+
}
178+
// we expect the point in [lon lat] or [lon lat alt] formats
179+
if (list.size() > 3 || list.size() < 1) {
180+
return false;
181+
}
182+
return list.get(0) instanceof Number;
183+
}
184+
183185
@SuppressWarnings({ "unchecked", "rawtypes" })
184186
Object extractFromSource(Map<String, Object> map) {
185187
Object value = null;
@@ -204,7 +206,9 @@ Object extractFromSource(Map<String, Object> map) {
204206

205207
if (node instanceof List) {
206208
List listOfValues = (List) node;
207-
if (listOfValues.size() == 1 || arrayLeniency) {
209+
// we can only do this optimization until the last element of our pass since geo points are using arrays
210+
// and we don't want to blindly ignore the second element of array if arrayLeniency is enabled
211+
if ((i < path.length - 1) && (listOfValues.size() == 1 || arrayLeniency)) {
208212
// this is a List with a size of 1 e.g.: {"a" : [{"b" : "value"}]} meaning the JSON is a list with one element
209213
// or a list of values with one element e.g.: {"a": {"b" : ["value"]}}
210214
// in case of being lenient about arrays, just extract the first value in the array

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/scalar/geo/StDistanceProcessor.java

+5-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,11 @@ protected Object doProcess(Object left, Object right) {
4646
return process(left, right);
4747
}
4848

49-
public static double process(Object source1, Object source2) {
49+
public static Double process(Object source1, Object source2) {
50+
if (source1 == null || source2 == null) {
51+
return null;
52+
}
53+
5054
if (source1 instanceof GeoShape == false) {
5155
throw new SqlIllegalArgumentException("A geo_point or geo_shape with type point is required; received [{}]", source1);
5256
}

0 commit comments

Comments
 (0)