Skip to content

Commit 743e6ee

Browse files
ENH: match geopandas sjoin and clip API (#149)
* ENH: match geopandas sjoin and clip API
* support clipping GeoSeries
* add GeoSeries.clip to API docs
* fix test
* Update dask_geopandas/sjoin.py
Co-authored-by: Joris Van den Bossche <[email protected]>
* pin required geopandas
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent bca1d7d commit 743e6ee

File tree

10 files changed

+150
-14
lines changed

10 files changed

+150
-14
lines changed

dask_geopandas/clip.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ def clip(gdf, mask, keep_geom_type=False):
3838
}
3939
divisions = [None] * (len(dsk) + 1)
4040
graph = HighLevelGraph.from_collections(name, dsk, dependencies=[gdf])
41-
result = GeoDataFrame(graph, name, gdf._meta, tuple(divisions))
41+
if isinstance(gdf, GeoDataFrame):
42+
result = GeoDataFrame(graph, name, gdf._meta, tuple(divisions))
43+
elif isinstance(gdf, GeoSeries):
44+
result = GeoSeries(graph, name, gdf._meta, tuple(divisions))
4245
result.spatial_partitions = new_spatial_partitions
4346

4447
return result

dask_geopandas/core.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from .morton_distance import _morton_distance
1818
from .geohash import _geohash
1919

20+
import dask_geopandas
21+
2022

2123
def _set_crs(df, crs, allow_override):
2224
"""Return a new object with crs set to ``crs``"""
@@ -449,6 +451,10 @@ def geohash(self, string=True, p=12):
449451

450452
return geohashes
451453

454+
@derived_from(geopandas.GeoDataFrame)
455+
def clip(self, mask, keep_geom_type=False):
456+
return dask_geopandas.clip(self, mask=mask, keep_geom_type=keep_geom_type)
457+
452458

453459
class GeoSeries(_Frame, dd.core.Series):
454460
"""Parallel GeoPandas GeoSeries
@@ -579,6 +585,39 @@ def union(block):
579585
)
580586
return aggregated.set_crs(self.crs)
581587

588+
def sjoin(self, df, how="inner", predicate="intersects"):
589+
"""
590+
Spatial join of two GeoDataFrames.
591+
592+
Parameters
593+
----------
594+
df : geopandas or dask_geopandas GeoDataFrame
595+
If a geopandas.GeoDataFrame is passed, it is considered as a
596+
dask_geopandas.GeoDataFrame with 1 partition (without spatial
597+
partitioning information).
598+
how : string, default 'inner'
599+
The type of join. Currently only 'inner' is supported.
600+
predicate : string, default 'intersects'
601+
Binary predicate used to match corresponding rows of the left and right
602+
GeoDataFrame. Possible values: 'contains', 'contains_properly',
603+
'covered_by', 'covers', 'crosses', 'intersects', 'overlaps',
604+
'touches', 'within'.
605+
606+
Returns
607+
-------
608+
dask_geopandas.GeoDataFrame
609+
610+
Notes
611+
-----
612+
If both the left and right GeoDataFrame have spatial partitioning
613+
information available (the ``spatial_partitions`` attribute is set),
614+
the output partitions are determined based on intersection of the
615+
spatial partitions. In all other cases, the output partitions are
616+
all combinations (cartesian/cross product) of all input partitions
617+
of the left and right GeoDataFrame.
618+
"""
619+
return dask_geopandas.sjoin(self, df, how=how, predicate=predicate)
620+
582621

583622
from_geopandas = dd.from_pandas
584623

dask_geopandas/sjoin.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import warnings
2+
13
import numpy as np
24
import geopandas
35

@@ -7,7 +9,7 @@
79
from .core import from_geopandas, GeoDataFrame
810

911

10-
def sjoin(left, right, how="inner", op="intersects"):
12+
def sjoin(left, right, how="inner", predicate="intersects", **kwargs):
1113
"""
1214
Spatial join of two GeoDataFrames.
1315
@@ -19,7 +21,7 @@ def sjoin(left, right, how="inner", op="intersects"):
1921
partitioning information).
2022
how : string, default 'inner'
2123
The type of join. Currently only 'inner' is supported.
22-
op : string, default 'intersects'
24+
predicate : string, default 'intersects'
2325
Binary predicate used to match corresponding rows of the left and right
2426
GeoDataFrame. Possible values: 'contains', 'contains_properly',
2527
'covered_by', 'covers', 'crosses', 'intersects', 'overlaps',
@@ -38,6 +40,14 @@ def sjoin(left, right, how="inner", op="intersects"):
3840
all combinations (cartesian/cross product) of all input partitions
3941
of the left and right GeoDataFrame.
4042
"""
43+
if "op" in kwargs:
44+
predicate = kwargs.pop("op")
45+
deprecation_message = (
46+
"The `op` parameter is deprecated and will be removed"
47+
" in a future release. Please use the `predicate` parameter"
48+
" instead."
49+
)
50+
warnings.warn(deprecation_message, FutureWarning, stacklevel=2)
4151
if how != "inner":
4252
raise NotImplementedError("Only how='inner' is supported right now")
4353

@@ -46,8 +56,8 @@ def sjoin(left, right, how="inner", op="intersects"):
4656
if isinstance(right, geopandas.GeoDataFrame):
4757
right = from_geopandas(right, npartitions=1)
4858

49-
name = "sjoin-" + tokenize(left, right, how, op)
50-
meta = geopandas.sjoin(left._meta, right._meta, how=how, op=op)
59+
name = "sjoin-" + tokenize(left, right, how, predicate)
60+
meta = geopandas.sjoin(left._meta, right._meta, how=how, predicate=predicate)
5161

5262
if left.spatial_partitions is not None and right.spatial_partitions is not None:
5363
# Spatial partitions are known -> use them to trim down the list of
@@ -73,7 +83,13 @@ def sjoin(left, right, how="inner", op="intersects"):
7383
dsk = {}
7484
new_spatial_partitions = []
7585
for i, (l, r) in enumerate(zip(parts_left, parts_right)):
76-
dsk[(name, i)] = (geopandas.sjoin, (left._name, l), (right._name, r), how, op)
86+
dsk[(name, i)] = (
87+
geopandas.sjoin,
88+
(left._name, l),
89+
(right._name, r),
90+
how,
91+
predicate,
92+
)
7793
# TODO preserve spatial partitions of the output if only left has spatial
7894
# partitions
7995
if using_spatial_partitions:

doc/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
geopandas
1+
geopandas>=0.10
22
numpydoc
33
sphinx-book-theme
44
myst-nb

doc/source/docs/reference/geodataframe.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,22 @@ Aggregating and exploding
4747

4848
GeoDataFrame.explode
4949

50+
Spatial joins
51+
-------------
52+
53+
.. autosummary::
54+
:toctree: api/
55+
56+
GeoDataFrame.sjoin
57+
58+
Overlay operations
59+
------------------
60+
61+
.. autosummary::
62+
:toctree: api/
63+
64+
GeoDataFrame.clip
65+
5066
Indexing
5167
--------
5268

doc/source/docs/reference/geoseries.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ Missing values
126126
GeoSeries.fillna
127127
GeoSeries.isna
128128

129+
Overlay operations
130+
------------------
131+
132+
.. autosummary::
133+
:toctree: api/
134+
135+
GeoSeries.clip
136+
129137
Indexing
130138
--------
131139

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import versioneer
55

66
install_requires = [
7-
"geopandas",
7+
"geopandas>=0.10",
88
"dask>=2.18.0,!=2021.05.1",
99
"distributed>=2.18.0,!=2021.05.1",
1010
"numba",

tests/test_clip.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import geopandas
2-
from geopandas.testing import assert_geodataframe_equal
2+
from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal
33
import pytest
44
import dask_geopandas
55
from .test_core import geodf_points # noqa: F401
@@ -31,3 +31,13 @@ def test_clip_dask_mask(geodf_points): # noqa: F811
3131
NotImplementedError, match=r"Mask cannot be a Dask GeoDataFrame or GeoSeries."
3232
):
3333
dask_geopandas.clip(dask_obj, mask)
34+
35+
36+
def test_clip_geoseries(geodf_points): # noqa: F811
37+
dask_obj = dask_geopandas.from_geopandas(geodf_points, npartitions=2)
38+
dask_obj.calculate_spatial_partitions()
39+
mask = geodf_points.iloc[:1]
40+
mask["geometry"] = mask["geometry"].buffer(2)
41+
expected = geopandas.clip(geodf_points.geometry, mask)
42+
result = dask_geopandas.clip(dask_obj.geometry, mask).compute()
43+
assert_geoseries_equal(expected, result)

tests/test_core.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,36 @@ def test_copy_none_spatial_partitions(geoseries_points):
529529
assert ddf_copy.spatial_partitions is None
530530

531531

532+
def test_sjoin():
533+
# test only the method, functionality tested in test_sjoin.py
534+
df_points = geopandas.read_file(geopandas.datasets.get_path("naturalearth_cities"))
535+
ddf_points = dask_geopandas.from_geopandas(df_points, npartitions=4)
536+
537+
df_polygons = geopandas.read_file(
538+
geopandas.datasets.get_path("naturalearth_lowres")
539+
)
540+
expected = df_points.sjoin(df_polygons, predicate="within", how="inner")
541+
expected = expected.sort_index()
542+
543+
result = ddf_points.sjoin(df_polygons, predicate="within", how="inner")
544+
assert_geodataframe_equal(expected, result.compute().sort_index())
545+
546+
547+
def test_clip(geodf_points):
548+
# test only the method, functionality tested in test_clip.py
549+
dask_obj = dask_geopandas.from_geopandas(geodf_points, npartitions=2)
550+
dask_obj.calculate_spatial_partitions()
551+
mask = geodf_points.iloc[:1]
552+
mask["geometry"] = mask["geometry"].buffer(2)
553+
expected = geodf_points.clip(mask)
554+
result = dask_obj.clip(mask).compute()
555+
assert_geodataframe_equal(expected, result)
556+
557+
expected = geodf_points.geometry.clip(mask)
558+
result = dask_obj.geometry.clip(mask).compute()
559+
assert_geoseries_equal(expected, result)
560+
561+
532562
class TestDissolve:
533563
def setup_method(self):
534564
self.world = geopandas.read_file(

tests/test_sjoin.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import pytest
2+
13
import geopandas
24
from geopandas.testing import assert_geodataframe_equal
35

@@ -13,24 +15,36 @@ def test_sjoin_dask_geopandas():
1315
)
1416
ddf_polygons = dask_geopandas.from_geopandas(df_polygons, npartitions=4)
1517

16-
expected = geopandas.sjoin(df_points, df_polygons, op="within", how="inner")
18+
expected = geopandas.sjoin(df_points, df_polygons, predicate="within", how="inner")
1719
expected = expected.sort_index()
1820

1921
# dask / geopandas
20-
result = dask_geopandas.sjoin(ddf_points, df_polygons, op="within", how="inner")
22+
result = dask_geopandas.sjoin(
23+
ddf_points, df_polygons, predicate="within", how="inner"
24+
)
2125
assert_geodataframe_equal(expected, result.compute().sort_index())
2226

2327
# geopandas / dask
24-
result = dask_geopandas.sjoin(df_points, ddf_polygons, op="within", how="inner")
28+
result = dask_geopandas.sjoin(
29+
df_points, ddf_polygons, predicate="within", how="inner"
30+
)
2531
assert_geodataframe_equal(expected, result.compute().sort_index())
2632

2733
# dask / dask
28-
result = dask_geopandas.sjoin(ddf_points, ddf_polygons, op="within", how="inner")
34+
result = dask_geopandas.sjoin(
35+
ddf_points, ddf_polygons, predicate="within", how="inner"
36+
)
2937
assert_geodataframe_equal(expected, result.compute().sort_index())
3038

3139
# with spatial_partitions
3240
ddf_points.calculate_spatial_partitions()
3341
ddf_polygons.calculate_spatial_partitions()
34-
result = dask_geopandas.sjoin(ddf_points, ddf_polygons, op="within", how="inner")
42+
result = dask_geopandas.sjoin(
43+
ddf_points, ddf_polygons, predicate="within", how="inner"
44+
)
3545
assert result.spatial_partitions is not None
3646
assert_geodataframe_equal(expected, result.compute().sort_index())
47+
48+
# check warning
49+
with pytest.warns(FutureWarning, match="The `op` parameter is deprecated"):
50+
dask_geopandas.sjoin(df_points, ddf_polygons, op="within", how="inner")

0 commit comments

Comments
 (0)