Skip to content

Implement gmt xarray BackendEntrypoint #3919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 43 commits into from
Apr 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
b81aa06
Implement gmtread xarray BackendEntrypoint
weiji14 Apr 17, 2025
2b98a65
Add source encoding to raster xarray.DataArray
weiji14 Apr 17, 2025
f41a812
Set load_dataarray's default engine to gmtread
weiji14 Apr 17, 2025
b71dcf5
Support reading GeoTIFF images via the kind='image' argument
weiji14 Apr 17, 2025
60c6726
[skip ci] Satisfy type checking
weiji14 Apr 17, 2025
bf09a39
Make 'kind' a required kwarg with no default value
weiji14 Apr 17, 2025
e622878
Add .grd to set of supported extensions in guess_can_open
weiji14 Apr 17, 2025
b9e3eb4
Rename 'kind' parameter to 'decode_kind' and mypy fix
weiji14 Apr 17, 2025
3a109ac
Rename parameter kind to decode_kind in load_dataarray
weiji14 Apr 17, 2025
d764f78
Specify decode_kind="grid" in load_ functions
weiji14 Apr 17, 2025
98910f8
[skip ci] format
weiji14 Apr 17, 2025
126560e
Use type: ignore[override]
weiji14 Apr 17, 2025
436f0fc
Don't set default decode_kind in open_dataset
weiji14 Apr 17, 2025
328b6ab
Rename engine="gmtread" to engine="gmt"
weiji14 Apr 17, 2025
06aa39d
Add GMTBackendEntrypoint to doc/api/index.rst
weiji14 Apr 17, 2025
c79e50a
Apply suggestions from code review
weiji14 Apr 17, 2025
c0ccd4b
Add test for xr.load_dataarray with a GRD grid
weiji14 Apr 19, 2025
c5ebf9f
Update GMTBackendEntrypoint description to say NetCDF C library is used
weiji14 Apr 19, 2025
17fd9d7
lint
weiji14 Apr 19, 2025
1c8af3a
Move GMTBackendEntrypoint to a new "Xarray Integration" API section
weiji14 Apr 19, 2025
1d9f21a
Revert changes to datasets/samples.py and helpers/testing.py
weiji14 Apr 19, 2025
ca9132f
Apply suggestions from code review
weiji14 Apr 22, 2025
9669bcf
lint
weiji14 Apr 22, 2025
f722b47
Revert changes in pygmt/io.py
weiji14 Apr 22, 2025
a952df9
Improve docstring of guess_can_open
weiji14 Apr 22, 2025
0c6b8a3
Add example usage to docstring of GMTBackendEntrypoint
weiji14 Apr 22, 2025
3a256a6
Apply suggestions from code review
weiji14 Apr 22, 2025
64c50c9
Merge branch 'main' into backendentrypoint/gmtread
weiji14 Apr 22, 2025
e6ee6fa
Change type-hint from str | os.PathLike to pygmt._typing.PathLike
weiji14 Apr 22, 2025
5b90a8a
Use numpy.testing.assert_allclose to check min value
weiji14 Apr 22, 2025
31d1999
Use doctest: ELLIPSIS to represent byte size of xarray coordinates
weiji14 Apr 22, 2025
6a4e1a4
Rename decode_kind to raster_kind
weiji14 Apr 23, 2025
91df0ee
Put url to GMTBackendEntrypoint docs
weiji14 Apr 23, 2025
c97bd82
Remove guess_can_open method
weiji14 Apr 23, 2025
2e5fb6b
[skip ci] lint
weiji14 Apr 23, 2025
b136e26
Merge branch 'main' into backendentrypoint/gmtread
weiji14 Apr 24, 2025
19c5252
NetCDF to netCDF
weiji14 Apr 24, 2025
3bb1f44
Add Parameters section to docs of open_dataset
weiji14 Apr 24, 2025
8984c1c
Move pygmt/xarray_backend.py to pygmt/xarray/backend.py
weiji14 Apr 24, 2025
f2446fc
Workaround sphinx Attributes ivar using ellipsis
weiji14 Apr 24, 2025
52c0b7e
Remove extra blankspace
weiji14 Apr 24, 2025
143a375
Apply suggestions from code review
weiji14 Apr 24, 2025
acab8b2
Describe more on why one should choose the gmt engine
weiji14 Apr 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ Getting metadata from tabular or grid data:
info
grdinfo

Xarray Integration
------------------

.. autosummary::
:toctree: generated

GMTBackendEntrypoint

Enums
-----

Expand Down
1 change: 1 addition & 0 deletions pygmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
x2sys_init,
xyz2grd,
)
from pygmt.xarray import GMTBackendEntrypoint

# Start our global modern mode session
_begin()
Expand Down
74 changes: 74 additions & 0 deletions pygmt/tests/test_xarray_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""
Tests for xarray 'gmt' backend engine.
"""

import re

import numpy as np
import numpy.testing as npt
import pytest
import xarray as xr
from pygmt.enums import GridRegistration, GridType
from pygmt.exceptions import GMTInvalidInput


def test_xarray_backend_gmt_open_nc_grid():
"""
Ensure that passing engine='gmt' to xarray.open_dataarray works for opening NetCDF
grids.
"""
with xr.open_dataarray(
"@static_earth_relief.nc", engine="gmt", raster_kind="grid"
) as da:
assert da.sizes == {"lat": 14, "lon": 8}
assert da.dtype == "float32"
assert da.gmt.registration == GridRegistration.PIXEL
assert da.gmt.gtype == GridType.GEOGRAPHIC


def test_xarray_backend_gmt_open_tif_image():
"""
Ensure that passing engine='gmt' to xarray.open_dataarray works for opening GeoTIFF
images.
"""
with xr.open_dataarray("@earth_day_01d", engine="gmt", raster_kind="image") as da:
assert da.sizes == {"band": 3, "y": 180, "x": 360}
assert da.dtype == "uint8"
assert da.gmt.registration == GridRegistration.PIXEL
assert da.gmt.gtype == GridType.GEOGRAPHIC


def test_xarray_backend_gmt_load_grd_grid():
"""
Ensure that passing engine='gmt' to xarray.load_dataarray works for loading GRD
grids.
"""
da = xr.load_dataarray(
"@earth_relief_20m_holes.grd", engine="gmt", raster_kind="grid"
)
# Ensure data is in memory.
assert isinstance(da.data, np.ndarray)
npt.assert_allclose(da.min(), -4929.5)
assert da.sizes == {"lat": 31, "lon": 31}
assert da.dtype == "float32"
assert da.gmt.registration == GridRegistration.GRIDLINE
assert da.gmt.gtype == GridType.GEOGRAPHIC


def test_xarray_backend_gmt_read_invalid_kind():
"""
Check that xarray.open_dataarray(..., engine="gmt") fails with missing or incorrect
'raster_kind'.
"""
with pytest.raises(
TypeError,
match=re.escape(
"GMTBackendEntrypoint.open_dataset() missing 1 required keyword-only argument: 'raster_kind'"
),
):
xr.open_dataarray("nokind.nc", engine="gmt")

with pytest.raises(GMTInvalidInput):
xr.open_dataarray(
filename_or_obj="invalid.tif", engine="gmt", raster_kind="invalid"
)
5 changes: 5 additions & 0 deletions pygmt/xarray/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
PyGMT integration with Xarray accessors and backends.
"""

from pygmt.xarray.backend import GMTBackendEntrypoint
119 changes: 119 additions & 0 deletions pygmt/xarray/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
An xarray backend for reading raster grid/image files using the 'gmt' engine.
"""

from typing import Literal

import xarray as xr
from pygmt._typing import PathLike
from pygmt.clib import Session
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import build_arg_list
from pygmt.src.which import which
from xarray.backends import BackendEntrypoint


class GMTBackendEntrypoint(BackendEntrypoint):
"""
Xarray backend to read raster grid/image files using 'gmt' engine.

Internally, GMT uses the netCDF C library to read netCDF files, and GDAL for GeoTIFF
and other raster formats. See :gmt-docs:`reference/features.html#grid-file-format`
for more details about supported formats. This GMT engine can also read
:gmt-docs:`GMT remote datasets <datasets/remote-data.html>` (file names starting
with an `@`) directly, and pre-loads :class:`pygmt.GMTDataArrayAccessor` properties
(in the '.gmt' accessor) for easy access to GMT-specific metadata and features.

When using :py:func:`xarray.open_dataarray` or :py:func:`xarray.load_dataarray` with
``engine="gmt"``, the ``raster_kind`` parameter is required and can be either:

- ``"grid"``: for reading single-band raster grids
- ``"image"``: for reading multi-band raster images

Examples
--------
Read a single-band netCDF file using ``raster_kind="grid"``

>>> import pygmt
>>> import xarray as xr
>>>
>>> da_grid = xr.open_dataarray(
... "@static_earth_relief.nc", engine="gmt", raster_kind="grid"
... )
>>> da_grid # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
<xarray.DataArray 'z' (lat: 14, lon: 8)>...
[112 values with dtype=float32]
Coordinates:
* lat (lat) float64... -23.5 -22.5 -21.5 -20.5 ... -12.5 -11.5 -10.5
* lon (lon) float64... -54.5 -53.5 -52.5 -51.5 -50.5 -49.5 -48.5 -47.5
Attributes:...
Conventions: CF-1.7
title: Produced by grdcut
history: grdcut @earth_relief_01d_p -R-55/-47/-24/-10 -Gstatic_eart...
description: Reduced by Gaussian Cartesian filtering (111.2 km fullwidt...
actual_range: [190. 981.]
long_name: elevation (m)

Read a multi-band GeoTIFF file using ``raster_kind="image"``

>>> da_image = xr.open_dataarray(
... "@earth_night_01d", engine="gmt", raster_kind="image"
... )
>>> da_image # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
<xarray.DataArray 'z' (band: 3, y: 180, x: 360)>...
[194400 values with dtype=uint8]
Coordinates:
* y (y) float64... 89.5 88.5 87.5 86.5 ... -86.5 -87.5 -88.5 -89.5
* x (x) float64... -179.5 -178.5 -177.5 -176.5 ... 177.5 178.5 179.5
* band (band) uint8... 1 2 3
Attributes:...
long_name: z
"""

description = "Open raster (.grd, .nc or .tif) files in Xarray via GMT."
open_dataset_parameters = ("filename_or_obj", "raster_kind")
url = "https://pygmt.org/dev/api/generated/pygmt.GMTBackendEntrypoint.html"

def open_dataset( # type: ignore[override]
self,
filename_or_obj: PathLike,
*,
drop_variables=None, # noqa: ARG002
raster_kind: Literal["grid", "image"],
# other backend specific keyword arguments
# `chunks` and `cache` DO NOT go here, they are handled by xarray
) -> xr.Dataset:
"""
Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`.

Parameters
----------
filename_or_obj
File path to a netCDF (.nc), GeoTIFF (.tif) or other grid/image file format
that can be read by GMT via the netCDF or GDAL C libraries. See also
:gmt-docs:`reference/features.html#grid-file-format`.
raster_kind
Whether to read the file as a "grid" (single-band) or "image" (multi-band).
"""
if raster_kind not in {"grid", "image"}:
msg = f"Invalid raster kind: '{raster_kind}'. Valid values are 'grid' or 'image'."
raise GMTInvalidInput(msg)

with Session() as lib:
with lib.virtualfile_out(kind=raster_kind) as voutfile:
kwdict = {"T": {"grid": "g", "image": "i"}[raster_kind]}
lib.call_module(
module="read",
args=[filename_or_obj, voutfile, *build_arg_list(kwdict)],
)

raster: xr.DataArray = lib.virtualfile_to_raster(
vfname=voutfile, kind=raster_kind
)
# Add "source" encoding
source = which(fname=filename_or_obj)
raster.encoding["source"] = (
source[0] if isinstance(source, list) else source
)
_ = raster.gmt # Load GMTDataArray accessor information
return raster.to_dataset()
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ all = [
"rioxarray",
]

[project.entry-points."xarray.backends"]
gmt = "pygmt.xarray:GMTBackendEntrypoint"

[project.urls]
"Homepage" = "https://www.pygmt.org"
"Documentation" = "https://www.pygmt.org"
Expand Down