Skip to content

Commit 499c519

Browse files
authored
Merge branch 'main' into datatypes/gmtcube
2 parents 1c5df8c + 7f77c77 commit 499c519

37 files changed

+766
-401
lines changed

.github/workflows/benchmarks.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444

4545
# Install Miniconda with conda-forge dependencies
4646
- name: Setup Miniconda
47-
uses: conda-incubator/setup-miniconda@v3.0.1
47+
uses: conda-incubator/setup-miniconda@v3.0.3
4848
with:
4949
auto-activate-base: true
5050
activate-environment: "" # base environment
@@ -86,7 +86,7 @@ jobs:
8686

8787
# Run the benchmark tests
8888
- name: Run benchmarks
89-
uses: CodSpeedHQ/action@v2.2.1
89+
uses: CodSpeedHQ/action@v2.3.0
9090
with:
9191
run: |
9292
python -c "import pygmt; pygmt.show_versions()"

.github/workflows/cache_data.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ jobs:
8080
with:
8181
name: gmt-cache
8282
path: |
83-
~/.gmt/cache
84-
~/.gmt/server
85-
~/.gmt/gmt_data_server.txt
86-
~/.gmt/gmt_hash_server.txt
83+
~/.gmt/cache
84+
~/.gmt/server
85+
~/.gmt/gmt_data_server.txt
86+
~/.gmt/gmt_hash_server.txt

.github/workflows/check-links.yml

+5-4
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ jobs:
6868

6969
- name: Create Issue From File
7070
if: env.lychee_exit_code != 0
71-
uses: peter-evans/create-issue-from-file@v5
72-
with:
73-
title: Link Checker Report on ${{ steps.date.outputs.date }}
74-
content-filepath: ./lychee/out.md
71+
run: |
72+
title="Link Checker Report on ${{ steps.date.outputs.date }}"
73+
gh issue create --title "$title" --body-file ./lychee/out.md
74+
env:
75+
GH_TOKEN: ${{secrets.GITHUB_TOKEN}}

.github/workflows/ci_tests.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ jobs:
166166

167167
# Upload coverage to Codecov
168168
- name: Upload coverage to Codecov
169-
uses: codecov/codecov-action@v4.1.1
169+
uses: codecov/codecov-action@v4.3.0
170170
with:
171171
file: ./coverage.xml # optional
172172
env_vars: OS,PYTHON,NUMPY

.github/workflows/format-command.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
# Generate token from GenericMappingTools bot
14-
- uses: tibdex/github-app-token@v2
14+
- uses: actions/create-github-app-token@v1.9.3
1515
id: generate-token
1616
with:
17-
app_id: ${{ secrets.APP_ID }}
18-
private_key: ${{ secrets.APP_PRIVATE_KEY }}
17+
app-id: ${{ secrets.APP_ID }}
18+
private-key: ${{ secrets.APP_PRIVATE_KEY }}
1919

2020
# Checkout the pull request branch
2121
- uses: actions/[email protected]

.github/workflows/release-baseline-images.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
shasum -a 256 baseline-images.zip
3636
3737
- name: Upload baseline image as a release asset
38-
uses: shogo82148/actions-upload-release-asset@v1.7.4
38+
uses: shogo82148/actions-upload-release-asset@v1.7.5
3939
with:
4040
upload_url: ${{ github.event.release.upload_url }}
4141
asset_path: baseline-images.zip

doc/api/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ Python objects to and from GMT virtual files:
292292
clib.Session.virtualfile_in
293293
clib.Session.virtualfile_out
294294
clib.Session.virtualfile_to_dataset
295+
clib.Session.virtualfile_to_raster
295296

296297
Low level access (these are mostly used by the :mod:`pygmt.clib` package):
297298

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ dependencies:
2727
- codespell
2828
- ruff>=0.3.0
2929
# Dev dependencies (unit testing)
30-
- matplotlib
30+
- matplotlib-base
3131
- pytest-cov
3232
- pytest-doctestplus
3333
- pytest-mpl

pygmt/clib/session.py

+122-7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import numpy as np
1616
import pandas as pd
17+
import xarray as xr
1718
from packaging.version import Version
1819
from pygmt.clib.conversion import (
1920
array_to_datetime,
@@ -1712,8 +1713,39 @@ def virtualfile_out(
17121713
with self.open_virtualfile(family, geometry, "GMT_OUT", None) as vfile:
17131714
yield vfile
17141715

1716+
def inquire_virtualfile(self, vfname: str) -> int:
1717+
"""
1718+
Get the family of a virtual file.
1719+
1720+
Parameters
1721+
----------
1722+
vfname
1723+
Name of the virtual file to inquire.
1724+
1725+
Returns
1726+
-------
1727+
family
1728+
The integer value for the family of the virtual file.
1729+
1730+
Examples
1731+
--------
1732+
>>> from pygmt.clib import Session
1733+
>>> with Session() as lib:
1734+
... with lib.virtualfile_out(kind="dataset") as vfile:
1735+
... family = lib.inquire_virtualfile(vfile)
1736+
... assert family == lib["GMT_IS_DATASET"]
1737+
"""
1738+
c_inquire_virtualfile = self.get_libgmt_func(
1739+
"GMT_Inquire_VirtualFile",
1740+
argtypes=[ctp.c_void_p, ctp.c_char_p],
1741+
restype=ctp.c_uint,
1742+
)
1743+
return c_inquire_virtualfile(self.session_pointer, vfname.encode())
1744+
17151745
def read_virtualfile(
1716-
self, vfname: str, kind: Literal["dataset", "grid", "cube", None] = None
1746+
self,
1747+
vfname: str,
1748+
kind: Literal["dataset", "grid", "image", "cube", None] = None,
17171749
):
17181750
"""
17191751
Read data from a virtual file and optionally cast into a GMT data container.
@@ -1772,13 +1804,15 @@ def read_virtualfile(
17721804
# _GMT_DATASET).
17731805
if kind is None: # Return the ctypes void pointer
17741806
return pointer
1807+
if kind == "image":
1808+
raise NotImplementedError(f"kind={kind} is not supported yet.")
17751809
dtype = {"dataset": _GMT_DATASET, "grid": _GMT_GRID, "cube": _GMT_CUBE}[kind]
17761810
return ctp.cast(pointer, ctp.POINTER(dtype))
17771811

17781812
def virtualfile_to_dataset(
17791813
self,
17801814
vfname: str,
1781-
output_type: Literal["pandas", "numpy", "file"] = "pandas",
1815+
output_type: Literal["pandas", "numpy", "file", "strings"] = "pandas",
17821816
column_names: list[str] | None = None,
17831817
dtype: type | dict[str, type] | None = None,
17841818
index_col: str | int | None = None,
@@ -1799,6 +1833,7 @@ def virtualfile_to_dataset(
17991833
- ``"pandas"`` will return a :class:`pandas.DataFrame` object.
18001834
- ``"numpy"`` will return a :class:`numpy.ndarray` object.
18011835
- ``"file"`` means the result was saved to a file and will return ``None``.
1836+
- ``"strings"`` will return the trailing text only as an array of strings.
18021837
column_names
18031838
The column names for the :class:`pandas.DataFrame` output.
18041839
dtype
@@ -1844,6 +1879,16 @@ def virtualfile_to_dataset(
18441879
... assert result is None
18451880
... assert Path(outtmp.name).stat().st_size > 0
18461881
...
1882+
... # strings output
1883+
... with Session() as lib:
1884+
... with lib.virtualfile_out(kind="dataset") as vouttbl:
1885+
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
1886+
... outstr = lib.virtualfile_to_dataset(
1887+
... vfname=vouttbl, output_type="strings"
1888+
... )
1889+
... assert isinstance(outstr, np.ndarray)
1890+
... assert outstr.dtype.kind in ("S", "U")
1891+
...
18471892
... # numpy output
18481893
... with Session() as lib:
18491894
... with lib.virtualfile_out(kind="dataset") as vouttbl:
@@ -1872,6 +1917,9 @@ def virtualfile_to_dataset(
18721917
... column_names=["col1", "col2", "col3", "coltext"],
18731918
... )
18741919
... assert isinstance(outpd2, pd.DataFrame)
1920+
>>> outstr
1921+
array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90',
1922+
'TEXT123 TEXT456789'], dtype='<U18')
18751923
>>> outnp
18761924
array([[1.0, 2.0, 3.0, 'TEXT1 TEXT23'],
18771925
[4.0, 5.0, 6.0, 'TEXT4 TEXT567'],
@@ -1893,16 +1941,83 @@ def virtualfile_to_dataset(
18931941
if output_type == "file": # Already written to file, so return None
18941942
return None
18951943

1896-
# Read the virtual file as a GMT dataset and convert to pandas.DataFrame
1897-
result = self.read_virtualfile(vfname, kind="dataset").contents.to_dataframe(
1898-
column_names=column_names,
1899-
dtype=dtype,
1900-
index_col=index_col,
1944+
# Read the virtual file as a _GMT_DATASET object
1945+
result = self.read_virtualfile(vfname, kind="dataset").contents
1946+
1947+
if output_type == "strings": # strings output
1948+
return result.to_strings()
1949+
1950+
result = result.to_dataframe(
1951+
column_names=column_names, dtype=dtype, index_col=index_col
19011952
)
19021953
if output_type == "numpy": # numpy.ndarray output
19031954
return result.to_numpy()
19041955
return result # pandas.DataFrame output
19051956

1957+
def virtualfile_to_raster(
1958+
self,
1959+
vfname: str,
1960+
kind: Literal["grid", "image", "cube", None] = "grid",
1961+
outgrid: str | None = None,
1962+
) -> xr.DataArray | None:
1963+
"""
1964+
Output raster data stored in a virtual file to an :class:`xarray.DataArray`
1965+
object.
1966+
1967+
The raster data can be a grid, an image or a cube.
1968+
1969+
Parameters
1970+
----------
1971+
vfname
1972+
The virtual file name that stores the result grid/image/cube.
1973+
kind
1974+
Type of the raster data. Valid values are ``"grid"``, ``"image"``,
1975+
``"cube"`` or ``None``. If ``None``, will inquire the data type from the
1976+
virtual file name.
1977+
outgrid
1978+
Name of the output grid/image/cube. If specified, it means the raster data
1979+
was already saved into an actual file and will return ``None``.
1980+
1981+
Returns
1982+
-------
1983+
result
1984+
The result grid/image/cube. If ``outgrid`` is specified, return ``None``.
1985+
1986+
Examples
1987+
--------
1988+
>>> from pathlib import Path
1989+
>>> from pygmt.clib import Session
1990+
>>> from pygmt.helpers import GMTTempFile
1991+
>>> with Session() as lib:
1992+
... # file output
1993+
... with GMTTempFile(suffix=".nc") as tmpfile:
1994+
... outgrid = tmpfile.name
1995+
... with lib.virtualfile_out(kind="grid", fname=outgrid) as voutgrd:
1996+
... lib.call_module("read", f"@earth_relief_01d_g {voutgrd} -Tg")
1997+
... result = lib.virtualfile_to_raster(
1998+
... vfname=voutgrd, outgrid=outgrid
1999+
... )
2000+
... assert result == None
2001+
... assert Path(outgrid).stat().st_size > 0
2002+
...
2003+
... # xarray.DataArray output
2004+
... outgrid = None
2005+
... with lib.virtualfile_out(kind="grid", fname=outgrid) as voutgrd:
2006+
... lib.call_module("read", f"@earth_relief_01d_g {voutgrd} -Tg")
2007+
... result = lib.virtualfile_to_raster(vfname=voutgrd, outgrid=outgrid)
2008+
... assert isinstance(result, xr.DataArray)
2009+
"""
2010+
if outgrid is not None:
2011+
return None
2012+
if kind is None: # Inquire the data family from the virtualfile
2013+
family = self.inquire_virtualfile(vfname)
2014+
kind = { # type: ignore[assignment]
2015+
self["GMT_IS_GRID"]: "grid",
2016+
self["GMT_IS_IMAGE"]: "image",
2017+
self["GMT_IS_CUBE"]: "cube",
2018+
}[family]
2019+
return self.read_virtualfile(vfname, kind=kind).contents.to_dataarray()
2020+
19062021
def extract_region(self):
19072022
"""
19082023
Extract the WESN bounding box of the currently active figure.

pygmt/datatypes/dataset.py

+37-19
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import ctypes as ctp
6+
import warnings
67
from collections.abc import Mapping
78
from typing import Any, ClassVar
89

@@ -144,6 +145,28 @@ class _GMT_DATASEGMENT(ctp.Structure): # noqa: N801
144145
("hidden", ctp.c_void_p),
145146
]
146147

148+
def to_strings(self) -> np.ndarray[Any, np.dtype[np.str_]]:
149+
"""
150+
Convert the trailing text column to an array of strings.
151+
"""
152+
textvector = []
153+
for table in self.table[: self.n_tables]:
154+
for segment in table.contents.segment[: table.contents.n_segments]:
155+
if segment.contents.text:
156+
textvector.extend(segment.contents.text[: segment.contents.n_rows])
157+
if None in textvector:
158+
# Workaround for upstream GMT bug reported in
159+
# https://github.com/GenericMappingTools/pygmt/issues/3170.
160+
msg = (
161+
"The trailing text column contains `None' values and has been replaced"
162+
"with empty strings to avoid TypeError exceptions. "
163+
"It's likely caused by an upstream GMT API bug. "
164+
"Please consider reporting to us."
165+
)
166+
warnings.warn(msg, category=RuntimeWarning, stacklevel=1)
167+
textvector = [item if item is not None else b"" for item in textvector]
168+
return np.char.decode(textvector) if textvector else np.array([], dtype=str)
169+
147170
def to_dataframe(
148171
self,
149172
column_names: pd.Index | None = None,
@@ -194,7 +217,11 @@ def to_dataframe(
194217
... with lib.virtualfile_out(kind="dataset") as vouttbl:
195218
... lib.call_module("read", f"{tmpfile.name} {vouttbl} -Td")
196219
... ds = lib.read_virtualfile(vouttbl, kind="dataset")
220+
... text = ds.contents.to_strings()
197221
... df = ds.contents.to_dataframe()
222+
>>> text
223+
array(['TEXT1 TEXT23', 'TEXT4 TEXT567', 'TEXT8 TEXT90',
224+
'TEXT123 TEXT456789'], dtype='<U18')
198225
>>> df
199226
0 1 2 3
200227
0 1.0 2.0 3.0 TEXT1 TEXT23
@@ -207,28 +234,19 @@ def to_dataframe(
207234
vectors = []
208235
# Deal with numeric columns
209236
for icol in range(self.n_columns):
210-
colvector = []
211-
for itbl in range(self.n_tables):
212-
dtbl = self.table[itbl].contents
213-
for iseg in range(dtbl.n_segments):
214-
dseg = dtbl.segment[iseg].contents
215-
colvector.append(
216-
np.ctypeslib.as_array(dseg.data[icol], shape=(dseg.n_rows,))
217-
)
237+
colvector = [
238+
np.ctypeslib.as_array(
239+
seg.contents.data[icol], shape=(seg.contents.n_rows,)
240+
)
241+
for tbl in self.table[: self.n_tables]
242+
for seg in tbl.contents.segment[: tbl.contents.n_segments]
243+
]
218244
vectors.append(pd.Series(data=np.concatenate(colvector)))
219245

220246
# Deal with trailing text column
221-
textvector = []
222-
for itbl in range(self.n_tables):
223-
dtbl = self.table[itbl].contents
224-
for iseg in range(dtbl.n_segments):
225-
dseg = dtbl.segment[iseg].contents
226-
if dseg.text:
227-
textvector.extend(dseg.text[: dseg.n_rows])
228-
if textvector:
229-
vectors.append(
230-
pd.Series(data=np.char.decode(textvector), dtype=pd.StringDtype())
231-
)
247+
textvector = self.to_strings()
248+
if len(textvector) != 0:
249+
vectors.append(pd.Series(data=textvector, dtype=pd.StringDtype()))
232250

233251
if len(vectors) == 0:
234252
# Return an empty DataFrame if no columns are found.

0 commit comments

Comments
 (0)