Skip to content

Commit 6d57b64

Browse files
authored
Refactor info and grdinfo to use virtualfile_from_data (#961)
Create a universal `virtualfile_from_data` function that can handle any raster or vector data input. This allows us to centralize the data validation logic in a single place, resulting in a cleaner API for PyGMT modules to handle different PyData types (e.g. numpy/pandas/xarray/etc.) seamlessly. As a start, both `info` and `grdinfo` have been refactored to use this new convenience function.

* Move check_kind to be the first parameter
* Move check_kind out of the data_kind function and into virtualfile_from_data. This fixes all the tests which expect the first input into data_kind to be the 'data' argument.
* Add doctest example usage of virtualfile_from_data with xarray.Dataset
* Add an entry to doc/api/index.rst for clib.Session.virtualfile_from_data
1 parent 1ce7a6e commit 6d57b64

File tree

4 files changed

+93
-37
lines changed

4 files changed

+93
-37
lines changed

doc/api/index.rst

+5-4
Original file line numberDiff line numberDiff line change
@@ -186,14 +186,15 @@ the :meth:`~pygmt.clib.Session.call_module` method:
186186

187187
clib.Session.call_module
188188

189-
Passing memory blocks between Python variables (:class:`numpy.ndarray`,
190-
:class:`pandas.Series`, and :class:`xarray.DataArray`) and GMT happens through *virtual
191-
files*. These methods are context managers that automate the conversion of Python
192-
variables to GMT virtual files:
189+
Passing memory blocks between Python data objects (e.g. :class:`numpy.ndarray`,
190+
:class:`pandas.Series`, :class:`xarray.DataArray`, etc.) and GMT happens through
191+
*virtual files*. These methods are context managers that automate the
192+
conversion of Python variables to GMT virtual files:
193193

194194
.. autosummary::
195195
:toctree: generated
196196

197+
clib.Session.virtualfile_from_data
197198
clib.Session.virtualfile_from_matrix
198199
clib.Session.virtualfile_from_vectors
199200
clib.Session.virtualfile_from_grid

pygmt/clib/session.py

+85
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
GMTInvalidInput,
2626
GMTVersionError,
2727
)
28+
from pygmt.helpers import data_kind, dummy_context
2829

2930
FAMILIES = [
3031
"GMT_IS_DATASET",
@@ -1359,6 +1360,90 @@ def virtualfile_from_grid(self, grid):
13591360
with self.open_virtual_file(*args) as vfile:
13601361
yield vfile
13611362

1363+
def virtualfile_from_data(self, check_kind=None, data=None, x=None, y=None, z=None):
    """
    Store any data inside a virtual file.

    This convenience function automatically detects the kind of data passed
    into it, and produces a virtualfile that can be passed into GMT later
    on.

    Parameters
    ----------
    check_kind : str or None
        Used to validate the type of data that can be passed in. Choose
        from 'raster', 'vector' or None. Default is None (no validation).
        Any other value is not checked against and therefore also results
        in no validation.
    data : str, xarray.DataArray, 2d array, or None
        Any raster or vector data format. This could be a file name, a
        raster grid, a vector matrix/arrays, or other supported data input.
    x/y/z : 1d arrays or None
        x, y and z columns as numpy arrays. ``z`` is optional and is only
        forwarded to the virtual file when it is not None.

    Returns
    -------
    file_context : contextlib._GeneratorContextManager
        The virtual file stored inside a context manager. Access the file
        name of this virtualfile using ``with file_context as fname: ...``.

    Raises
    ------
    GMTInvalidInput
        If ``check_kind`` is 'raster' and the detected kind is neither
        'file' nor 'grid', or if ``check_kind`` is 'vector' and the
        detected kind is not one of 'file', 'matrix' or 'vectors'.
        Exceptions raised by ``data_kind`` itself are propagated unchanged.

    Examples
    --------
    >>> from pygmt.helpers import GMTTempFile
    >>> import xarray as xr
    >>> data = xr.Dataset(
    ...     coords={"index": [0, 1, 2]},
    ...     data_vars={
    ...         "x": ("index", [9, 8, 7]),
    ...         "y": ("index", [6, 5, 4]),
    ...         "z": ("index", [3, 2, 1]),
    ...     },
    ... )
    >>> with Session() as ses:
    ...     with ses.virtualfile_from_data(
    ...         check_kind="vector", data=data
    ...     ) as fin:
    ...         # Send the output to a file so that we can read it
    ...         with GMTTempFile() as fout:
    ...             ses.call_module("info", f"{fin} ->{fout.name}")
    ...             print(fout.read().strip())
    ...
    <vector memory>: N = 3 <7/9> <4/6> <1/3>
    """
    kind = data_kind(data, x, y, z)

    # Optional validation: reject inputs whose detected kind does not belong
    # to the family (raster or vector) that the calling module can handle.
    if check_kind == "raster" and kind not in ("file", "grid"):
        raise GMTInvalidInput(f"Unrecognized data type: {type(data)}")
    if check_kind == "vector" and kind not in ("file", "matrix", "vectors"):
        raise GMTInvalidInput(f"Unrecognized data type: {type(data)}")

    # Decide which virtualfile_from_ function to use
    _virtualfile_from = {
        "file": dummy_context,
        "grid": self.virtualfile_from_grid,
        # Note: virtualfile_from_matrix is not used because a matrix can be
        # converted to vectors instead, and using vectors allows for better
        # handling of string type inputs (e.g. for datetime data types)
        "matrix": self.virtualfile_from_vectors,
        "vectors": self.virtualfile_from_vectors,
    }[kind]

    # Ensure the data is an iterable (Python list or tuple)
    if kind in ("file", "grid"):
        _data = (data,)
    elif kind == "vectors":
        # z is optional; forwarding a None placeholder would hand
        # virtualfile_from_vectors a bogus third column, so drop it.
        _data = (x, y) if z is None else (x, y, z)
    elif kind == "matrix":  # turn 2D arrays into list of vectors
        try:
            # pandas.DataFrame and xarray.Dataset types
            _data = [array for _, array in data.items()]
        except AttributeError:
            # Python lists, tuples, and numpy ndarray types
            _data = np.atleast_2d(np.asanyarray(data).T)

    # Finally create the virtualfile from the data, to be passed into GMT
    file_context = _virtualfile_from(*_data)

    return file_context
1446+
13621447
def extract_region(self):
13631448
"""
13641449
Extract the WESN bounding box of the currently active figure.

pygmt/src/grdinfo.py

+1-10
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,9 @@
22
grdinfo - Retrieve info about grid file.
33
"""
44
from pygmt.clib import Session
5-
from pygmt.exceptions import GMTInvalidInput
65
from pygmt.helpers import (
76
GMTTempFile,
87
build_arg_string,
9-
data_kind,
10-
dummy_context,
118
fmt_docstring,
129
kwargs_to_strings,
1310
use_alias,
@@ -109,15 +106,9 @@ def grdinfo(grid, **kwargs):
109106
info : str
110107
A string with information about the grid.
111108
"""
112-
kind = data_kind(grid, None, None)
113109
with GMTTempFile() as outfile:
114110
with Session() as lib:
115-
if kind == "file":
116-
file_context = dummy_context(grid)
117-
elif kind == "grid":
118-
file_context = lib.virtualfile_from_grid(grid)
119-
else:
120-
raise GMTInvalidInput("Unrecognized data type: {}".format(type(grid)))
111+
file_context = lib.virtualfile_from_data(check_kind="raster", data=grid)
121112
with file_context as infile:
122113
arg_str = " ".join(
123114
[infile, build_arg_string(kwargs), "->" + outfile.name]

pygmt/src/info.py

+2-23
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,7 @@
33
"""
44
import numpy as np
55
from pygmt.clib import Session
6-
from pygmt.exceptions import GMTInvalidInput
7-
from pygmt.helpers import (
8-
GMTTempFile,
9-
build_arg_string,
10-
data_kind,
11-
dummy_context,
12-
fmt_docstring,
13-
use_alias,
14-
)
6+
from pygmt.helpers import GMTTempFile, build_arg_string, fmt_docstring, use_alias
157

168

179
@fmt_docstring
@@ -66,21 +58,8 @@ def info(table, **kwargs):
6658
- :class:`numpy.ndarray` if either of the above parameters are used.
6759
- str if none of the above parameters are used.
6860
"""
69-
kind = data_kind(table)
7061
with Session() as lib:
71-
if kind == "file":
72-
file_context = dummy_context(table)
73-
elif kind == "matrix":
74-
try:
75-
# pandas.DataFrame and xarray.Dataset types
76-
arrays = [array for _, array in table.items()]
77-
except AttributeError:
78-
# Python lists, tuples, and numpy ndarray types
79-
arrays = np.atleast_2d(np.asanyarray(table).T)
80-
file_context = lib.virtualfile_from_vectors(*arrays)
81-
else:
82-
raise GMTInvalidInput(f"Unrecognized data type: {type(table)}")
83-
62+
file_context = lib.virtualfile_from_data(data=table)
8463
with GMTTempFile() as tmpfile:
8564
with file_context as fname:
8665
arg_str = " ".join(

0 commit comments

Comments
 (0)