From eca7427322454ccb844e479a0db480717f9d7343 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Wed, 24 Feb 2021 16:56:17 +1300 Subject: [PATCH 1/8] Refactor info and grdinfo to use virtualfile_from_data Create a universal `virtualfile_from_data` function that can handle any raster or vector data input. This allows us to centralize the data validation logic in a single place, resulting in a cleaner API for PyGMT modules to handle different PyData types (e.g. numpy/pandas/xarray/etc) seamlessly. As a start, both `info` and `grdinfo` have been refactored to use this new convenience function. --- pygmt/clib/session.py | 57 ++++++++++++++++++++++++++++++++++++++++++ pygmt/helpers/utils.py | 13 ++++++++-- pygmt/src/grdinfo.py | 11 +------- pygmt/src/info.py | 25 ++---------------- 4 files changed, 71 insertions(+), 35 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 93734645e90..00fd2d15da2 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -25,6 +25,7 @@ GMTInvalidInput, GMTVersionError, ) +from pygmt.helpers import data_kind, dummy_context FAMILIES = [ "GMT_IS_DATASET", @@ -1359,6 +1360,62 @@ def virtualfile_from_grid(self, grid): with self.open_virtual_file(*args) as vfile: yield vfile + def virtualfile_from_data(self, data, x=None, y=None, z=None, check_kind=None): + """ + Store any data inside a virtual file. + + This convenience function automatically detects the kind of data passed + into it, and produces a virtualfile that can be passed into GMT later + on. + + Parameters + ---------- + data : str, xarray.DataArray, 2d array, or None + The vectors that will be included in the array. All must be of the + same size. + x/y/z : 1d arrays or None + x, y and z columns as numpy arrays. + check_kind : str + Used to validate the type of data that can be passed in. Choose + from 'raster', 'vector' or None. Default is None (no validation). 
+ + Returns + ------- + file_context : contextlib._GeneratorContextManager + The virtual file stored inside a context manager. Access the file + name of this virtualfile using `with file_context as fname: ...`. + """ + kind = data_kind(data, x, y, z, check_kind) + + # Decide which virtualfile_from_ function to use + _virtualfile_from = { + "file": dummy_context, + "grid": self.virtualfile_from_grid, + # Note: virtualfile_from_matrix is not used because a matrix can be + # converted to vectors instead, and using vectors allows for better + # handling of string type inputs (e.g. for datetime data types) + "matrix": self.virtualfile_from_vectors, + "vectors": self.virtualfile_from_vectors, + }[kind] + + # Ensure the data is an iterable (Python list or tuple) + if kind == "matrix": # turn 2D arrays into list of vectors + try: + # pandas.DataFrame and xarray.Dataset types + _data = [array for _, array in data.items()] + except AttributeError: + # Python lists, tuples, and numpy ndarray types + _data = np.atleast_2d(np.asanyarray(data).T) + elif kind == "vectors": + _data = (x, y, z) + else: + _data = (data,) + + # Finally create the virtualfile from the data, to be passed into GMT + file_context = _virtualfile_from(*_data) + + return file_context + def extract_region(self): """ Extract the WESN bounding box of the currently active figure. diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 749ebe63e5b..8809758f897 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -13,7 +13,7 @@ from pygmt.exceptions import GMTInvalidInput -def data_kind(data, x=None, y=None, z=None): +def data_kind(data, x=None, y=None, z=None, check_kind=None): """ Check what kind of data is provided to a module. @@ -30,12 +30,15 @@ def data_kind(data, x=None, y=None, z=None): Parameters ---------- data : str, xarray.DataArray, 2d array, or None - Data file name, xarray.DataArray or numpy array. + Data file name, xarray.DataArray or numpy array. 
x/y : 1d arrays or None x and y columns as numpy arrays. z : 1d array or None z column as numpy array. To be used optionally when x and y are given. + check_kind : str + Used to validate the type of data that can be passed in. Choose from + 'raster', 'vector' or None. Default is None (no validation). Returns ------- @@ -71,6 +74,12 @@ def data_kind(data, x=None, y=None, z=None): kind = "matrix" else: kind = "vectors" + + if check_kind == "raster" and kind not in ("file", "grid"): + raise GMTInvalidInput(f"Unrecognized data type: {type(data)}") + if check_kind == "vector" and kind not in ("file", "matrix", "vectors"): + raise GMTInvalidInput(f"Unrecognized data type: {type(data)}") + return kind diff --git a/pygmt/src/grdinfo.py b/pygmt/src/grdinfo.py index 43be9292aab..128adee2912 100644 --- a/pygmt/src/grdinfo.py +++ b/pygmt/src/grdinfo.py @@ -2,12 +2,9 @@ grdinfo - Retrieve info about grid file. """ from pygmt.clib import Session -from pygmt.exceptions import GMTInvalidInput from pygmt.helpers import ( GMTTempFile, build_arg_string, - data_kind, - dummy_context, fmt_docstring, kwargs_to_strings, use_alias, @@ -109,15 +106,9 @@ def grdinfo(grid, **kwargs): info : str A string with information about the grid. 
""" - kind = data_kind(grid, None, None) with GMTTempFile() as outfile: with Session() as lib: - if kind == "file": - file_context = dummy_context(grid) - elif kind == "grid": - file_context = lib.virtualfile_from_grid(grid) - else: - raise GMTInvalidInput("Unrecognized data type: {}".format(type(grid))) + file_context = lib.virtualfile_from_data(data=grid, check_kind="raster") with file_context as infile: arg_str = " ".join( [infile, build_arg_string(kwargs), "->" + outfile.name] diff --git a/pygmt/src/info.py b/pygmt/src/info.py index e8d3730aeb3..e6f2e9aac61 100644 --- a/pygmt/src/info.py +++ b/pygmt/src/info.py @@ -3,15 +3,7 @@ """ import numpy as np from pygmt.clib import Session -from pygmt.exceptions import GMTInvalidInput -from pygmt.helpers import ( - GMTTempFile, - build_arg_string, - data_kind, - dummy_context, - fmt_docstring, - use_alias, -) +from pygmt.helpers import GMTTempFile, build_arg_string, fmt_docstring, use_alias @fmt_docstring @@ -66,21 +58,8 @@ def info(table, **kwargs): - :class:`numpy.ndarray` if either of the above parameters are used. - str if none of the above parameters are used. 
""" - kind = data_kind(table) with Session() as lib: - if kind == "file": - file_context = dummy_context(table) - elif kind == "matrix": - try: - # pandas.DataFrame and xarray.Dataset types - arrays = [array for _, array in table.items()] - except AttributeError: - # Python lists, tuples, and numpy ndarray types - arrays = np.atleast_2d(np.asanyarray(table).T) - file_context = lib.virtualfile_from_vectors(*arrays) - else: - raise GMTInvalidInput(f"Unrecognized data type: {type(table)}") - + file_context = lib.virtualfile_from_data(data=table) with GMTTempFile() as tmpfile: with file_context as fname: arg_str = " ".join( From ebd226424166afe1c76fcae1144dbbd7b6164c62 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 26 Feb 2021 13:34:15 +1300 Subject: [PATCH 2/8] Move check_kind to be the first parameter --- pygmt/clib/session.py | 16 ++++++++-------- pygmt/helpers/utils.py | 8 ++++---- pygmt/src/grdinfo.py | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 00fd2d15da2..ff998e4bdec 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1360,7 +1360,7 @@ def virtualfile_from_grid(self, grid): with self.open_virtual_file(*args) as vfile: yield vfile - def virtualfile_from_data(self, data, x=None, y=None, z=None, check_kind=None): + def virtualfile_from_data(self, check_kind=None, data=None, x=None, y=None, z=None): """ Store any data inside a virtual file. @@ -1370,22 +1370,22 @@ def virtualfile_from_data(self, data, x=None, y=None, z=None, check_kind=None): Parameters ---------- - data : str, xarray.DataArray, 2d array, or None - The vectors that will be included in the array. All must be of the - same size. - x/y/z : 1d arrays or None - x, y and z columns as numpy arrays. check_kind : str Used to validate the type of data that can be passed in. Choose from 'raster', 'vector' or None. Default is None (no validation). 
+ data : str, xarray.DataArray, 2d array, or None + Any raster or vector data format. This could be a file name, a + raster grid, a vector matrix/arrays, or other supported data input. + x/y/z : 1d arrays or None + x, y and z columns as numpy arrays. Returns ------- file_context : contextlib._GeneratorContextManager The virtual file stored inside a context manager. Access the file - name of this virtualfile using `with file_context as fname: ...`. + name of this virtualfile using ``with file_context as fname: ...``. """ - kind = data_kind(data, x, y, z, check_kind) + kind = data_kind(check_kind, data, x, y, z) # Decide which virtualfile_from_ function to use _virtualfile_from = { diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 8809758f897..5e34843aab8 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -13,7 +13,7 @@ from pygmt.exceptions import GMTInvalidInput -def data_kind(data, x=None, y=None, z=None, check_kind=None): +def data_kind(check_kind=None, data=None, x=None, y=None, z=None): """ Check what kind of data is provided to a module. @@ -29,6 +29,9 @@ def data_kind(data, x=None, y=None, z=None, check_kind=None): Parameters ---------- + check_kind : str + Used to validate the type of data that can be passed in. Choose from + 'raster', 'vector' or None. Default is None (no validation). data : str, xarray.DataArray, 2d array, or None Data file name, xarray.DataArray or numpy array. x/y : 1d arrays or None @@ -36,9 +39,6 @@ def data_kind(data, x=None, y=None, z=None, check_kind=None): z : 1d array or None z column as numpy array. To be used optionally when x and y are given. - check_kind : str - Used to validate the type of data that can be passed in. Choose from - 'raster', 'vector' or None. Default is None (no validation). 
Returns ------- diff --git a/pygmt/src/grdinfo.py b/pygmt/src/grdinfo.py index 128adee2912..be481758497 100644 --- a/pygmt/src/grdinfo.py +++ b/pygmt/src/grdinfo.py @@ -108,7 +108,7 @@ def grdinfo(grid, **kwargs): """ with GMTTempFile() as outfile: with Session() as lib: - file_context = lib.virtualfile_from_data(data=grid, check_kind="raster") + file_context = lib.virtualfile_from_data(check_kind="raster", data=grid) with file_context as infile: arg_str = " ".join( [infile, build_arg_string(kwargs), "->" + outfile.name] From e5588345c509dac293438e4695202bd5ca4e9dc8 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 26 Feb 2021 14:07:02 +1300 Subject: [PATCH 3/8] Refactor virtualfile_from_data to remove if/elif/else block Pure dictionary based lookup to convert data to a virtualfile. --- pygmt/clib/session.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index ff998e4bdec..778b584f963 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1387,18 +1387,6 @@ def virtualfile_from_data(self, check_kind=None, data=None, x=None, y=None, z=No """ kind = data_kind(check_kind, data, x, y, z) - # Decide which virtualfile_from_ function to use - _virtualfile_from = { - "file": dummy_context, - "grid": self.virtualfile_from_grid, - # Note: virtualfile_from_matrix is not used because a matrix can be - # converted to vectors instead, and using vectors allows for better - # handling of string type inputs (e.g. 
for datetime data types) - "matrix": self.virtualfile_from_vectors, - "vectors": self.virtualfile_from_vectors, - }[kind] - - # Ensure the data is an iterable (Python list or tuple) if kind == "matrix": # turn 2D arrays into list of vectors try: # pandas.DataFrame and xarray.Dataset types @@ -1406,13 +1394,17 @@ def virtualfile_from_data(self, check_kind=None, data=None, x=None, y=None, z=No except AttributeError: # Python lists, tuples, and numpy ndarray types _data = np.atleast_2d(np.asanyarray(data).T) - elif kind == "vectors": - _data = (x, y, z) - else: - _data = (data,) - # Finally create the virtualfile from the data, to be passed into GMT - file_context = _virtualfile_from(*_data) + # Based on the data kind, create the virtualfile to be passed into GMT + file_context = { + "file": dummy_context(data), + "grid": self.virtualfile_from_grid(data), + # Note: virtualfile_from_matrix is not used because a matrix can be + # converted to vectors instead, and using vectors allows for better + # handling of string type inputs (e.g. for datetime data types) + "matrix": self.virtualfile_from_vectors(*_data), + "vectors": self.virtualfile_from_vectors(x, y, z), + }[kind] return file_context From febc06ce29db4fdc5a4f05625413316cad9fceab Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 26 Feb 2021 14:16:28 +1300 Subject: [PATCH 4/8] Move check_kind out of data_kind function and into virtualfile_from_data Fixes all the tests which expect first input into data_kind to be the 'data' argument. --- pygmt/clib/session.py | 7 ++++++- pygmt/helpers/utils.py | 13 ++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 778b584f963..cf8f1144a05 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1385,7 +1385,12 @@ def virtualfile_from_data(self, check_kind=None, data=None, x=None, y=None, z=No The virtual file stored inside a context manager. 
Access the file name of this virtualfile using ``with file_context as fname: ...``. """ - kind = data_kind(check_kind, data, x, y, z) + kind = data_kind(data, x, y, z) + + if check_kind == "raster" and kind not in ("file", "grid"): + raise GMTInvalidInput(f"Unrecognized data type: {type(data)}") + if check_kind == "vector" and kind not in ("file", "matrix", "vectors"): + raise GMTInvalidInput(f"Unrecognized data type: {type(data)}") if kind == "matrix": # turn 2D arrays into list of vectors try: diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 5e34843aab8..01d75c51270 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -13,7 +13,7 @@ from pygmt.exceptions import GMTInvalidInput -def data_kind(check_kind=None, data=None, x=None, y=None, z=None): +def data_kind(data=None, x=None, y=None, z=None): """ Check what kind of data is provided to a module. @@ -29,11 +29,8 @@ def data_kind(check_kind=None, data=None, x=None, y=None, z=None): Parameters ---------- - check_kind : str - Used to validate the type of data that can be passed in. Choose from - 'raster', 'vector' or None. Default is None (no validation). data : str, xarray.DataArray, 2d array, or None - Data file name, xarray.DataArray or numpy array. + Data file name, xarray.DataArray or numpy array. x/y : 1d arrays or None x and y columns as numpy arrays. 
z : 1d array or None @@ -74,12 +71,6 @@ def data_kind(check_kind=None, data=None, x=None, y=None, z=None): kind = "matrix" else: kind = "vectors" - - if check_kind == "raster" and kind not in ("file", "grid"): - raise GMTInvalidInput(f"Unrecognized data type: {type(data)}") - if check_kind == "vector" and kind not in ("file", "matrix", "vectors"): - raise GMTInvalidInput(f"Unrecognized data type: {type(data)}") - return kind From 9bdd0be6e0b78ba35a7ad7ba7f2f05d4869106e9 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 26 Feb 2021 14:33:47 +1300 Subject: [PATCH 5/8] Go back to using an if/elif block so only one virtualfile is created Revert e5588345c509dac293438e4695202bd5ca4e9dc8 somewhat, because the dictionary actually creates four virtualfiles. Also pushed up 'file' and 'grid' kinds up the if-then block chain. --- pygmt/clib/session.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index cf8f1144a05..a6624072252 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1392,7 +1392,23 @@ def virtualfile_from_data(self, check_kind=None, data=None, x=None, y=None, z=No if check_kind == "vector" and kind not in ("file", "matrix", "vectors"): raise GMTInvalidInput(f"Unrecognized data type: {type(data)}") - if kind == "matrix": # turn 2D arrays into list of vectors + # Decide which virtualfile_from_ function to use + _virtualfile_from = { + "file": dummy_context, + "grid": self.virtualfile_from_grid, + # Note: virtualfile_from_matrix is not used because a matrix can be + # converted to vectors instead, and using vectors allows for better + # handling of string type inputs (e.g. 
for datetime data types) + "matrix": self.virtualfile_from_vectors, + "vectors": self.virtualfile_from_vectors, + }[kind] + + # Ensure the data is an iterable (Python list or tuple) + if kind in ("file", "grid"): + _data = (data,) + elif kind == "vectors": + _data = (x, y, z) + elif kind == "matrix": # turn 2D arrays into list of vectors try: # pandas.DataFrame and xarray.Dataset types _data = [array for _, array in data.items()] @@ -1400,16 +1416,8 @@ def virtualfile_from_data(self, check_kind=None, data=None, x=None, y=None, z=No # Python lists, tuples, and numpy ndarray types _data = np.atleast_2d(np.asanyarray(data).T) - # Based on the data kind, create the virtualfile to be passed into GMT - file_context = { - "file": dummy_context(data), - "grid": self.virtualfile_from_grid(data), - # Note: virtualfile_from_matrix is not used because a matrix can be - # converted to vectors instead, and using vectors allows for better - # handling of string type inputs (e.g. for datetime data types) - "matrix": self.virtualfile_from_vectors(*_data), - "vectors": self.virtualfile_from_vectors(x, y, z), - }[kind] + # Finally create the virtualfile from the data, to be passed into GMT + file_context = _virtualfile_from(*_data) return file_context From 4b5e8df877538810bf6d5855eb57e5aba5ae5ecf Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Fri, 26 Feb 2021 14:34:43 +1300 Subject: [PATCH 6/8] Reduce diff on data_kind change --- pygmt/helpers/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 01d75c51270..749ebe63e5b 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -13,7 +13,7 @@ from pygmt.exceptions import GMTInvalidInput -def data_kind(data=None, x=None, y=None, z=None): +def data_kind(data, x=None, y=None, z=None): """ Check what kind of data is provided to a module. 
From 23530f4b35a7a0949926901368bf3bea36eb4754 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 2 Mar 2021 10:43:16 +1300 Subject: [PATCH 7/8] Add doctest example usage of virtualfile_from_data with xarray.Dataset --- pygmt/clib/session.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index a6624072252..09919dc9394 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -1384,6 +1384,29 @@ def virtualfile_from_data(self, check_kind=None, data=None, x=None, y=None, z=No file_context : contextlib._GeneratorContextManager The virtual file stored inside a context manager. Access the file name of this virtualfile using ``with file_context as fname: ...``. + + Examples + -------- + >>> from pygmt.helpers import GMTTempFile + >>> import xarray as xr + >>> data = xr.Dataset( + ... coords={"index": [0, 1, 2]}, + ... data_vars={ + ... "x": ("index", [9, 8, 7]), + ... "y": ("index", [6, 5, 4]), + ... "z": ("index", [3, 2, 1]), + ... }, + ... ) + >>> with Session() as ses: + ... with ses.virtualfile_from_data( + ... check_kind="vector", data=data + ... ) as fin: + ... # Send the output to a file so that we can read it + ... with GMTTempFile() as fout: + ... ses.call_module("info", f"{fin} ->{fout.name}") + ... print(fout.read().strip()) + ... 
+ <vector memory>: N = 3 <7/9> <4/6> <1/3> """ kind = data_kind(data, x, y, z) From 7c60cb304950aacb0ed0e1b3c76fac24a242b65d Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Tue, 2 Mar 2021 14:18:07 +1300 Subject: [PATCH 8/8] Add an entry to doc/api/index.rst for clib.Session.virtualfile_from_data --- doc/api/index.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/api/index.rst b/doc/api/index.rst index 0ef0b359b39..58a41dd8cec 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -186,14 +186,15 @@ the :meth:`~pygmt.clib.Session.call_module` method: clib.Session.call_module -Passing memory blocks between Python variables (:class:`numpy.ndarray`, -:class:`pandas.Series`, and :class:`xarray.DataArray`) and GMT happens through *virtual -files*. These methods are context managers that automate the conversion of Python -variables to GMT virtual files: +Passing memory blocks between Python data objects (e.g. :class:`numpy.ndarray`, +:class:`pandas.Series`, and :class:`xarray.DataArray`) and GMT happens through +*virtual files*. These methods are context managers that automate the +conversion of Python variables to GMT virtual files: .. autosummary:: :toctree: generated + clib.Session.virtualfile_from_data clib.Session.virtualfile_from_matrix clib.Session.virtualfile_from_vectors clib.Session.virtualfile_from_grid