From 4172de8d36186205664a78e2693010a5bba8a902 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 19 Nov 2018 11:24:35 +1300 Subject: [PATCH 01/13] Wrap surface Initial commit for https://github.com/GenericMappingTools/gmt-python/issues/243. Implement GMT surface function under gridding.py. Test cases checking file/matrix/vectors type inputs stored in test_surface.py. Sample dataset for tests uses newly created load_tut_ship function in datasets/tutorial.py. Note that original GMT surface module https://gmt.soest.hawaii.edu/doc/latest/surface.html requires a -Goutputfile.nc parameter. Here, the implementation of surface does not respect this -G parameter properly. Instead, it stores the output file to a GMTTempFile and returns an xarray.Dataset in Python. --- gmt/__init__.py | 1 + gmt/datasets/__init__.py | 2 +- gmt/datasets/tutorial.py | 21 ++++++++++++ gmt/gridding.py | 71 +++++++++++++++++++++++++++++++++++++++ gmt/tests/test_surface.py | 36 ++++++++++++++++++++ 5 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 gmt/gridding.py create mode 100644 gmt/tests/test_surface.py diff --git a/gmt/__init__.py b/gmt/__init__.py index 161c18f3a43..b213a547237 100644 --- a/gmt/__init__.py +++ b/gmt/__init__.py @@ -13,6 +13,7 @@ # Import modules to make the high-level GMT Python API from .session_management import begin as _begin, end as _end from .figure import Figure +from .gridding import surface from .modules import info, grdinfo, which from . import datasets diff --git a/gmt/datasets/__init__.py b/gmt/datasets/__init__.py index ffc945f34d1..29ca1b8b79b 100644 --- a/gmt/datasets/__init__.py +++ b/gmt/datasets/__init__.py @@ -1,5 +1,5 @@ """ Load sample data included with GMT (downloaded from the GMT cache server). """ -from .tutorial import load_japan_quakes, load_usgs_quakes +from .tutorial import load_japan_quakes, load_tut_ship, load_usgs_quakes from .earth_relief import load_earth_relief diff --git a/gmt/datasets/tutorial.py b/gmt/datasets/tutorial.py index 839fd42568e..dc07c735bb4 100644 --- a/gmt/datasets/tutorial.py +++ b/gmt/datasets/tutorial.py @@ -38,6 +38,27 @@ def load_japan_quakes(): return data +def load_tut_ship(): + """ + Load a table of ship observations of bathymetry off Baja California as a + pandas.DataFrame. + + This is the ``@tut_ship.xyz`` dataset used in the GMT tutorials. + + The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the + first time you invoke this function. Afterwards, it will load the data from + the cache. So you'll need an internet connection the first time around. + + Returns + ------- + data : pandas.Dataframe + The data table. Columns are x, y, and z. + """ + fname = which("@tut_ship.xyz", download="c") + data = pd.read_csv(fname, sep="\t", header=None, names=["x", "y", "z"]) + return data + + def load_usgs_quakes(): """ Load a table of global earthquakes form the USGS as a pandas.Dataframe. diff --git a/gmt/gridding.py b/gmt/gridding.py new file mode 100644 index 00000000000..972c50e6590 --- /dev/null +++ b/gmt/gridding.py @@ -0,0 +1,71 @@ +""" +GMT modules for Gridding of Data Tables +""" +import xarray as xr + +from .clib import Session +from .helpers import ( + build_arg_string, + fmt_docstring, + GMTTempFile, + use_alias, + data_kind, + dummy_context, +) +from .exceptions import GMTInvalidInput + + +@fmt_docstring +@use_alias(I="spacing", R="region") +def surface(x=None, y=None, z=None, data=None, **kwargs): + """ + Grids table data using adjustable tension continuous curvature splines + + Takes a matrix, (x,y,z) pairs, or a file name as input. + + Must provide either *data* or *x*, *y*, and *z*. + + {gmt_module_docs} + + Parameters + ---------- + x, y, z : 1d arrays + Arrays of x and y coordinates and values z of the data points. + data : str or 2d array + Either a data file name or a 2d numpy array with the tabular data. + + spacing (I) : + ``'xinc[unit][+e|n][/yinc[unit][+e|n]]'``. + x_inc [and optionally y_inc] is the grid spacing. + + region (R) : str or list + ``'xmin/xmax/ymin/ymax[+r][+uunit]'``. + Specify the region of interest. + + {aliases} + + Returns + ------- + array: xarray.DataArray + The output grid as a DataArray + """ + kind = data_kind(data, x, y, z) + if kind == "vectors" and z is None: + raise GMTInvalidInput("Must provide z with x and y.") + + with GMTTempFile(suffix=".nc") as outfile: + with Session() as lib: + if kind == "file": + file_context = dummy_context(data) + elif kind == "matrix": + file_context = lib.virtualfile_from_matrix(data) + elif kind == "vectors": + file_context = lib.virtualfile_from_vectors(x, y, z) + else: + raise GMTInvalidInput("Unrecognized data type: {}".format(type(data))) + with file_context as infile: + kwargs.update({"G": outfile.name}) + arg_str = " ".join([infile, build_arg_string(kwargs)]) + lib.call_module(module="surface", args=arg_str) + result = xr.open_dataset(outfile) + return result diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py new file mode 100644 index 00000000000..48b11a76298 --- /dev/null +++ b/gmt/tests/test_surface.py @@ -0,0 +1,36 @@ +""" +Tests for surface +""" +from .. import surface +from .. import which +from ..datasets import load_tut_ship + + +def test_surface_input_file(): + """ + Run surface by passing in a filename + """ + fname = which("@tut_ship.xyz", download="c") + outputfile = surface(data=fname, I="5m", R="245/255/20/30") + return outputfile + + +def test_surface_input_data_array(): + """ + Run surface by passing in a numpy array into data + """ + ship_data = load_tut_ship() + data = ship_data.values # convert pandas.DataFrame to numpy.ndarray + outputfile = surface(data=data, I="5m", R="245/255/20/30") + return outputfile + + +def test_surface_input_xyz(): + """ + Run surface by passing in x, y, z numpy.ndarrays individually + """ + ship_data = load_tut_ship() + outputfile = surface( + x=ship_data.x, y=ship_data.y, z=ship_data.z, I="5m", R="245/255/20/30" + ) + return outputfile From 3cee454e85397de5d04346f81f8bc3130f52108a Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Mon, 19 Nov 2018 11:50:36 +1300 Subject: [PATCH 02/13] Quickfix xarray open using netcdf dataset name --- gmt/gridding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gmt/gridding.py b/gmt/gridding.py index 972c50e6590..bcb34b7b6c6 100644 --- a/gmt/gridding.py +++ b/gmt/gridding.py @@ -67,5 +67,5 @@ def surface(x=None, y=None, z=None, data=None, **kwargs): kwargs.update({"G": outfile.name}) arg_str = " ".join([infile, build_arg_string(kwargs)]) lib.call_module(module="surface", args=arg_str) - result = xr.open_dataset(outfile) + result = xr.open_dataset(outfile.name) return result From d5c25b407a4ee19fafe90de318a2ce6b78dd0c55 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 20 Nov 2018 14:51:10 +1300 Subject: [PATCH 03/13] Add tests for load_tut_ship Checks that the loaded pandas.DataFrame's shape and minmax summary on each column is correct. --- gmt/tests/test_datasets.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/gmt/tests/test_datasets.py b/gmt/tests/test_datasets.py index 4d5fe0586ee..ea618a9c973 100644 --- a/gmt/tests/test_datasets.py +++ b/gmt/tests/test_datasets.py @@ -5,7 +5,12 @@ import numpy as np import numpy.testing as npt -from ..datasets import load_japan_quakes, load_earth_relief, load_usgs_quakes +from ..datasets import ( + load_japan_quakes, + load_earth_relief, + load_tut_ship, + load_usgs_quakes, +) from ..exceptions import GMTInvalidInput @@ -22,6 +27,19 @@ def test_japan_quakes(): assert summary.loc["max", "day"] == 31 +def test_tut_ship(): + "Check that the @tut_ship.xyz dataset loads without errors" + data = load_tut_ship() + assert data.shape == (82970, 3) + summary = data.describe() + assert summary.loc["min", "x"] == 245.0 + assert summary.loc["max", "x"] == 254.705 + assert summary.loc["min", "y"] == 20.0 + assert summary.loc["max", "y"] == 29.99131 + assert summary.loc["min", "z"] == -7708.0 + assert summary.loc["max", "z"] == -9.0 + + def test_usgs_quakes(): "Check that the dataset loads without errors" data = load_usgs_quakes() From 10539a951bc21820034b70b273b4ef4b35c07c6b Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 20 Nov 2018 15:29:46 +1300 Subject: [PATCH 04/13] Add more tests for surface to check invalid inputs Getting code coverage up to 100% for gridding.py. --- gmt/tests/test_surface.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py index 48b11a76298..9633bf75844 100644 --- a/gmt/tests/test_surface.py +++ b/gmt/tests/test_surface.py @@ -1,9 +1,13 @@ """ Tests for surface """ +import pytest + from .. import surface from .. import which from ..datasets import load_tut_ship +from ..exceptions import GMTInvalidInput +from ..helpers import data_kind def test_surface_input_file(): @@ -34,3 +38,23 @@ def test_surface_input_xyz(): x=ship_data.x, y=ship_data.y, z=ship_data.z, I="5m", R="245/255/20/30" ) return outputfile + + +def test_surface_input_xy_no_z(): + """ + Run surface by passing in x and y, but no z + """ + ship_data = load_tut_ship() + with pytest.raises(GMTInvalidInput): + surface(x=ship_data.x, y=ship_data.y, I="5m", R="245/255/20/30") + + +def test_surface_wrong_kind_of_input(): + """ + Run surface using grid input that is not file/matrix/vectors + """ + ship_data = load_tut_ship() + data = ship_data.z.to_xarray() # convert pandas.Series to xarray.DataArray + assert data_kind(data) == "grid" + with pytest.raises(GMTInvalidInput): + surface(data=data, I="5m", R="245/255/20/30") From 9772fa3d5825175a8760e57f1d6c39afeee20e4f Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Tue, 20 Nov 2018 17:45:19 +1300 Subject: [PATCH 05/13] Implement G outfile parameter for surface Allow `surface` to store the output grid to a netcdf file. Will still return xarray.Dataset in python bindings. --- gmt/gridding.py | 12 +++++++++--- gmt/tests/test_surface.py | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/gmt/gridding.py b/gmt/gridding.py index bcb34b7b6c6..e70b2c70da3 100644 --- a/gmt/gridding.py +++ b/gmt/gridding.py @@ -42,6 +42,10 @@ def surface(x=None, y=None, z=None, data=None, **kwargs): ``'xmin/xmax/ymin/ymax[+r][+uunit]'``. Specify the region of interest. + outfile (G) : str + Optional. The file name for the output netcdf file with extension .nc + to store the grid in. + {aliases} Returns @@ -53,7 +57,7 @@ def surface(x=None, y=None, z=None, data=None, **kwargs): if kind == "vectors" and z is None: raise GMTInvalidInput("Must provide z with x and y.") - with GMTTempFile(suffix=".nc") as outfile: + with GMTTempFile(suffix=".nc") as tmpfile: with Session() as lib: if kind == "file": file_context = dummy_context(data) @@ -64,8 +68,10 @@ def surface(x=None, y=None, z=None, data=None, **kwargs): else: raise GMTInvalidInput("Unrecognized data type: {}".format(type(data))) with file_context as infile: - kwargs.update({"G": outfile.name}) + if "G" not in kwargs.keys(): + kwargs.update({"G": tmpfile.name}) + outfile = kwargs["G"] arg_str = " ".join([infile, build_arg_string(kwargs)]) lib.call_module(module="surface", args=arg_str) - result = xr.open_dataset(outfile.name) + result = xr.open_dataset(outfile) return result diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py index 9633bf75844..597f07d5ac4 100644 --- a/gmt/tests/test_surface.py +++ b/gmt/tests/test_surface.py @@ -1,7 +1,10 @@ """ Tests for surface """ +import os + import pytest +import xarray as xr from .. import surface from .. import which @@ -9,14 +12,18 @@ from ..exceptions import GMTInvalidInput from ..helpers import data_kind +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") +TEMP_GRID = os.path.join(TEST_DATA_DIR, "tmp_grid.nc") + def test_surface_input_file(): """ Run surface by passing in a filename """ fname = which("@tut_ship.xyz", download="c") - outputfile = surface(data=fname, I="5m", R="245/255/20/30") - return outputfile + output = surface(data=fname, I="5m", R="245/255/20/30") + assert isinstance(output, xr.Dataset) + return output def test_surface_input_data_array(): @@ -25,8 +32,9 @@ def test_surface_input_data_array(): """ ship_data = load_tut_ship() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray - outputfile = surface(data=data, I="5m", R="245/255/20/30") - return outputfile + output = surface(data=data, I="5m", R="245/255/20/30") + assert isinstance(output, xr.Dataset) + return output def test_surface_input_xyz(): @@ -34,10 +42,11 @@ def test_surface_input_xyz(): Run surface by passing in x, y, z numpy.ndarrays individually """ ship_data = load_tut_ship() - outputfile = surface( + output = surface( x=ship_data.x, y=ship_data.y, z=ship_data.z, I="5m", R="245/255/20/30" ) - return outputfile + assert isinstance(output, xr.Dataset) + return output def test_surface_input_xy_no_z(): @@ -58,3 +67,19 @@ def test_surface_wrong_kind_of_input(): assert data_kind(data) == "grid" with pytest.raises(GMTInvalidInput): surface(data=data, I="5m", R="245/255/20/30") + + +def test_surface_g_outfile_param(): + """ + Run surface with the -Goutputfile.nc parameter. + """ + ship_data = load_tut_ship() + data = ship_data.values # convert pandas.DataFrame to numpy.ndarray + try: + output = surface(data=data, I="5m", R="245/255/20/30", G=TEMP_GRID) + assert os.path.exists(TEMP_GRID) + grid = xr.open_dataset(TEMP_GRID) + assert output == grid # check that original output is same as that from file + finally: + os.remove(path=TEMP_GRID) + return output From 322b8300a3e2ca1f5e7227c220122a66ae653164 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Sun, 25 Nov 2018 14:12:10 +1300 Subject: [PATCH 06/13] Add tests for surface aliases 'spacing' and 'region' --- gmt/tests/test_surface.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py index 597f07d5ac4..8ed26c1c1b4 100644 --- a/gmt/tests/test_surface.py +++ b/gmt/tests/test_surface.py @@ -83,3 +83,14 @@ def test_surface_g_outfile_param(): finally: os.remove(path=TEMP_GRID) return output + + +def test_surface_aliases(): + """ + Run surface using aliases spacing -I and region -R. + """ + ship_data = load_tut_ship() + data = ship_data.values # convert pandas.DataFrame to numpy.ndarray + output = surface(data=data, spacing="5m", region="245/255/20/30") + assert isinstance(output, xr.Dataset) + return output From cbf3979fa7c3f0fd90284ebe811e4b7a1a7c233a Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Fri, 30 Nov 2018 11:22:20 +1300 Subject: [PATCH 07/13] Change load_tut_ship to load_sample_bathymetry Renaming the function to load the @tut_ship.xyz tutorial dataset. --- gmt/datasets/__init__.py | 2 +- gmt/datasets/tutorial.py | 2 +- gmt/tests/test_datasets.py | 6 +++--- gmt/tests/test_surface.py | 14 +++++++------- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gmt/datasets/__init__.py b/gmt/datasets/__init__.py index 29ca1b8b79b..c3cca555fd1 100644 --- a/gmt/datasets/__init__.py +++ b/gmt/datasets/__init__.py @@ -1,5 +1,5 @@ """ Load sample data included with GMT (downloaded from the GMT cache server). """ -from .tutorial import load_japan_quakes, load_tut_ship, load_usgs_quakes +from .tutorial import load_japan_quakes, load_sample_bathymetry, load_usgs_quakes from .earth_relief import load_earth_relief diff --git a/gmt/datasets/tutorial.py b/gmt/datasets/tutorial.py index dc07c735bb4..4801785abc5 100644 --- a/gmt/datasets/tutorial.py +++ b/gmt/datasets/tutorial.py @@ -38,7 +38,7 @@ def load_japan_quakes(): return data -def load_tut_ship(): +def load_sample_bathymetry(): """ Load a table of ship observations of bathymetry off Baja California as a pandas.DataFrame. diff --git a/gmt/tests/test_datasets.py b/gmt/tests/test_datasets.py index ea618a9c973..84d222d0a50 100644 --- a/gmt/tests/test_datasets.py +++ b/gmt/tests/test_datasets.py @@ -8,7 +8,7 @@ from ..datasets import ( load_japan_quakes, load_earth_relief, - load_tut_ship, + load_sample_bathymetry, load_usgs_quakes, ) from ..exceptions import GMTInvalidInput @@ -27,9 +27,9 @@ def test_japan_quakes(): assert summary.loc["max", "day"] == 31 -def test_tut_ship(): +def test_sample_bathymetry(): "Check that the @tut_ship.xyz dataset loads without errors" - data = load_tut_ship() + data = load_sample_bathymetry() assert data.shape == (82970, 3) summary = data.describe() assert summary.loc["min", "x"] == 245.0 diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py index 8ed26c1c1b4..d5481d157cc 100644 --- a/gmt/tests/test_surface.py +++ b/gmt/tests/test_surface.py @@ -8,7 +8,7 @@ from .. import surface from .. import which -from ..datasets import load_tut_ship +from ..datasets import load_sample_bathymetry from ..exceptions import GMTInvalidInput from ..helpers import data_kind @@ -30,7 +30,7 @@ def test_surface_input_data_array(): """ Run surface by passing in a numpy array into data """ - ship_data = load_tut_ship() + ship_data = load_sample_bathymetry() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray output = surface(data=data, I="5m", R="245/255/20/30") assert isinstance(output, xr.Dataset) @@ -41,7 +41,7 @@ def test_surface_input_xyz(): """ Run surface by passing in x, y, z numpy.ndarrays individually """ - ship_data = load_tut_ship() + ship_data = load_sample_bathymetry() output = surface( x=ship_data.x, y=ship_data.y, z=ship_data.z, I="5m", R="245/255/20/30" ) @@ -53,7 +53,7 @@ def test_surface_input_xy_no_z(): """ Run surface by passing in x and y, but no z """ - ship_data = load_tut_ship() + ship_data = load_sample_bathymetry() with pytest.raises(GMTInvalidInput): surface(x=ship_data.x, y=ship_data.y, I="5m", R="245/255/20/30") @@ -62,7 +62,7 @@ def test_surface_wrong_kind_of_input(): """ Run surface using grid input that is not file/matrix/vectors """ - ship_data = load_tut_ship() + ship_data = load_sample_bathymetry() data = ship_data.z.to_xarray() # convert pandas.Series to xarray.DataArray assert data_kind(data) == "grid" with pytest.raises(GMTInvalidInput): @@ -73,7 +73,7 @@ def test_surface_g_outfile_param(): """ Run surface with the -Goutputfile.nc parameter. """ - ship_data = load_tut_ship() + ship_data = load_sample_bathymetry() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray try: output = surface(data=data, I="5m", R="245/255/20/30", G=TEMP_GRID) @@ -89,7 +89,7 @@ def test_surface_aliases(): """ Run surface using aliases spacing -I and region -R. """ - ship_data = load_tut_ship() + ship_data = load_sample_bathymetry() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray output = surface(data=data, spacing="5m", region="245/255/20/30") assert isinstance(output, xr.Dataset) From 9882d04d6c22c9bd4c311418cc63ace37cb8f46d Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Fri, 30 Nov 2018 14:06:37 +1300 Subject: [PATCH 08/13] Use long explicit names for aliases and coordinate names Using long aliases 'spacing' and 'region' instead of -I and -R respectively. Change from x, y, z to longitude, latitude, bathymetry for load_sample_bathymetry tutorial dataset. Note that the sample dataset's longitude is in 0-360 format instead of -180 to 180. --- gmt/datasets/tutorial.py | 6 ++++-- gmt/tests/test_datasets.py | 12 ++++++------ gmt/tests/test_surface.py | 29 +++++++++++++++++++---------- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/gmt/datasets/tutorial.py b/gmt/datasets/tutorial.py index 4801785abc5..06d3eae2d21 100644 --- a/gmt/datasets/tutorial.py +++ b/gmt/datasets/tutorial.py @@ -52,10 +52,12 @@ def load_sample_bathymetry(): Returns ------- data : pandas.Dataframe - The data table. Columns are x, y, and z. + The data table. Columns are longitude, latitude, and bathymetry. """ fname = which("@tut_ship.xyz", download="c") - data = pd.read_csv(fname, sep="\t", header=None, names=["x", "y", "z"]) + data = pd.read_csv( + fname, sep="\t", header=None, names=["longitude", "latitude", "bathymetry"] + ) return data diff --git a/gmt/tests/test_datasets.py b/gmt/tests/test_datasets.py index 84d222d0a50..b7f3c43f5cb 100644 --- a/gmt/tests/test_datasets.py +++ b/gmt/tests/test_datasets.py @@ -32,12 +32,12 @@ def test_sample_bathymetry(): data = load_sample_bathymetry() assert data.shape == (82970, 3) summary = data.describe() - assert summary.loc["min", "x"] == 245.0 - assert summary.loc["max", "x"] == 254.705 - assert summary.loc["min", "y"] == 20.0 - assert summary.loc["max", "y"] == 29.99131 - assert summary.loc["min", "z"] == -7708.0 - assert summary.loc["max", "z"] == -9.0 + assert summary.loc["min", "longitude"] == 245.0 + assert summary.loc["max", "longitude"] == 254.705 + assert summary.loc["min", "latitude"] == 20.0 + assert summary.loc["max", "latitude"] == 29.99131 + assert summary.loc["min", "bathymetry"] == -7708.0 + assert summary.loc["max", "bathymetry"] == -9.0 def test_usgs_quakes(): diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py index d5481d157cc..21ce42c3833 100644 --- a/gmt/tests/test_surface.py +++ b/gmt/tests/test_surface.py @@ -21,7 +21,7 @@ def test_surface_input_file(): Run surface by passing in a filename """ fname = which("@tut_ship.xyz", download="c") - output = surface(data=fname, I="5m", R="245/255/20/30") + output = surface(data=fname, spacing="5m", region="245/255/20/30") assert isinstance(output, xr.Dataset) return output @@ -32,7 +32,7 @@ def test_surface_input_data_array(): """ ship_data = load_sample_bathymetry() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray - output = surface(data=data, I="5m", R="245/255/20/30") + output = surface(data=data, spacing="5m", region="245/255/20/30") assert isinstance(output, xr.Dataset) return output @@ -43,7 +43,11 @@ def test_surface_input_xyz(): """ ship_data = load_sample_bathymetry() output = surface( - x=ship_data.x, y=ship_data.y, z=ship_data.z, I="5m", R="245/255/20/30" + x=ship_data.longitude, + y=ship_data.latitude, + z=ship_data.bathymetry, + spacing="5m", + region="245/255/20/30", ) assert isinstance(output, xr.Dataset) return output @@ -55,7 +59,12 @@ def test_surface_input_xy_no_z(): """ ship_data = load_sample_bathymetry() with pytest.raises(GMTInvalidInput): - surface(x=ship_data.x, y=ship_data.y, I="5m", R="245/255/20/30") + surface( + x=ship_data.longitude, + y=ship_data.latitude, + spacing="5m", + region="245/255/20/30", + ) def test_surface_wrong_kind_of_input(): @@ -63,10 +72,10 @@ def test_surface_wrong_kind_of_input(): Run surface using grid input that is not file/matrix/vectors """ ship_data = load_sample_bathymetry() - data = ship_data.z.to_xarray() # convert pandas.Series to xarray.DataArray + data = ship_data.bathymetry.to_xarray() # convert pandas.Series to xarray.DataArray assert data_kind(data) == "grid" with pytest.raises(GMTInvalidInput): - surface(data=data, I="5m", R="245/255/20/30") + surface(data=data, spacing="5m", region="245/255/20/30") def test_surface_g_outfile_param(): @@ -76,7 +85,7 @@ def test_surface_g_outfile_param(): ship_data = load_sample_bathymetry() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray try: - output = surface(data=data, I="5m", R="245/255/20/30", G=TEMP_GRID) + output = surface(data=data, spacing="5m", region="245/255/20/30", G=TEMP_GRID) assert os.path.exists(TEMP_GRID) grid = xr.open_dataset(TEMP_GRID) assert output == grid # check that original output is same as that from file @@ -85,12 +94,12 @@ def test_surface_g_outfile_param(): return output -def test_surface_aliases(): +def test_surface_short_aliases(): """ - Run surface using aliases spacing -I and region -R. + Run surface using short aliases -I for spacing and -R for region. """ ship_data = load_sample_bathymetry() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray - output = surface(data=data, spacing="5m", region="245/255/20/30") + output = surface(data=data, I="5m", R="245/255/20/30") assert isinstance(output, xr.Dataset) return output From 3324235b83a08bde932061c9d3f12bfd29b8a0d6 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Fri, 30 Nov 2018 16:39:01 +1300 Subject: [PATCH 09/13] Force loading surface dataset into memory The xarray.Dataset may sometimes be lazily loaded, so we use `.load()` to force loading of the data. See e.g. https://github.com/pydata/xarray/blame/9f4474d657193f1c7c9aac25bb2edf94755a8593/doc/io.rst#L169-L170. This is important because we want to persist the data even after the tempfile is deleted. --- gmt/gridding.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gmt/gridding.py b/gmt/gridding.py index e70b2c70da3..5b6c3113067 100644 --- a/gmt/gridding.py +++ b/gmt/gridding.py @@ -73,5 +73,6 @@ def surface(x=None, y=None, z=None, data=None, **kwargs): outfile = kwargs["G"] arg_str = " ".join([infile, build_arg_string(kwargs)]) lib.call_module(module="surface", args=arg_str) - result = xr.open_dataset(outfile) + with xr.open_dataset(outfile) as dataset: + result = dataset.load() return result From 04a3ead9a2f413836f776ad3c0b81108b269b513 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Fri, 30 Nov 2018 17:22:43 +1300 Subject: [PATCH 10/13] Try to please the bots by updating docs and import order Cosmetic changes mainly. Try to reduce cognitive complexity of the surface docstring. Also fix pylint C0411: third party import "import xarray as xr" should be placed before "import pytest" (wrong-import-order) in test_surface.py --- gmt/gridding.py | 11 +++++++++-- gmt/tests/test_surface.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/gmt/gridding.py b/gmt/gridding.py index 5b6c3113067..9f006c4dfec 100644 --- a/gmt/gridding.py +++ b/gmt/gridding.py @@ -19,9 +19,16 @@ @use_alias(I="spacing", R="region") def surface(x=None, y=None, z=None, data=None, **kwargs): """ - Grids table data using adjustable tension continuous curvature splines + Grids table data using adjustable tension continuous curvature splines. - Takes a matrix, (x,y,z) pairs, or a file name as input. + Surface reads randomly-spaced (x,y,z) triples and produces gridded values z(x,y) + by solving: + + (1 - T) * L (L (z)) + T * L (z) = 0 + + where T is a tension factor between 0 and 1, and L indicates the Laplacian operator. + + Takes a matrix, xyz triples, or a file name as input. Must provide either *data* or *x*, *y*, and *z*. diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py index 21ce42c3833..09d429ad580 100644 --- a/gmt/tests/test_surface.py +++ b/gmt/tests/test_surface.py @@ -3,8 +3,8 @@ """ import os -import pytest import xarray as xr +import pytest from .. import surface from .. import which From 99b2691e7f23747448a76657d0f3b99f470e497f Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Fri, 30 Nov 2018 17:32:00 +1300 Subject: [PATCH 11/13] Add surface to docs under Data Processing - tabular data --- doc/api/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/api/index.rst b/doc/api/index.rst index 53ae930245f..65c00527744 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -48,6 +48,7 @@ Operations on tabular data: :toctree: generated info + surface Operations on grids: From fe26180d36c8b993f511039bef6d7ef8d54865f2 Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Sat, 29 Dec 2018 11:48:56 +1300 Subject: [PATCH 12/13] Add load_sample_bathymetry tutorial dataset to docs --- doc/api/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/api/index.rst b/doc/api/index.rst index 65c00527744..c231f1221c8 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -81,6 +81,7 @@ and store them in the GMT cache folder. datasets.load_earth_relief datasets.load_usgs_quakes + datasets.load_sample_bathymetry datasets.load_japan_quakes From a987bf5a85c7cc310a436d51b8046fcdedda28ac Mon Sep 17 00:00:00 2001 From: Wei Ji Date: Sat, 29 Dec 2018 12:16:21 +1300 Subject: [PATCH 13/13] Have surface return None if outfile is set, else return xr.DataArray Using 'outfile' as long alias for the 'G' parameter in gmt surface. Created some if-then logic to return no output (None) if this outfile parameter is set, otherwise return an xarray.Dataset if outfile parameter is unset (the default result from previous code). --- gmt/gridding.py | 20 ++++++++++++++------ gmt/tests/test_surface.py | 25 +++++++++++++++++-------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/gmt/gridding.py b/gmt/gridding.py index 9f006c4dfec..41d85397a71 100644 --- a/gmt/gridding.py +++ b/gmt/gridding.py @@ -16,7 +16,7 @@ @fmt_docstring -@use_alias(I="spacing", R="region") +@use_alias(I="spacing", R="region", G="outfile") def surface(x=None, y=None, z=None, data=None, **kwargs): """ Grids table data using adjustable tension continuous curvature splines. @@ -57,8 +57,11 @@ def surface(x=None, y=None, z=None, data=None, **kwargs): Returns ------- - array: xarray.DataArray - The output grid as a DataArray + ret: xarray.DataArray or None + Return type depends on whether the outfile (G) parameter is set: + + - xarray.DataArray if outfile (G) is not set + - None if outfile (G) is set (grid output will be stored in outfile) """ kind = data_kind(data, x, y, z) if kind == "vectors" and z is None: @@ -75,11 +78,16 @@ def surface(x=None, y=None, z=None, data=None, **kwargs): else: raise GMTInvalidInput("Unrecognized data type: {}".format(type(data))) with file_context as infile: - if "G" not in kwargs.keys(): + if "G" not in kwargs.keys(): # if outfile is unset, output to tmpfile kwargs.update({"G": tmpfile.name}) outfile = kwargs["G"] arg_str = " ".join([infile, build_arg_string(kwargs)]) lib.call_module(module="surface", args=arg_str) - with xr.open_dataset(outfile) as dataset: - result = dataset.load() + + if outfile == tmpfile.name: # if user did not set outfile, return DataArray + with xr.open_dataset(outfile) as dataset: + result = dataset.load() + elif outfile != tmpfile.name: # if user sets an outfile, return None + result = None + return result diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py index 09d429ad580..7817bf12a1e 100644 --- a/gmt/tests/test_surface.py +++ b/gmt/tests/test_surface.py @@ -78,17 +78,20 @@ def test_surface_wrong_kind_of_input(): surface(data=data, spacing="5m", region="245/255/20/30") -def test_surface_g_outfile_param(): +def test_surface_with_outfile_param(): """ - Run surface with the -Goutputfile.nc parameter. + Run surface with the -Goutputfile.nc parameter """ ship_data = load_sample_bathymetry() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray try: - output = surface(data=data, spacing="5m", region="245/255/20/30", G=TEMP_GRID) - assert os.path.exists(TEMP_GRID) + output = surface( + data=data, spacing="5m", region="245/255/20/30", outfile=TEMP_GRID + ) + assert output is None # check that output is None since outfile is set + assert os.path.exists(path=TEMP_GRID) # check that outfile exists at path grid = xr.open_dataset(TEMP_GRID) - assert output == grid # check that original output is same as that from file + assert isinstance(grid, xr.Dataset) # check that netcdf grid loaded properly finally: os.remove(path=TEMP_GRID) return output @@ -96,10 +99,16 @@ def test_surface_g_outfile_param(): def test_surface_short_aliases(): """ - Run surface using short aliases -I for spacing and -R for region. + Run surface using short aliases -I for spacing, -R for region, -G for outfile """ ship_data = load_sample_bathymetry() data = ship_data.values # convert pandas.DataFrame to numpy.ndarray - output = surface(data=data, I="5m", R="245/255/20/30") - assert isinstance(output, xr.Dataset) + try: + output = surface(data=data, I="5m", R="245/255/20/30", G=TEMP_GRID) + assert output is None # check that output is None since outfile is set + assert os.path.exists(path=TEMP_GRID) # check that outfile exists at path + grid = xr.open_dataset(TEMP_GRID) + assert isinstance(grid, xr.Dataset) # check that netcdf grid loaded properly + finally: + os.remove(path=TEMP_GRID) return output