diff --git a/doc/api/index.rst b/doc/api/index.rst index 53ae930245f..c231f1221c8 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -48,6 +48,7 @@ Operations on tabular data: :toctree: generated info + surface Operations on grids: @@ -80,6 +81,7 @@ and store them in the GMT cache folder. datasets.load_earth_relief datasets.load_usgs_quakes + datasets.load_sample_bathymetry datasets.load_japan_quakes diff --git a/gmt/__init__.py b/gmt/__init__.py index 161c18f3a43..b213a547237 100644 --- a/gmt/__init__.py +++ b/gmt/__init__.py @@ -13,6 +13,7 @@ # Import modules to make the high-level GMT Python API from .session_management import begin as _begin, end as _end from .figure import Figure +from .gridding import surface from .modules import info, grdinfo, which from . import datasets diff --git a/gmt/datasets/__init__.py b/gmt/datasets/__init__.py index ffc945f34d1..c3cca555fd1 100644 --- a/gmt/datasets/__init__.py +++ b/gmt/datasets/__init__.py @@ -1,5 +1,5 @@ """ Load sample data included with GMT (downloaded from the GMT cache server). """ -from .tutorial import load_japan_quakes, load_usgs_quakes +from .tutorial import load_japan_quakes, load_sample_bathymetry, load_usgs_quakes from .earth_relief import load_earth_relief diff --git a/gmt/datasets/tutorial.py b/gmt/datasets/tutorial.py index 839fd42568e..06d3eae2d21 100644 --- a/gmt/datasets/tutorial.py +++ b/gmt/datasets/tutorial.py @@ -38,6 +38,29 @@ def load_japan_quakes(): return data +def load_sample_bathymetry(): + """ + Load a table of ship observations of bathymetry off Baja California as a + pandas.DataFrame. + + This is the ``@tut_ship.xyz`` dataset used in the GMT tutorials. + + The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the + first time you invoke this function. Afterwards, it will load the data from + the cache. So you'll need an internet connection the first time around. + + Returns + ------- + data : pandas.DataFrame + The data table. 
Columns are longitude, latitude, and bathymetry. + """ + fname = which("@tut_ship.xyz", download="c") + data = pd.read_csv( + fname, sep="\t", header=None, names=["longitude", "latitude", "bathymetry"] + ) + return data + + def load_usgs_quakes(): """ Load a table of global earthquakes form the USGS as a pandas.Dataframe. diff --git a/gmt/gridding.py b/gmt/gridding.py new file mode 100644 index 00000000000..41d85397a71 --- /dev/null +++ b/gmt/gridding.py @@ -0,0 +1,93 @@ +""" +GMT modules for Gridding of Data Tables +""" +import xarray as xr + +from .clib import Session +from .helpers import ( + build_arg_string, + fmt_docstring, + GMTTempFile, + use_alias, + data_kind, + dummy_context, +) +from .exceptions import GMTInvalidInput + + +@fmt_docstring +@use_alias(I="spacing", R="region", G="outfile") +def surface(x=None, y=None, z=None, data=None, **kwargs): + """ + Grids table data using adjustable tension continuous curvature splines. + + Surface reads randomly-spaced (x,y,z) triples and produces gridded values z(x,y) + by solving: + + (1 - T) * L (L (z)) + T * L (z) = 0 + + where T is a tension factor between 0 and 1, and L indicates the Laplacian operator. + + Takes a matrix, xyz triples, or a file name as input. + + Must provide either *data* or *x*, *y*, and *z*. + + {gmt_module_docs} + + Parameters + ---------- + x, y, z : 1d arrays + Arrays of x and y coordinates and values z of the data points. + data : str or 2d array + Either a data file name or a 2d numpy array with the tabular data. + + spacing (I) : + ``'xinc[unit][+e|n][/yinc[unit][+e|n]]'``. + x_inc [and optionally y_inc] is the grid spacing. + + region (R) : str or list + ``'xmin/xmax/ymin/ymax[+r][+uunit]'``. + Specify the region of interest. + + outfile (G) : str + Optional. The file name for the output netcdf file with extension .nc + to store the grid in. 
+ + {aliases} + + Returns + ------- + ret: xarray.Dataset or None + Return type depends on whether the outfile (G) parameter is set: + + - xarray.Dataset if outfile (G) is not set + - None if outfile (G) is set (grid output will be stored in outfile) + """ + kind = data_kind(data, x, y, z) + if kind == "vectors" and z is None: + raise GMTInvalidInput("Must provide z with x and y.") + + with GMTTempFile(suffix=".nc") as tmpfile: + with Session() as lib: + if kind == "file": + file_context = dummy_context(data) + elif kind == "matrix": + file_context = lib.virtualfile_from_matrix(data) + elif kind == "vectors": + file_context = lib.virtualfile_from_vectors(x, y, z) + else: + raise GMTInvalidInput("Unrecognized data type: {}".format(type(data))) + with file_context as infile: + if "G" not in kwargs.keys(): # if outfile is unset, output to tmpfile + kwargs.update({"G": tmpfile.name}) + outfile = kwargs["G"] + arg_str = " ".join([infile, build_arg_string(kwargs)]) + lib.call_module(module="surface", args=arg_str) + + if outfile == tmpfile.name: # if user did not set outfile, return Dataset + with xr.open_dataset(outfile) as dataset: + result = dataset.load() + elif outfile != tmpfile.name: # if user sets an outfile, return None + result = None + + return result diff --git a/gmt/tests/test_datasets.py b/gmt/tests/test_datasets.py index 2468d7a8950..23594de5031 100644 --- a/gmt/tests/test_datasets.py +++ b/gmt/tests/test_datasets.py @@ -5,7 +5,12 @@ import numpy.testing as npt import pytest -from ..datasets import load_japan_quakes, load_earth_relief, load_usgs_quakes +from ..datasets import ( + load_japan_quakes, + load_earth_relief, + load_sample_bathymetry, + load_usgs_quakes, +) from ..exceptions import GMTInvalidInput @@ -22,6 +27,19 @@ def test_japan_quakes(): assert summary.loc["max", "day"] == 31 +def test_sample_bathymetry(): + "Check that the @tut_ship.xyz dataset loads without errors" + data = load_sample_bathymetry() + assert data.shape == (82970, 3) + 
summary = data.describe() + assert summary.loc["min", "longitude"] == 245.0 + assert summary.loc["max", "longitude"] == 254.705 + assert summary.loc["min", "latitude"] == 20.0 + assert summary.loc["max", "latitude"] == 29.99131 + assert summary.loc["min", "bathymetry"] == -7708.0 + assert summary.loc["max", "bathymetry"] == -9.0 + + def test_usgs_quakes(): "Check that the dataset loads without errors" data = load_usgs_quakes() diff --git a/gmt/tests/test_surface.py b/gmt/tests/test_surface.py new file mode 100644 index 00000000000..7817bf12a1e --- /dev/null +++ b/gmt/tests/test_surface.py @@ -0,0 +1,114 @@ +""" +Tests for surface +""" +import os + +import xarray as xr +import pytest + +from .. import surface +from .. import which +from ..datasets import load_sample_bathymetry +from ..exceptions import GMTInvalidInput +from ..helpers import data_kind + +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") +TEMP_GRID = os.path.join(TEST_DATA_DIR, "tmp_grid.nc") + + +def test_surface_input_file(): + """ + Run surface by passing in a filename + """ + fname = which("@tut_ship.xyz", download="c") + output = surface(data=fname, spacing="5m", region="245/255/20/30") + assert isinstance(output, xr.Dataset) + return output + + +def test_surface_input_data_array(): + """ + Run surface by passing in a numpy array into data + """ + ship_data = load_sample_bathymetry() + data = ship_data.values # convert pandas.DataFrame to numpy.ndarray + output = surface(data=data, spacing="5m", region="245/255/20/30") + assert isinstance(output, xr.Dataset) + return output + + +def test_surface_input_xyz(): + """ + Run surface by passing in x, y, z numpy.ndarrays individually + """ + ship_data = load_sample_bathymetry() + output = surface( + x=ship_data.longitude, + y=ship_data.latitude, + z=ship_data.bathymetry, + spacing="5m", + region="245/255/20/30", + ) + assert isinstance(output, xr.Dataset) + return output + + +def test_surface_input_xy_no_z(): + """ + Run surface by passing 
in x and y, but no z + """ + ship_data = load_sample_bathymetry() + with pytest.raises(GMTInvalidInput): + surface( + x=ship_data.longitude, + y=ship_data.latitude, + spacing="5m", + region="245/255/20/30", + ) + + +def test_surface_wrong_kind_of_input(): + """ + Run surface using grid input that is not file/matrix/vectors + """ + ship_data = load_sample_bathymetry() + data = ship_data.bathymetry.to_xarray() # convert pandas.Series to xarray.DataArray + assert data_kind(data) == "grid" + with pytest.raises(GMTInvalidInput): + surface(data=data, spacing="5m", region="245/255/20/30") + + +def test_surface_with_outfile_param(): + """ + Run surface with the -Goutputfile.nc parameter + """ + ship_data = load_sample_bathymetry() + data = ship_data.values # convert pandas.DataFrame to numpy.ndarray + try: + output = surface( + data=data, spacing="5m", region="245/255/20/30", outfile=TEMP_GRID + ) + assert output is None # check that output is None since outfile is set + assert os.path.exists(path=TEMP_GRID) # check that outfile exists at path + grid = xr.open_dataset(TEMP_GRID) + assert isinstance(grid, xr.Dataset) # check that netcdf grid loaded properly + finally: + os.remove(path=TEMP_GRID) + return output + + +def test_surface_short_aliases(): + """ + Run surface using short aliases -I for spacing, -R for region, -G for outfile + """ + ship_data = load_sample_bathymetry() + data = ship_data.values # convert pandas.DataFrame to numpy.ndarray + try: + output = surface(data=data, I="5m", R="245/255/20/30", G=TEMP_GRID) + assert output is None # check that output is None since outfile is set + assert os.path.exists(path=TEMP_GRID) # check that outfile exists at path + grid = xr.open_dataset(TEMP_GRID) + assert isinstance(grid, xr.Dataset) # check that netcdf grid loaded properly + finally: + os.remove(path=TEMP_GRID) + return output