diff --git a/doc/api/index.rst b/doc/api/index.rst index 80cb112d015..ede258136e4 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -69,6 +69,7 @@ Operations on grids: :toctree: generated grdinfo + grdtrack Miscellaneous @@ -97,10 +98,10 @@ and store them in the GMT cache folder. :toctree: generated datasets.load_earth_relief - datasets.load_usgs_quakes - datasets.load_sample_bathymetry datasets.load_japan_quakes - + datasets.load_ocean_ridge_points + datasets.load_sample_bathymetry + datasets.load_usgs_quakes .. automodule:: pygmt.exceptions diff --git a/doc/conf.py b/doc/conf.py index d7a055a5e47..4c98cc35fda 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,6 +62,7 @@ [ "../examples/gallery/coast", "../examples/gallery/plot", + "../examples/gallery/grid", "../examples/projections/azim", "../examples/projections/conic", "../examples/projections/cyl", diff --git a/examples/gallery/grid/README.txt b/examples/gallery/grid/README.txt new file mode 100644 index 00000000000..6fd2c9aea1a --- /dev/null +++ b/examples/gallery/grid/README.txt @@ -0,0 +1,2 @@ +Grids +----- diff --git a/examples/gallery/grid/track_sampling.py b/examples/gallery/grid/track_sampling.py new file mode 100644 index 00000000000..66bd9cb2316 --- /dev/null +++ b/examples/gallery/grid/track_sampling.py @@ -0,0 +1,39 @@ +""" +Sampling along tracks +--------------------- + +The :func:`pygmt.grdtrack` function samples a raster grid's value along specified +points. We will need to input a 2D raster to ``grid`` which can be an +``xarray.DataArray``. The ``points`` parameter can be a ``pandas.DataFrame`` table where +the first two columns are x and y (or longitude and latitude). Note also that there is a +``newcolname`` parameter that will be used to name the new column of values we sampled +from the grid. + +Alternatively, we can provide a NetCDF file path to ``grid``. An ASCII file path can +also be accepted for ``points``, but an ``outfile`` parameter will then need to be set +to name the resulting output ASCII file. +""" + +import pygmt + +# Load sample grid and point datasets +grid = pygmt.datasets.load_earth_relief() +points = pygmt.datasets.load_ocean_ridge_points() +# Sample the bathymetry along the world's ocean ridges at specified track points +track = pygmt.grdtrack(points=points, grid=grid, newcolname="bathymetry") + +fig = pygmt.Figure() +# Plot the earth relief grid on Cylindrical Stereographic projection, masking land areas +fig.basemap(region="g", frame=True, projection="Cyl_stere/150/-20/8i") +fig.grdimage(grid=grid, cmap="gray") +fig.coast(land="#666666") +# Plot using circles (c) of 0.15cm, the sampled bathymetry points +# Points are colored using elevation values (normalized for visual purposes) +fig.plot( + x=track.longitude, + y=track.latitude, + style="c0.15c", + cmap="terra", + color=(track.bathymetry - track.bathymetry.mean()) / track.bathymetry.std(), +) +fig.show() diff --git a/pygmt/__init__.py b/pygmt/__init__.py index 2c035e913ef..fd6e8626d7c 100644 --- a/pygmt/__init__.py +++ b/pygmt/__init__.py @@ -15,6 +15,7 @@ from .session_management import begin as _begin, end as _end from .figure import Figure from .gridding import surface +from .sampling import grdtrack from .mathops import makecpt from .modules import info, grdinfo, which from . import datasets diff --git a/pygmt/datasets/__init__.py b/pygmt/datasets/__init__.py index 44d6229fcee..4454903ee11 100644 --- a/pygmt/datasets/__init__.py +++ b/pygmt/datasets/__init__.py @@ -2,5 +2,10 @@ # # Load sample data included with GMT (downloaded from the GMT cache server). -from .tutorial import load_japan_quakes, load_sample_bathymetry, load_usgs_quakes +from .tutorial import ( + load_japan_quakes, + load_ocean_ridge_points, + load_sample_bathymetry, + load_usgs_quakes, +) from .earth_relief import load_earth_relief diff --git a/pygmt/datasets/tutorial.py b/pygmt/datasets/tutorial.py index 77f7bbbd12d..b56e0ce0e1e 100644 --- a/pygmt/datasets/tutorial.py +++ b/pygmt/datasets/tutorial.py @@ -38,6 +38,29 @@ def load_japan_quakes(): return data +def load_ocean_ridge_points(): + """ + Load a table of ocean ridge points for the entire world as a + pandas.DataFrame. + + This is the ``@ridge.txt`` dataset used in the GMT tutorials. + + The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the + first time you invoke this function. Afterwards, it will load the data from + the cache. So you'll need an internet connection the first time around. + + Returns + ------- + data : pandas.Dataframe + The data table. Columns are longitude and latitude. + """ + fname = which("@ridge.txt", download="c") + data = pd.read_csv( + fname, sep=r"\s+", names=["longitude", "latitude"], skiprows=1, comment=">" + ) + return data + + def load_sample_bathymetry(): """ Load a table of ship observations of bathymetry off Baja California as a diff --git a/pygmt/sampling.py b/pygmt/sampling.py new file mode 100644 index 00000000000..4ed2a6d171b --- /dev/null +++ b/pygmt/sampling.py @@ -0,0 +1,101 @@ +""" +GMT modules for Sampling of 1-D and 2-D Data +""" +import pandas as pd + +from .clib import Session +from .helpers import ( + build_arg_string, + fmt_docstring, + GMTTempFile, + data_kind, + dummy_context, +) +from .exceptions import GMTInvalidInput + + +@fmt_docstring +def grdtrack(points, grid, newcolname=None, outfile=None, **kwargs): + """ + Sample grids at specified (x,y) locations. + + Grdtrack reads one or more grid files and a table with (x,y) [or (lon,lat)] + positions in the first two columns (more columns may be present). It + interpolates the grid(s) at the positions in the table and writes out the + table with the interpolated values added as (one or more) new columns. A + bicubic [Default], bilinear, B-spline or nearest-neighbor (see -n) + interpolation is used, requiring boundary conditions at the limits of the + region. + + Full option list at :gmt-docs:`grdtrack.html` + + Parameters + ---------- + points: pandas.DataFrame or file (csv, txt, etc) + Either a table with (x, y) or (lon, lat) values in the first two + columns, or a data file name. More columns may be present. + + grid: xarray.DataArray or file (netcdf) + Gridded array from which to sample values from. + + newcolname: str + Required if 'points' is a pandas.DataFrame. The name for the new column + in the track pandas.DataFrame table where the sampled values will be + placed. + + outfile: str + Required if 'points' is a file. The file name for the output ASCII + file. + + Returns + ------- + track: pandas.DataFrame or None + Return type depends on whether the outfile parameter is set: + - pandas.DataFrame table with (x, y, ..., newcolname) if outfile is not + set + - None if outfile is set (track output will be stored in outfile) + + """ + + with GMTTempFile(suffix=".csv") as tmpfile: + with Session() as lib: + # Store the pandas.DataFrame points table in virtualfile + if data_kind(points) == "matrix": + if newcolname is None: + raise GMTInvalidInput("Please pass in a str to 'newcolname'") + table_context = lib.virtualfile_from_matrix(points.values) + elif data_kind(points) == "file": + if outfile is None: + raise GMTInvalidInput("Please pass in a str to 'outfile'") + table_context = dummy_context(points) + else: + raise GMTInvalidInput(f"Unrecognized data type {type(points)}") + + # Store the xarray.DataArray grid in virtualfile + if data_kind(grid) == "grid": + grid_context = lib.virtualfile_from_grid(grid) + elif data_kind(grid) == "file": + grid_context = dummy_context(grid) + else: + raise GMTInvalidInput(f"Unrecognized data type {type(grid)}") + + # Run grdtrack on the temporary (csv) points table + # and (netcdf) grid virtualfile + with table_context as csvfile: + with grid_context as grdfile: + kwargs.update({"G": grdfile}) + if outfile is None: # Output to tmpfile if outfile is not set + outfile = tmpfile.name + arg_str = " ".join( + [csvfile, build_arg_string(kwargs), "->" + outfile] + ) + lib.call_module(module="grdtrack", args=arg_str) + + # Read temporary csv output to a pandas table + if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame + column_names = points.columns.to_list() + [newcolname] + result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) + elif outfile != tmpfile.name: # return None if outfile set, output in outfile + result = None + + return result diff --git a/pygmt/tests/test_datasets.py b/pygmt/tests/test_datasets.py index ad9fe68bf8e..eba0f563370 100644 --- a/pygmt/tests/test_datasets.py +++ b/pygmt/tests/test_datasets.py @@ -8,6 +8,7 @@ from ..datasets import ( load_japan_quakes, load_earth_relief, + load_ocean_ridge_points, load_sample_bathymetry, load_usgs_quakes, ) @@ -27,6 +28,17 @@ def test_japan_quakes(): assert summary.loc["max", "day"] == 31 +def test_ocean_ridge_points(): + "Check that the @ridge.txt dataset loads without errors" + data = load_ocean_ridge_points() + assert data.shape == (4146, 2) + summary = data.describe() + assert summary.loc["min", "longitude"] == -179.9401 + assert summary.loc["max", "longitude"] == 179.935 + assert summary.loc["min", "latitude"] == -65.6182 + assert summary.loc["max", "latitude"] == 86.8 + + def test_sample_bathymetry(): "Check that the @tut_ship.xyz dataset loads without errors" data = load_sample_bathymetry() diff --git a/pygmt/tests/test_grdtrack.py b/pygmt/tests/test_grdtrack.py new file mode 100644 index 00000000000..acc1c5c00c7 --- /dev/null +++ b/pygmt/tests/test_grdtrack.py @@ -0,0 +1,138 @@ +""" +Tests for grdtrack +""" +import os + +import numpy.testing as npt +import pandas as pd +import pytest + +from .. import grdtrack +from .. import which +from ..datasets import load_earth_relief, load_ocean_ridge_points +from ..exceptions import GMTInvalidInput +from ..helpers import data_kind + +TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") +TEMP_TRACK = os.path.join(TEST_DATA_DIR, "tmp_track.txt") + + +def test_grdtrack_input_dataframe_and_dataarray(): + """ + Run grdtrack by passing in a pandas.DataFrame and xarray.DataArray as + inputs + """ + dataframe = load_ocean_ridge_points() + dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107)) + + output = grdtrack(points=dataframe, grid=dataarray, newcolname="bathymetry") + assert isinstance(output, pd.DataFrame) + assert output.columns.to_list() == ["longitude", "latitude", "bathymetry"] + npt.assert_allclose(output.iloc[0], [-110.9536, -42.2489, -2797.482959]) + + return output + + +def test_grdtrack_input_csvfile_and_dataarray(): + """ + Run grdtrack by passing in a csvfile and xarray.DataArray as inputs + """ + csvfile = which("@ridge.txt", download="c") + dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107)) + + try: + output = grdtrack(points=csvfile, grid=dataarray, outfile=TEMP_TRACK) + assert output is None # check that output is None since outfile is set + assert os.path.exists(path=TEMP_TRACK) # check that outfile exists at path + + track = pd.read_csv(TEMP_TRACK, sep="\t", header=None, comment=">") + npt.assert_allclose(track.iloc[0], [-110.9536, -42.2489, -2797.482959]) + finally: + os.remove(path=TEMP_TRACK) + + return output + + +def test_grdtrack_input_dataframe_and_ncfile(): + """ + Run grdtrack by passing in a pandas.DataFrame and netcdf file as inputs + """ + dataframe = load_ocean_ridge_points() + ncfile = which("@earth_relief_60m", download="c") + + output = grdtrack(points=dataframe, grid=ncfile, newcolname="bathymetry") + assert isinstance(output, pd.DataFrame) + assert output.columns.to_list() == ["longitude", "latitude", "bathymetry"] + npt.assert_allclose(output.iloc[0], [-32.2971, 37.4118, -1686.692142]) + + return output + + +def test_grdtrack_input_csvfile_and_ncfile(): + """ + Run grdtrack by passing in a csvfile and netcdf file as inputs + """ + csvfile = which("@ridge.txt", download="c") + ncfile = which("@earth_relief_60m", download="c") + + try: + output = grdtrack(points=csvfile, grid=ncfile, outfile=TEMP_TRACK) + assert output is None # check that output is None since outfile is set + assert os.path.exists(path=TEMP_TRACK) # check that outfile exists at path + + track = pd.read_csv(TEMP_TRACK, sep="\t", header=None, comment=">") + npt.assert_allclose(track.iloc[0], [-32.2971, 37.4118, -1686.692142]) + finally: + os.remove(path=TEMP_TRACK) + + return output + + +def test_grdtrack_wrong_kind_of_points_input(): + """ + Run grdtrack using points input that is not a pandas.DataFrame (matrix) or + file + """ + dataframe = load_ocean_ridge_points() + invalid_points = dataframe.longitude.to_xarray() + dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107)) + + assert data_kind(invalid_points) == "grid" + with pytest.raises(GMTInvalidInput): + grdtrack(points=invalid_points, grid=dataarray, newcolname="bathymetry") + + +def test_grdtrack_wrong_kind_of_grid_input(): + """ + Run grdtrack using grid input that is not as xarray.DataArray (grid) or + file + """ + dataframe = load_ocean_ridge_points() + dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107)) + invalid_grid = dataarray.to_dataset() + + assert data_kind(invalid_grid) == "matrix" + with pytest.raises(GMTInvalidInput): + grdtrack(points=dataframe, grid=invalid_grid, newcolname="bathymetry") + + +def test_grdtrack_without_newcolname_setting(): + """ + Run grdtrack by not passing in newcolname parameter setting + """ + dataframe = load_ocean_ridge_points() + dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107)) + + with pytest.raises(GMTInvalidInput): + grdtrack(points=dataframe, grid=dataarray) + + +def test_grdtrack_without_outfile_setting(): + """ + Run grdtrack by not passing in outfile parameter setting + """ + csvfile = which("@ridge.txt", download="c") + dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107)) + + with pytest.raises(GMTInvalidInput): + grdtrack(points=csvfile, grid=dataarray)