Wrap grdtrack (#308)

weiji14 · web-flow · commit 16f5a1042792 · 2020-03-26T15:39:52.000+13:00
Implement GMT grdtrack function under sampling.py. Original GMT `grdtrack` documentation can be found at https://docs.generic-mapping-tools.org/latest/grdtrack.html. Implementation here takes in a pandas.DataFrame table and xarray.DataArray grid as input, and returns a pandas.DataFrame with an extra column for the sampled grid data. Also possible to use text file or netcdf inputs. Test cases checking proper pandas.DataFrame and xarray.DataArray inputs stored in test_grdtrack.py. Sample datasets for tests uses newly created load_ocean_ridge_points functions in datasets/tutorial.py. Added gallery example 'sampling along tracks'.
diff --git a/doc/api/index.rst b/doc/api/index.rst
@@ -69,6 +69,7 @@ Operations on grids:
     :toctree: generated
 
     grdinfo
+    grdtrack
 
 
 Miscellaneous
@@ -97,10 +98,10 @@ and store them in the GMT cache folder.
     :toctree: generated
 
     datasets.load_earth_relief
-    datasets.load_usgs_quakes
-    datasets.load_sample_bathymetry
     datasets.load_japan_quakes
-
+    datasets.load_ocean_ridge_points
+    datasets.load_sample_bathymetry
+    datasets.load_usgs_quakes
 
 .. automodule:: pygmt.exceptions
 
diff --git a/doc/conf.py b/doc/conf.py
@@ -62,6 +62,7 @@
         [
             "../examples/gallery/coast",
             "../examples/gallery/plot",
+            "../examples/gallery/grid",
             "../examples/projections/azim",
             "../examples/projections/conic",
             "../examples/projections/cyl",
diff --git a/examples/gallery/grid/README.txt b/examples/gallery/grid/README.txt
@@ -0,0 +1,2 @@
+Grids
+-----
diff --git a/examples/gallery/grid/track_sampling.py b/examples/gallery/grid/track_sampling.py
@@ -0,0 +1,39 @@
+"""
+Sampling along tracks
+---------------------
+
+The :func:`pygmt.grdtrack` function samples a raster grid's value along specified
+points. We will need to input a 2D raster to ``grid`` which can be an
+``xarray.DataArray``. The ``points`` parameter can be a ``pandas.DataFrame`` table where
+the first two columns are x and y (or longitude and latitude). Note also that there is a
+``newcolname`` parameter that will be used to name the new column of values we sampled
+from the grid.
+
+Alternatively, we can provide a NetCDF file path to ``grid``. An ASCII file path can
+also be accepted for ``points``, but an ``outfile`` parameter will then need to be set
+to name the resulting output ASCII file.
+"""
+
+import pygmt
+
+# Load sample grid and point datasets
+grid = pygmt.datasets.load_earth_relief()
+points = pygmt.datasets.load_ocean_ridge_points()
+# Sample the bathymetry along the world's ocean ridges at specified track points
+track = pygmt.grdtrack(points=points, grid=grid, newcolname="bathymetry")
+
+fig = pygmt.Figure()
+# Plot the earth relief grid on Cylindrical Stereographic projection, masking land areas
+fig.basemap(region="g", frame=True, projection="Cyl_stere/150/-20/8i")
+fig.grdimage(grid=grid, cmap="gray")
+fig.coast(land="#666666")
+# Plot using circles (c) of 0.15cm, the sampled bathymetry points
+# Points are colored using elevation values (normalized for visual purposes)
+fig.plot(
+    x=track.longitude,
+    y=track.latitude,
+    style="c0.15c",
+    cmap="terra",
+    color=(track.bathymetry - track.bathymetry.mean()) / track.bathymetry.std(),
+)
+fig.show()
diff --git a/pygmt/__init__.py b/pygmt/__init__.py
@@ -15,6 +15,7 @@
 from .session_management import begin as _begin, end as _end
 from .figure import Figure
 from .gridding import surface
+from .sampling import grdtrack
 from .mathops import makecpt
 from .modules import info, grdinfo, which
 from . import datasets
diff --git a/pygmt/datasets/__init__.py b/pygmt/datasets/__init__.py
@@ -2,5 +2,10 @@
 #
 # Load sample data included with GMT (downloaded from the GMT cache server).
 
-from .tutorial import load_japan_quakes, load_sample_bathymetry, load_usgs_quakes
+from .tutorial import (
+    load_japan_quakes,
+    load_ocean_ridge_points,
+    load_sample_bathymetry,
+    load_usgs_quakes,
+)
 from .earth_relief import load_earth_relief
diff --git a/pygmt/datasets/tutorial.py b/pygmt/datasets/tutorial.py
@@ -38,6 +38,29 @@ def load_japan_quakes():
     return data
 
 
+def load_ocean_ridge_points():
+    """
+    Load a table of ocean ridge points for the entire world as a
+    pandas.DataFrame.
+
+    This is the ``@ridge.txt`` dataset used in the GMT tutorials.
+
+    The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the
+    first time you invoke this function. Afterwards, it will load the data from
+    the cache. So you'll need an internet connection the first time around.
+
+    Returns
+    -------
+    data :  pandas.Dataframe
+        The data table. Columns are longitude and latitude.
+    """
+    fname = which("@ridge.txt", download="c")
+    data = pd.read_csv(
+        fname, sep=r"\s+", names=["longitude", "latitude"], skiprows=1, comment=">"
+    )
+    return data
+
+
 def load_sample_bathymetry():
     """
     Load a table of ship observations of bathymetry off Baja California as a
diff --git a/pygmt/sampling.py b/pygmt/sampling.py
@@ -0,0 +1,101 @@
+"""
+GMT modules for Sampling of 1-D and 2-D Data
+"""
+import pandas as pd
+
+from .clib import Session
+from .helpers import (
+    build_arg_string,
+    fmt_docstring,
+    GMTTempFile,
+    data_kind,
+    dummy_context,
+)
+from .exceptions import GMTInvalidInput
+
+
+@fmt_docstring
+def grdtrack(points, grid, newcolname=None, outfile=None, **kwargs):
+    """
+    Sample grids at specified (x,y) locations.
+
+    Grdtrack reads one or more grid files and a table with (x,y) [or (lon,lat)]
+    positions in the first two columns (more columns may be present). It
+    interpolates the grid(s) at the positions in the table and writes out the
+    table with the interpolated values added as (one or more) new columns. A
+    bicubic [Default], bilinear, B-spline or nearest-neighbor (see -n)
+    interpolation is used, requiring boundary conditions at the limits of the
+    region.
+
+    Full option list at :gmt-docs:`grdtrack.html`
+
+    Parameters
+    ----------
+    points: pandas.DataFrame or file (csv, txt, etc)
+        Either a table with (x, y) or (lon, lat) values in the first two
+        columns, or a data file name. More columns may be present.
+
+    grid: xarray.DataArray or file (netcdf)
+        Gridded array from which to sample values from.
+
+    newcolname: str
+        Required if 'points' is a pandas.DataFrame. The name for the new column
+        in the track pandas.DataFrame table where the sampled values will be
+        placed.
+
+    outfile: str
+        Required if 'points' is a file. The file name for the output ASCII
+        file.
+
+    Returns
+    -------
+    track: pandas.DataFrame or None
+        Return type depends on whether the outfile parameter is set:
+        - pandas.DataFrame table with (x, y, ..., newcolname) if outfile is not
+          set
+        - None if outfile is set (track output will be stored in outfile)
+
+    """
+
+    with GMTTempFile(suffix=".csv") as tmpfile:
+        with Session() as lib:
+            # Store the pandas.DataFrame points table in virtualfile
+            if data_kind(points) == "matrix":
+                if newcolname is None:
+                    raise GMTInvalidInput("Please pass in a str to 'newcolname'")
+                table_context = lib.virtualfile_from_matrix(points.values)
+            elif data_kind(points) == "file":
+                if outfile is None:
+                    raise GMTInvalidInput("Please pass in a str to 'outfile'")
+                table_context = dummy_context(points)
+            else:
+                raise GMTInvalidInput(f"Unrecognized data type {type(points)}")
+
+            # Store the xarray.DataArray grid in virtualfile
+            if data_kind(grid) == "grid":
+                grid_context = lib.virtualfile_from_grid(grid)
+            elif data_kind(grid) == "file":
+                grid_context = dummy_context(grid)
+            else:
+                raise GMTInvalidInput(f"Unrecognized data type {type(grid)}")
+
+            # Run grdtrack on the temporary (csv) points table
+            # and (netcdf) grid virtualfile
+            with table_context as csvfile:
+                with grid_context as grdfile:
+                    kwargs.update({"G": grdfile})
+                    if outfile is None:  # Output to tmpfile if outfile is not set
+                        outfile = tmpfile.name
+                    arg_str = " ".join(
+                        [csvfile, build_arg_string(kwargs), "->" + outfile]
+                    )
+                    lib.call_module(module="grdtrack", args=arg_str)
+
+        # Read temporary csv output to a pandas table
+        if outfile == tmpfile.name:  # if user did not set outfile, return pd.DataFrame
+            column_names = points.columns.to_list() + [newcolname]
+            result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)
+        elif outfile != tmpfile.name:  # return None if outfile set, output in outfile
+            result = None
+
+    return result
diff --git a/pygmt/tests/test_datasets.py b/pygmt/tests/test_datasets.py
@@ -8,6 +8,7 @@
 from ..datasets import (
     load_japan_quakes,
     load_earth_relief,
+    load_ocean_ridge_points,
     load_sample_bathymetry,
     load_usgs_quakes,
 )
@@ -27,6 +28,17 @@ def test_japan_quakes():
     assert summary.loc["max", "day"] == 31
 
 
+def test_ocean_ridge_points():
+    "Check that the @ridge.txt dataset loads without errors"
+    data = load_ocean_ridge_points()
+    assert data.shape == (4146, 2)
+    summary = data.describe()
+    assert summary.loc["min", "longitude"] == -179.9401
+    assert summary.loc["max", "longitude"] == 179.935
+    assert summary.loc["min", "latitude"] == -65.6182
+    assert summary.loc["max", "latitude"] == 86.8
+
+
 def test_sample_bathymetry():
     "Check that the @tut_ship.xyz dataset loads without errors"
     data = load_sample_bathymetry()
diff --git a/pygmt/tests/test_grdtrack.py b/pygmt/tests/test_grdtrack.py
@@ -0,0 +1,138 @@
+"""
+Tests for grdtrack
+"""
+import os
+
+import numpy.testing as npt
+import pandas as pd
+import pytest
+
+from .. import grdtrack
+from .. import which
+from ..datasets import load_earth_relief, load_ocean_ridge_points
+from ..exceptions import GMTInvalidInput
+from ..helpers import data_kind
+
+TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
+TEMP_TRACK = os.path.join(TEST_DATA_DIR, "tmp_track.txt")
+
+
+def test_grdtrack_input_dataframe_and_dataarray():
+    """
+    Run grdtrack by passing in a pandas.DataFrame and xarray.DataArray as
+    inputs
+    """
+    dataframe = load_ocean_ridge_points()
+    dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107))
+
+    output = grdtrack(points=dataframe, grid=dataarray, newcolname="bathymetry")
+    assert isinstance(output, pd.DataFrame)
+    assert output.columns.to_list() == ["longitude", "latitude", "bathymetry"]
+    npt.assert_allclose(output.iloc[0], [-110.9536, -42.2489, -2797.482959])
+
+    return output
+
+
+def test_grdtrack_input_csvfile_and_dataarray():
+    """
+    Run grdtrack by passing in a csvfile and xarray.DataArray as inputs
+    """
+    csvfile = which("@ridge.txt", download="c")
+    dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107))
+
+    try:
+        output = grdtrack(points=csvfile, grid=dataarray, outfile=TEMP_TRACK)
+        assert output is None  # check that output is None since outfile is set
+        assert os.path.exists(path=TEMP_TRACK)  # check that outfile exists at path
+
+        track = pd.read_csv(TEMP_TRACK, sep="\t", header=None, comment=">")
+        npt.assert_allclose(track.iloc[0], [-110.9536, -42.2489, -2797.482959])
+    finally:
+        os.remove(path=TEMP_TRACK)
+
+    return output
+
+
+def test_grdtrack_input_dataframe_and_ncfile():
+    """
+    Run grdtrack by passing in a pandas.DataFrame and netcdf file as inputs
+    """
+    dataframe = load_ocean_ridge_points()
+    ncfile = which("@earth_relief_60m", download="c")
+
+    output = grdtrack(points=dataframe, grid=ncfile, newcolname="bathymetry")
+    assert isinstance(output, pd.DataFrame)
+    assert output.columns.to_list() == ["longitude", "latitude", "bathymetry"]
+    npt.assert_allclose(output.iloc[0], [-32.2971, 37.4118, -1686.692142])
+
+    return output
+
+
+def test_grdtrack_input_csvfile_and_ncfile():
+    """
+    Run grdtrack by passing in a csvfile and netcdf file as inputs
+    """
+    csvfile = which("@ridge.txt", download="c")
+    ncfile = which("@earth_relief_60m", download="c")
+
+    try:
+        output = grdtrack(points=csvfile, grid=ncfile, outfile=TEMP_TRACK)
+        assert output is None  # check that output is None since outfile is set
+        assert os.path.exists(path=TEMP_TRACK)  # check that outfile exists at path
+
+        track = pd.read_csv(TEMP_TRACK, sep="\t", header=None, comment=">")
+        npt.assert_allclose(track.iloc[0], [-32.2971, 37.4118, -1686.692142])
+    finally:
+        os.remove(path=TEMP_TRACK)
+
+    return output
+
+
+def test_grdtrack_wrong_kind_of_points_input():
+    """
+    Run grdtrack using points input that is not a pandas.DataFrame (matrix) or
+    file
+    """
+    dataframe = load_ocean_ridge_points()
+    invalid_points = dataframe.longitude.to_xarray()
+    dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107))
+
+    assert data_kind(invalid_points) == "grid"
+    with pytest.raises(GMTInvalidInput):
+        grdtrack(points=invalid_points, grid=dataarray, newcolname="bathymetry")
+
+
+def test_grdtrack_wrong_kind_of_grid_input():
+    """
+    Run grdtrack using grid input that is not as xarray.DataArray (grid) or
+    file
+    """
+    dataframe = load_ocean_ridge_points()
+    dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107))
+    invalid_grid = dataarray.to_dataset()
+
+    assert data_kind(invalid_grid) == "matrix"
+    with pytest.raises(GMTInvalidInput):
+        grdtrack(points=dataframe, grid=invalid_grid, newcolname="bathymetry")
+
+
+def test_grdtrack_without_newcolname_setting():
+    """
+    Run grdtrack by not passing in newcolname parameter setting
+    """
+    dataframe = load_ocean_ridge_points()
+    dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107))
+
+    with pytest.raises(GMTInvalidInput):
+        grdtrack(points=dataframe, grid=dataarray)
+
+
+def test_grdtrack_without_outfile_setting():
+    """
+    Run grdtrack by not passing in outfile parameter setting
+    """
+    csvfile = which("@ridge.txt", download="c")
+    dataarray = load_earth_relief().sel(lat=slice(-49, -42), lon=slice(-118, -107))
+
+    with pytest.raises(GMTInvalidInput):
+        grdtrack(points=csvfile, grid=dataarray)

Original file line number	Diff line number	Diff line change
`@@ -62,6 +62,7 @@`
`62`	`62`	`[`
`63`	`63`	`"../examples/gallery/coast",`
`64`	`64`	`"../examples/gallery/plot",`
	`65`	`+ "../examples/gallery/grid",`
`65`	`66`	`"../examples/projections/azim",`
`66`	`67`	`"../examples/projections/conic",`
`67`	`68`	`"../examples/projections/cyl",`