Wrap grdtrack

weiji14 · weiji14 · commit a6cef02b485b · 2019-05-22T14:28:14.000+02:00
Initial commit for #307. Implement GMT grdtrack function under sampling.py. Test cases checking proper pandas.DataFrame and xarray.DataArray inputs stored in test_grdtrack.py. Sample datasets for tests uses newly created load_east_pacific_rise_grid and load_ocean_ridge_points functions in datasets/tutorial.py. GMT grdtrack documentation can be found at https://gmt.soest.hawaii.edu/doc/latest/grdtrack. Originally, grdtrack should take in an xyfile and -Ggridfile as parameters, and pass the output table to stdout. Here, the implementation (currently) takes in a pandas.DataFrame table and xarray.DataArray grid instead as input, and returns a pandas.DataFrame with an extra column for the sampled grid data.
diff --git a/pygmt/__init__.py b/pygmt/__init__.py
@@ -15,6 +15,7 @@
 from .session_management import begin as _begin, end as _end
 from .figure import Figure
 from .gridding import surface
+from .sampling import grdtrack
 from .modules import info, grdinfo, which
 from . import datasets
 
diff --git a/pygmt/datasets/__init__.py b/pygmt/datasets/__init__.py
@@ -2,5 +2,11 @@
 #
 # Load sample data included with GMT (downloaded from the GMT cache server).
 
-from .tutorial import load_japan_quakes, load_sample_bathymetry, load_usgs_quakes
+from .tutorial import (
+    load_east_pacific_rise_grid,
+    load_japan_quakes,
+    load_ocean_ridge_points,
+    load_sample_bathymetry,
+    load_usgs_quakes,
+)
 from .earth_relief import load_earth_relief
diff --git a/pygmt/datasets/tutorial.py b/pygmt/datasets/tutorial.py
@@ -2,10 +2,33 @@
 Functions to load sample data from the GMT tutorials.
 """
 import pandas as pd
+import xarray as xr
 
 from .. import which
 
 
+def load_east_pacific_rise_grid():
+    """
+    Load a grid of bathymetry over part of the East Pacific Rise as a xarray.DataArray.
+
+    This is the ``@spac_33.nc`` dataset used in the GMT tutorials.
+
+    The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the
+    first time you invoke this function. Afterwards, it will load the data from
+    the cache. So you'll need an internet connection the first time around.
+
+    Returns
+    -------
+    data : xarray.DataArray
+        The data grid. Coordinates in longitude (lon) and latitude (lat).
+        Data attributes: bathymetry (z) in metres.
+    """
+    fname = which("@spac_33.nc", download="c")
+    with xr.open_dataarray(fname) as dataarray:
+        data = dataarray.load()
+    return data
+
+
 def load_japan_quakes():
     """
     Load a table of earthquakes around Japan as a pandas.Dataframe.
@@ -38,6 +61,28 @@ def load_japan_quakes():
     return data
 
 
+def load_ocean_ridge_points():
+    """
+    Load a table of ocean ridge points for the entire world as a pandas.DataFrame.
+
+    This is the ``@ridge.txt`` dataset used in the GMT tutorials.
+
+    The data are downloaded to a cache directory (usually ``~/.gmt/cache``) the
+    first time you invoke this function. Afterwards, it will load the data from
+    the cache. So you'll need an internet connection the first time around.
+
+    Returns
+    -------
+    data :  pandas.Dataframe
+        The data table. Columns are longitude and latitude.
+    """
+    fname = which("@ridge.txt", download="c")
+    data = pd.read_csv(
+        fname, sep=r"\s+", names=["longitude", "latitude"], skiprows=1, comment=">"
+    )
+    return data
+
+
 def load_sample_bathymetry():
     """
     Load a table of ship observations of bathymetry off Baja California as a
diff --git a/pygmt/sampling.py b/pygmt/sampling.py
@@ -0,0 +1,70 @@
+"""
+GMT modules for Sampling of 1-D and 2-D Data
+"""
+import pandas as pd
+import xarray as xr
+
+from .clib import Session
+from .helpers import build_arg_string, fmt_docstring, GMTTempFile, data_kind
+from .exceptions import GMTInvalidInput
+
+
+@fmt_docstring
+def grdtrack(table: pd.DataFrame, grid: xr.DataArray, newcolname: str = "z_", **kwargs):
+    """
+    Sample grids at specified (x,y) locations.
+
+    Grdtrack reads one or more grid files and a table with (x,y) [or (lon,lat)]
+    positions in the first two columns (more columns may be present). It interpolates
+    the grid(s) at the positions in the table and writes out the table with the
+    interpolated values added as (one or more) new columns. A bicubic [Default],
+    bilinear, B-spline or nearest-neighbor (see -n) interpolation is used, requiring
+    boundary conditions at the limits of the region.
+
+    Parameters
+    ----------
+    table: pandas.DataFrame
+        Table with (x, y) or (lon, lat) values in the first two columns. More columns
+        may be present.
+
+    grid: xarray.DataArray
+        Gridded array from which to sample values from.
+
+    newcolname: str
+        Name for the new column in the table where the sampled values will be placed.
+        Defaults to "z_".
+
+    Returns
+    -------
+    ret: pandas.DataFrame
+        Table with (x, y, ..., z_) or (lon, lat, ..., z_) values.
+
+    """
+    with GMTTempFile(suffix=".csv") as tmpfile:
+        with Session() as lib:
+            # Store the pandas.DataFrame table in virtualfile
+            if data_kind(table) == "matrix":
+                table_context = lib.virtualfile_from_matrix(table.values)
+            else:
+                raise GMTInvalidInput(f"Unrecognized data type {type(table)}")
+
+            # Store the xarray.DataArray grid in virtualfile
+            if data_kind(grid) == "grid":
+                grid_context = lib.virtualfile_from_grid(grid)
+            else:
+                raise GMTInvalidInput(f"Unrecognized data type {type(grid)}")
+
+            # Run grdtrack on the temporary (csv) table and (netcdf) grid virtualfiles
+            with table_context as csvfile:
+                with grid_context as grdfile:
+                    kwargs = {"G": grdfile}
+                    arg_str = " ".join(
+                        [csvfile, build_arg_string(kwargs), "->" + tmpfile.name]
+                    )
+                    lib.call_module(module="grdtrack", args=arg_str)
+
+        # Read temporary csv output to a pandas table
+        column_names = table.columns.to_list() + [newcolname]
+        result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)
+
+    return result
diff --git a/pygmt/tests/test_grdtrack.py b/pygmt/tests/test_grdtrack.py
@@ -0,0 +1,66 @@
+"""
+Tests for grdtrack
+"""
+
+import pandas as pd
+import pytest
+
+from .. import grdtrack
+from ..datasets import load_east_pacific_rise_grid, load_ocean_ridge_points
+from ..exceptions import GMTInvalidInput
+from ..helpers import data_kind
+
+
+def test_grdtrack_input_dataframe_and_dataarray():
+    """
+    Run grdtrack by passing in a pandas.DataFrame and xarray.DataArray as inputs
+    """
+    dataframe = load_ocean_ridge_points()
+    dataarray = load_east_pacific_rise_grid()
+
+    output = grdtrack(table=dataframe, grid=dataarray)
+    assert isinstance(output, pd.DataFrame)
+    assert output.columns.to_list() == ["longitude", "latitude", "z_"]
+    assert output.iloc[0].to_list() == [-110.9536, -42.2489, -2950.49576833]
+
+    return output
+
+
+def test_grdtrack_input_wrong_kind_of_table():
+    """
+    Run grdtrack using table input that is not a pandas.DataFrame (matrix)
+    """
+    dataframe = load_ocean_ridge_points()
+    invalid_table = dataframe.longitude.to_xarray()
+    dataarray = load_east_pacific_rise_grid()
+
+    assert data_kind(invalid_table) == "grid"
+    with pytest.raises(GMTInvalidInput):
+        grdtrack(table=invalid_table, grid=dataarray)
+
+
+def test_grdtrack_input_wrong_kind_of_grid():
+    """
+    Run grdtrack using grid input that is not an xarray.DataArray (grid)
+    """
+    dataframe = load_ocean_ridge_points()
+    dataarray = load_east_pacific_rise_grid()
+    invalid_grid = dataarray.to_dataset()
+
+    assert data_kind(invalid_grid) == "matrix"
+    with pytest.raises(GMTInvalidInput):
+        grdtrack(table=dataframe, grid=invalid_grid)
+
+
+def test_grdtrack_newcolname_setting():
+    """
+    Run grdtrack by passing in a non-default newcolname parameter setting
+    """
+    dataframe = load_ocean_ridge_points()
+    dataarray = load_east_pacific_rise_grid()
+
+    output = grdtrack(table=dataframe, grid=dataarray, newcolname="bathymetry")
+    assert output.columns.to_list() == ["longitude", "latitude", "bathymetry"]
+    assert output.iloc[0].to_list() == [-110.9536, -42.2489, -2950.49576833]
+
+    return output