diff --git a/doc/api.rst b/doc/api.rst index b97f74d2ba7..2a4cb545b98 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -497,6 +497,7 @@ Advanced API Variable IndexVariable + as_variable register_dataset_accessor register_dataarray_accessor diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d8e75234bd7..8a79c322740 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,6 +38,10 @@ Enhancements to ``py.test`` (:issue:`1393`). By `Matthew Gidden `_. +- ``xarray.core.variable.as_variable`` is now part of the public API and + can be accessed using :py:func:`~xarray.as_variable` (:issue:`1303`). + By `Benoit Bovy `_. + - :py:func:`~xarray.align` now supports ``join='exact'``, which raises an error instead of aligning when indexes to be aligned are not equal. By `Stephan Hoyer `_. diff --git a/xarray/__init__.py b/xarray/__init__.py index 858bc3ec047..60ae7f18a1d 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -8,7 +8,7 @@ from .core.combine import concat, auto_combine from .core.extensions import (register_dataarray_accessor, register_dataset_accessor) -from .core.variable import Variable, IndexVariable, Coordinate +from .core.variable import as_variable, Variable, IndexVariable, Coordinate from .core.dataset import Dataset from .core.dataarray import DataArray from .core.merge import merge, MergeError diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3c3ed7dcc12..2e31ae5bc2a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -539,7 +539,7 @@ def _combine(self, applied, shortcut=False): combined = self._restore_dim_order(combined) if coord is not None: if shortcut: - combined._coords[coord.name] = as_variable(coord, copy=True) + combined._coords[coord.name] = as_variable(coord) else: combined.coords[coord.name] = coord combined = self._maybe_restore_empty_groups(combined) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 34b86275374..ad4836b930f 100644 --- a/xarray/core/variable.py +++ 
b/xarray/core/variable.py @@ -28,14 +28,32 @@ pass -def as_variable(obj, name=None, copy=False): - """Convert an object into an Variable. - - - If the object is already an `Variable`, return a shallow copy. - - Otherwise, if the object has 'dims' and 'data' attributes, convert - it into a new `Variable`. - - If all else fails, attempt to convert the object into an `Variable` by - unpacking it into the arguments for `Variable.__init__`. +def as_variable(obj, name=None): + """Convert an object into a Variable. + + Parameters + ---------- + obj : object + Object to convert into a Variable. + + - If the object is already a Variable, return a shallow copy. + - Otherwise, if the object has 'dims' and 'data' attributes, convert + it into a new Variable. + - If all else fails, attempt to convert the object into a Variable by + unpacking it into the arguments for creating a new Variable. + name : str, optional + If provided: + + - `obj` can be a 1D array, which is assumed to label coordinate values + along a dimension of this given name. + - Variables with name matching one of their dimensions are converted + into `IndexVariable` objects. + + Returns + ------- + var : Variable + The newly created variable. + """ # TODO: consider extending this method to automatically handle Iris and # pandas objects. 
@@ -47,7 +65,10 @@ def as_variable(obj, name=None, copy=False): obj = obj.copy(deep=False) elif hasattr(obj, 'dims') and (hasattr(obj, 'data') or hasattr(obj, 'values')): - obj = Variable(obj.dims, getattr(obj, 'data', obj.values), + obj_data = getattr(obj, 'data', None) + if obj_data is None: + obj_data = getattr(obj, 'values') + obj = Variable(obj.dims, obj_data, getattr(obj, 'attrs', None), getattr(obj, 'encoding', None)) elif isinstance(obj, tuple): @@ -75,7 +96,7 @@ def as_variable(obj, name=None, copy=False): 'explicit list of dimensions: %r' % obj) if name is not None and name in obj.dims: - # convert the into an Index + # convert the Variable into an Index if obj.ndim != 1: raise ValueError( '%r has more than 1-dimension and the same name as one of its ' diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index aa061516949..f5d207d0978 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -609,11 +609,14 @@ def test_no_conflicts(self): def test_as_variable(self): data = np.arange(10) expected = Variable('x', data) + expected_extra = Variable('x', data, attrs={'myattr': 'val'}, + encoding={'scale_factor': 1}) self.assertVariableIdentical(expected, as_variable(expected)) ds = Dataset({'x': expected}) - self.assertVariableIdentical(expected, as_variable(ds['x']).to_base_variable()) + var = as_variable(ds['x']).to_base_variable() + self.assertVariableIdentical(expected, var) self.assertNotIsInstance(ds['x'], Variable) self.assertIsInstance(as_variable(ds['x']), Variable) @@ -621,8 +624,20 @@ def test_as_variable(self): fake_xarray = FakeVariable(expected.values, expected.dims) self.assertVariableIdentical(expected, as_variable(fake_xarray)) - xarray_tuple = (expected.dims, expected.values) - self.assertVariableIdentical(expected, as_variable(xarray_tuple)) + FakeVariable = namedtuple('FakeVariable', 'data dims') + fake_xarray = FakeVariable(expected.data, expected.dims) + self.assertVariableIdentical(expected, 
as_variable(fake_xarray)) + + FakeVariable = namedtuple('FakeVariable', + 'data values dims attrs encoding') + fake_xarray = FakeVariable(expected_extra.data, expected_extra.values, + expected_extra.dims, expected_extra.attrs, + expected_extra.encoding) + self.assertVariableIdentical(expected_extra, as_variable(fake_xarray)) + + xarray_tuple = (expected_extra.dims, expected_extra.values, + expected_extra.attrs, expected_extra.encoding) + self.assertVariableIdentical(expected_extra, as_variable(xarray_tuple)) with self.assertRaisesRegexp(TypeError, 'tuples to convert'): as_variable(tuple(data)) @@ -637,6 +652,15 @@ def test_as_variable(self): expected = Variable([], 0) self.assertVariableIdentical(expected, actual) + data = np.arange(9).reshape((3, 3)) + expected = Variable(('x', 'y'), data) + with self.assertRaisesRegexp( + ValueError, 'without explicit dimension names'): + as_variable(data, name='x') + with self.assertRaisesRegexp( + ValueError, 'has more than 1-dimension'): + as_variable(expected, name='x') + def test_repr(self): v = Variable(['time', 'x'], [[1, 2, 3], [4, 5, 6]], {'foo': 'bar'}) expected = dedent("""