Skip to content

Commit a7d55b9

Browse files
authored
to_dict without data (#2659)
* add data=False to to_dict methods * doc and whats-new * fix pep8 errors * small tweaks * added shape and dtype
1 parent ec255eb commit a7d55b9

File tree

7 files changed

+70
-29
lines changed

7 files changed

+70
-29
lines changed

doc/io.rst

+11-1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,16 @@ require external libraries and dicts can easily be pickled, or converted to
8181
json, or geojson. All the values are converted to lists, so dicts might
8282
be quite large.
8383

84+
To export just the dataset schema, without the data itself, use the
85+
``data=False`` option:
86+
87+
.. ipython:: python
88+
89+
ds.to_dict(data=False)
90+
91+
This can be useful for generating indices of dataset contents to expose to
92+
search indices or other automated data discovery tools.
93+
8494
.. _io.netcdf:
8595

8696
netCDF
@@ -665,7 +675,7 @@ To read a consolidated store, pass the ``consolidated=True`` option to
665675
:py:func:`~xarray.open_zarr`::
666676

667677
ds = xr.open_zarr('foo.zarr', consolidated=True)
668-
678+
669679
Xarray can't perform consolidation on pre-existing zarr datasets. This should
670680
be done directly from zarr, as described in the
671681
`zarr docs <https://zarr.readthedocs.io/en/latest/tutorial.html#consolidating-metadata>`_.

doc/whats-new.rst

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ Breaking changes
2828
Enhancements
2929
~~~~~~~~~~~~
3030

31+
- Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`)
32+
By `Ryan Abernathey <https://github.com/rabernat>`_
3133
- :py:meth:`~xarray.DataArray.coarsen` and
3234
:py:meth:`~xarray.Dataset.coarsen` are newly added.
3335
See :ref:`comput.coarsen` for details.

xarray/core/dataarray.py

+10-12
Original file line numberDiff line numberDiff line change
@@ -1760,7 +1760,7 @@ def to_netcdf(self, *args, **kwargs):
17601760

17611761
return dataset.to_netcdf(*args, **kwargs)
17621762

1763-
def to_dict(self):
1763+
def to_dict(self, data=True):
17641764
"""
17651765
Convert this xarray.DataArray into a dictionary following xarray
17661766
naming conventions.
@@ -1769,22 +1769,20 @@ def to_dict(self):
17691769
Useful for converting to json. To avoid datetime incompatibility
17701770
use decode_times=False kwarg in xarray.open_dataset.
17711771
1772+
Parameters
1773+
----------
1774+
data : bool, optional
1775+
Whether to include the actual data in the dictionary. When set to
1776+
False, returns just the schema.
1777+
17721778
See also
17731779
--------
17741780
DataArray.from_dict
17751781
"""
1776-
d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs),
1777-
'dims': self.dims}
1778-
1782+
d = self.variable.to_dict(data=data)
1783+
d.update({'coords': {}, 'name': self.name})
17791784
for k in self.coords:
1780-
data = ensure_us_time_resolution(self[k].values).tolist()
1781-
d['coords'].update({
1782-
k: {'data': data,
1783-
'dims': self[k].dims,
1784-
'attrs': decode_numpy_dict_values(self[k].attrs)}})
1785-
1786-
d.update({'data': ensure_us_time_resolution(self.values).tolist(),
1787-
'name': self.name})
1785+
d['coords'][k] = self.coords[k].variable.to_dict(data=data)
17881786
return d
17891787

17901788
@classmethod

xarray/core/dataset.py

+9-12
Original file line numberDiff line numberDiff line change
@@ -3221,7 +3221,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False):
32213221

32223222
return df
32233223

3224-
def to_dict(self):
3224+
def to_dict(self, data=True):
32253225
"""
32263226
Convert this dataset to a dictionary following xarray naming
32273227
conventions.
@@ -3230,25 +3230,22 @@ def to_dict(self):
32303230
Useful for converting to json. To avoid datetime incompatibility
32313231
use decode_times=False kwarg in xarray.open_dataset.
32323232
3233+
Parameters
3234+
----------
3235+
data : bool, optional
3236+
Whether to include the actual data in the dictionary. When set to
3237+
False, returns just the schema.
3238+
32333239
See also
32343240
--------
32353241
Dataset.from_dict
32363242
"""
32373243
d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs),
32383244
'dims': dict(self.dims), 'data_vars': {}}
3239-
32403245
for k in self.coords:
3241-
data = ensure_us_time_resolution(self[k].values).tolist()
3242-
d['coords'].update({
3243-
k: {'data': data,
3244-
'dims': self[k].dims,
3245-
'attrs': decode_numpy_dict_values(self[k].attrs)}})
3246+
d['coords'].update({k: self[k].variable.to_dict(data=data)})
32463247
for k in self.data_vars:
3247-
data = ensure_us_time_resolution(self[k].values).tolist()
3248-
d['data_vars'].update({
3249-
k: {'data': data,
3250-
'dims': self[k].dims,
3251-
'attrs': decode_numpy_dict_values(self[k].attrs)}})
3248+
d['data_vars'].update({k: self[k].variable.to_dict(data=data)})
32523249
return d
32533250

32543251
@classmethod

xarray/core/variable.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
from .options import _get_keep_attrs
2020
from .pycompat import (
2121
OrderedDict, basestring, dask_array_type, integer_types, zip)
22-
from .utils import OrderedSet, either_dict_or_kwargs
22+
from .utils import (OrderedSet, either_dict_or_kwargs,
23+
decode_numpy_dict_values, ensure_us_time_resolution)
2324

2425
try:
2526
import dask.array as da
@@ -410,6 +411,16 @@ def to_index(self):
410411
"""Convert this variable to a pandas.Index"""
411412
return self.to_index_variable().to_index()
412413

414+
def to_dict(self, data=True):
    """Return a plain-dict description of this variable.

    Parameters
    ----------
    data : bool, optional
        If True (the default), the values are included under the
        ``'data'`` key as the result of ``.tolist()``. If False, the
        values are left out and ``'dtype'`` (as a string) and ``'shape'``
        entries are added instead.

    Returns
    -------
    dict
        Always contains ``'dims'`` and ``'attrs'``; the remaining keys
        depend on ``data`` as described above.
    """
    described = {'dims': self.dims,
                 'attrs': decode_numpy_dict_values(self.attrs)}
    if not data:
        # Schema-only output: summarise the array instead of dumping it.
        described['dtype'] = str(self.dtype)
        described['shape'] = self.shape
        return described
    values = ensure_us_time_resolution(self.values)
    described['data'] = values.tolist()
    return described
423+
413424
@property
414425
def dims(self):
415426
"""Tuple of dimension names with which this variable is associated.

xarray/tests/test_dataarray.py

+9
Original file line numberDiff line numberDiff line change
@@ -2909,6 +2909,15 @@ def test_to_and_from_dict(self):
29092909
ValueError, "cannot convert dict without the key 'data'"):
29102910
DataArray.from_dict(d)
29112911

2912+
# check the data=False option
2913+
expected_no_data = expected.copy()
2914+
del expected_no_data['data']
2915+
del expected_no_data['coords']['x']['data']
2916+
expected_no_data['coords']['x'].update({'dtype': '<U1', 'shape': (2,)})
2917+
expected_no_data.update({'dtype': 'float64', 'shape': (2, 3)})
2918+
actual_no_data = array.to_dict(data=False)
2919+
assert expected_no_data == actual_no_data
2920+
29122921
def test_to_and_from_dict_with_time_dim(self):
29132922
x = np.random.randn(10, 3)
29142923
t = pd.date_range('20130101', periods=10)

xarray/tests/test_dataset.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -3042,11 +3042,25 @@ def test_to_and_from_dict(self):
30423042
# check roundtrip
30433043
assert_identical(ds, Dataset.from_dict(actual))
30443044

3045+
# check the data=False option
3046+
expected_no_data = expected.copy()
3047+
del expected_no_data['coords']['t']['data']
3048+
del expected_no_data['data_vars']['a']['data']
3049+
del expected_no_data['data_vars']['b']['data']
3050+
expected_no_data['coords']['t'].update({'dtype': '<U1',
3051+
'shape': (10,)})
3052+
expected_no_data['data_vars']['a'].update({'dtype': 'float64',
3053+
'shape': (10,)})
3054+
expected_no_data['data_vars']['b'].update({'dtype': 'float64',
3055+
'shape': (10,)})
3056+
actual_no_data = ds.to_dict(data=False)
3057+
assert expected_no_data == actual_no_data
3058+
30453059
# verify coords are included roundtrip
3046-
expected = ds.set_coords('b')
3047-
actual = Dataset.from_dict(expected.to_dict())
3060+
expected_ds = ds.set_coords('b')
3061+
actual = Dataset.from_dict(expected_ds.to_dict())
30483062

3049-
assert_identical(expected, actual)
3063+
assert_identical(expected_ds, actual)
30503064

30513065
# test some incomplete dicts:
30523066
# this one has no attrs field, the dims are strings, and x, y are

0 commit comments

Comments
 (0)