Skip to content

Commit 6a0a1f4

Browse files
committed
Fix dataset tests
1 parent 510997a commit 6a0a1f4

File tree

7 files changed

+180
-161
lines changed

7 files changed

+180
-161
lines changed

doc/whats-new.rst

+14
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,20 @@ v0.9.0 (unreleased)
2121
Breaking changes
2222
~~~~~~~~~~~~~~~~
2323

24+
- Index coordinates for each dimension are now optional, and no longer created
25+
by default. This has a number of implications:
26+
27+
- :py:func:`~align` and :py:meth:`~Dataset.reindex` can now raise an error if
28+
dimension labels are missing and dimensions have different sizes.
29+
- Because pandas does not support missing indexes, methods such as
30+
``to_dataframe``/``from_dataframe`` and ``stack``/``unstack`` no longer
31+
roundtrip faithfully on all inputs. Use :py:meth:`~Dataset.reset_index` to
32+
remove undesired indexes.
33+
- ``Dataset.__delitem__`` no longer deletes all variables matching
34+
dimension names.
35+
- ``DataArray.coords.__delitem__`` is now allowed on variables matching
36+
dimension names.
37+
2438
- The default behavior of ``merge`` is now ``compat='no_conflicts'``, so some
2539
merges will now succeed in cases that previously raised
2640
``xarray.MergeError``. Set ``compat='broadcast_equals'`` to restore the

xarray/core/alignment.py

+16-10
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,11 @@ def reindex_variables(variables, dims, indexes, indexers, method=None,
197197
"""
198198
# build up indexers for assignment along each dimension
199199
to_indexers = {}
200-
to_shape = {}
201200
from_indexers = {}
201+
# size of reindexed dimensions
202+
new_sizes = {}
202203

203204
for name, index in iteritems(indexes):
204-
to_shape[name] = index.size
205205
if name in indexers:
206206
target = utils.safe_cast_to_index(indexers[name])
207207
if not index.is_unique:
@@ -210,7 +210,7 @@ def reindex_variables(variables, dims, indexes, indexers, method=None,
210210
'index has duplicate values' % name)
211211
indexer = get_indexer(index, target, method, tolerance)
212212

213-
to_shape[name] = len(target)
213+
new_sizes[name] = len(target)
214214
# Note pandas uses negative values from get_indexer to signify
215215
# values that are missing in the index
216216
# The non-negative values thus indicate the non-missing values
@@ -246,12 +246,17 @@ def var_indexers(var, indexers):
246246

247247
# create variables for the new dataset
248248
reindexed = OrderedDict()
249-
for name, var in iteritems(variables):
250-
if name in indexers:
251-
# no need to copy, because index data is immutable
252-
new_var = IndexVariable(var.dims, indexers[name], var.attrs,
253-
var.encoding)
249+
250+
for dim, indexer in indexers.items():
251+
if dim in variables:
252+
var = variables[dim]
253+
args = (var.attrs, var.encoding)
254254
else:
255+
args = ()
256+
reindexed[dim] = IndexVariable((dim,), indexers[dim], *args)
257+
258+
for name, var in iteritems(variables):
259+
if name not in indexers:
255260
assign_to = var_indexers(var, to_indexers)
256261
assign_from = var_indexers(var, from_indexers)
257262

@@ -261,7 +266,8 @@ def var_indexers(var, indexers):
261266
dtype, fill_value = _maybe_promote(var.dtype)
262267

263268
if isinstance(data, np.ndarray):
264-
shape = tuple(to_shape[dim] for dim in var.dims)
269+
shape = tuple(new_sizes.get(dim, size)
270+
for dim, size in zip(var.dims, var.shape))
265271
new_data = np.empty(shape, dtype=dtype)
266272
new_data[...] = fill_value
267273
# create a new Variable so we can use orthogonal indexing
@@ -291,7 +297,7 @@ def var_indexers(var, indexers):
291297
# we neither created a new ndarray nor used fancy indexing
292298
new_var = var.copy(deep=copy)
293299

294-
reindexed[name] = new_var
300+
reindexed[name] = new_var
295301
return reindexed
296302

297303

xarray/core/coordinates.py

+17-10
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def to_index(self, ordered_dims=None):
5151
"""
5252
if ordered_dims is None:
5353
ordered_dims = self.dims
54-
indexes = [self.variables[k].to_index() for k in ordered_dims]
54+
indexes = [self.indexes.get(k) for k in ordered_dims]
5555
return pd.MultiIndex.from_product(indexes, names=list(ordered_dims))
5656

5757
def update(self, other):
@@ -213,9 +213,6 @@ def to_dataset(self):
213213
return self._to_dataset()
214214

215215
def __delitem__(self, key):
216-
if key in self.dims:
217-
raise ValueError('cannot delete a coordinate corresponding to a '
218-
'DataArray dimension')
219216
del self._data._coords[key]
220217

221218

@@ -244,11 +241,11 @@ def __init__(self, variables, dims):
244241
245242
Arguments
246243
---------
247-
variables : OrderedDict
244+
variables : OrderedDict[Any, Variable]
248245
Reference to OrderedDict holding variable objects. Should be the
249246
same dictionary used by the source object.
250-
dims : sequence or mapping
251-
Should be the same dimensions used by the source object.
247+
dims : OrderedDict[Any, int]
248+
Map from dimension names to sizes.
252249
"""
253250
self._variables = variables
254251
self._dims = dims
@@ -265,10 +262,20 @@ def __contains__(self, key):
265262
return key in self._dims and key in self._variables
266263

267264
def __getitem__(self, key):
268-
if key in self:
269-
return self._variables[key].to_index()
270-
else:
265+
if key not in self._dims:
271266
raise KeyError(key)
267+
return self._variables[key].to_index()
272268

273269
def __unicode__(self):
274270
return formatting.indexes_repr(self)
271+
272+
def get(self, key):
273+
"""Get an index for a dimension, supplying default RangeIndex if needed.
274+
"""
275+
if key not in self._dims:
276+
raise KeyError(key)
277+
278+
if key in self._variables:
279+
return self._variables[key].to_index()
280+
else:
281+
return pd.Index(range(self._dims[key]), name=key)

xarray/core/dataarray.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from .coordinates import (DataArrayCoordinates, LevelCoordinatesSource,
1818
Indexes)
1919
from .dataset import Dataset
20-
from .pycompat import iteritems, basestring, OrderedDict, zip
20+
from .pycompat import iteritems, basestring, OrderedDict, zip, range
2121
from .variable import (as_variable, Variable, as_compatible_data, IndexVariable,
2222
default_index_coordinate,
2323
assert_unique_multiindex_level_names)
@@ -505,7 +505,7 @@ def encoding(self, value):
505505
def indexes(self):
506506
"""OrderedDict of pandas.Index objects used for label based indexing
507507
"""
508-
return Indexes(self._coords, self.dims)
508+
return Indexes(self._coords, OrderedDict(zip(self.dims, self.shape)))
509509

510510
@property
511511
def coords(self):
@@ -1066,7 +1066,8 @@ def to_pandas(self):
10661066
except KeyError:
10671067
raise ValueError('cannot convert arrays with %s dimensions into '
10681068
'pandas objects' % self.ndim)
1069-
return constructor(self.values, *self.indexes.values())
1069+
indexes = [self.indexes.get(dim) for dim in self.dims]
1070+
return constructor(self.values, *indexes)
10701071

10711072
def to_dataframe(self, name=None):
10721073
"""Convert this array and its coordinates into a tidy pandas.DataFrame.

xarray/core/dataset.py

+15-31
Original file line numberDiff line numberDiff line change
@@ -557,22 +557,9 @@ def __setitem__(self, key, value):
557557

558558
def __delitem__(self, key):
559559
"""Remove a variable from this dataset.
560-
561-
If this variable is a dimension, all variables containing this
562-
dimension are also removed.
563560
"""
564-
def remove(k):
565-
del self._variables[k]
566-
self._coord_names.discard(k)
567-
568-
remove(key)
569-
570-
if key in self._dims:
571-
del self._dims[key]
572-
also_delete = [k for k, v in iteritems(self._variables)
573-
if key in v.dims]
574-
for key in also_delete:
575-
remove(key)
561+
del self._variables[key]
562+
self._coord_names.discard(key)
576563

577564
# mutable objects should not be hashable
578565
__hash__ = None
@@ -1224,7 +1211,9 @@ def reindex(self, indexers=None, method=None, tolerance=None, copy=True, **kw_in
12241211
variables = alignment.reindex_variables(
12251212
self.variables, self.dims, self.indexes, indexers, method,
12261213
tolerance, copy=copy)
1227-
return self._replace_vars_and_dims(variables)
1214+
coord_names = set(self._coord_names)
1215+
coord_names.update(indexers)
1216+
return self._replace_vars_and_dims(variables, coord_names)
12281217

12291218
def rename(self, name_dict, inplace=False):
12301219
"""Returns a new object with renamed variables and dimensions.
@@ -1250,9 +1239,9 @@ def rename(self, name_dict, inplace=False):
12501239
DataArray.rename
12511240
"""
12521241
for k, v in name_dict.items():
1253-
if k not in self:
1242+
if k not in self and k not in self.dims:
12541243
raise ValueError("cannot rename %r because it is not a "
1255-
"variable in this dataset" % k)
1244+
"variable or dimension in this dataset" % k)
12561245
if v in self and k != v:
12571246
raise ValueError('the new name %r already exists' % v)
12581247

@@ -1339,18 +1328,8 @@ def _stack_once(self, dims, new_dim):
13391328
else:
13401329
variables[name] = var.copy(deep=False)
13411330

1342-
indexes = self.indexes
1343-
dim_sizes = self.dims
1344-
1345-
levels = []
1346-
for dim in dims:
1347-
if dim in indexes:
1348-
level = indexes[dim]
1349-
else:
1350-
level = np.arange(dim_sizes[dim])
1351-
levels.append(level)
1352-
13531331
# consider dropping levels that are unused?
1332+
levels = [self.indexes.get(dim) for dim in dims]
13541333
idx = utils.multiindex_from_product_levels(levels, names=dims)
13551334
variables[new_dim] = IndexVariable(new_dim, idx)
13561335

@@ -1409,7 +1388,7 @@ def unstack(self, dim):
14091388
if dim not in self.dims:
14101389
raise ValueError('invalid dimension: %s' % dim)
14111390

1412-
index = self.indexes[dim]
1391+
index = self.indexes.get(dim)
14131392
if not isinstance(index, pd.MultiIndex):
14141393
raise ValueError('cannot unstack a dimension that does not have '
14151394
'a MultiIndex')
@@ -1551,7 +1530,12 @@ def drop(self, labels, dim=None):
15511530
if dim is None:
15521531
return self._drop_vars(labels)
15531532
else:
1554-
new_index = self.indexes[dim].drop(labels)
1533+
try:
1534+
index = self.indexes[dim]
1535+
except KeyError:
1536+
raise ValueError(
1537+
'dimension %r does not have coordinate labels' % dim)
1538+
new_index = index.drop(labels)
15551539
return self.loc[{dim: new_index}]
15561540

15571541
def _drop_vars(self, names):

xarray/test/test_dataarray.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -651,11 +651,9 @@ def test_coords(self):
651651
actual = repr(da.coords)
652652
self.assertEquals(expected, actual)
653653

654-
with self.assertRaisesRegexp(ValueError, 'cannot delete'):
655-
del da['x']
656-
657-
with self.assertRaisesRegexp(ValueError, 'cannot delete'):
658-
del da.coords['x']
654+
del da.coords['x']
655+
expected = DataArray(da.values, {'y': [0, 1, 2]}, dims=['x', 'y'])
656+
self.assertDataArrayIdentical(da, expected)
659657

660658
with self.assertRaisesRegexp(ValueError, 'conflicting MultiIndex'):
661659
self.mda['level_1'] = np.arange(4)

0 commit comments

Comments
 (0)