Skip to content

Commit 1d0a2bc

Browse files
authored
Detailed report for testing.assert_equal and testing.assert_identical (#1507)
* more detailed AssertionError message for assert_identical * print differing dimensions/data/variables/attributes * minor tweaks * add what's new entry * add tests for diff_array_repr and diff_dataset_repr * pep8 * add differing dimensions in diff_array_repr * fix tests (explicit numpy dtypes) * fix tests (dtype shown / not shown in array repr) * minor tweaks
1 parent dc87dea commit 1d0a2bc

File tree

4 files changed

+272
-11
lines changed

4 files changed

+272
-11
lines changed

doc/whats-new.rst

+5
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ Enhancements
3636
- Upsampling an array via interpolation with resample is now dask-compatible,
3737
as long as the array is not chunked along the resampling dimension.
3838
By `Spencer Clark <https://github.com/spencerkclark>`_.
39+
- :py:func:`xarray.testing.assert_equal` and
40+
:py:func:`xarray.testing.assert_identical` now provide a more detailed
41+
report showing what exactly differs between the two objects (dimensions /
42+
coordinates / variables / attributes) (:issue:`1507`).
43+
By `Benoit Bovy <https://github.com/benbovy>`_.
3944

4045
Bug fixes
4146
~~~~~~~~~

xarray/core/formatting.py

+140-6
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import numpy as np
1414
import pandas as pd
1515

16+
from .duck_array_ops import array_equiv
1617
from .options import OPTIONS
1718
from .pycompat import (
1819
PY2, bytes_type, dask_array_type, unicode_type, zip_longest)
@@ -411,6 +412,15 @@ def short_dask_repr(array, show_dtype=True):
411412
return 'dask.array<shape=%s, chunksize=%s>' % (array.shape, chunksize)
412413

413414

415+
def short_data_repr(array):
    """Return a short text representation of the data held by ``array``.

    Three cases are handled, in order:

    * the underlying ``_data`` is a dask array: delegate to
      ``short_dask_repr``;
    * the data is in memory, or has fewer than 1e5 elements: render the
      values with ``short_array_repr``;
    * anything else: report only the number of values and the dtype.
    """
    # Objects exposing a ``variable`` attribute are unwrapped first; any
    # other object is inspected directly.
    holder = getattr(array, 'variable', array)
    if isinstance(holder._data, dask_array_type):
        return short_dask_repr(array)

    if array._in_memory or array.size < 1e5:
        return short_array_repr(array.values)

    return u'[%s values with dtype=%s]' % (array.size, array.dtype)
422+
423+
414424
def array_repr(arr):
415425
# used for DataArray, Variable and IndexVariable
416426
if hasattr(arr, 'name') and arr.name is not None:
@@ -421,12 +431,7 @@ def array_repr(arr):
421431
summary = [u'<xarray.%s %s(%s)>'
422432
% (type(arr).__name__, name_str, dim_summary(arr))]
423433

424-
if isinstance(getattr(arr, 'variable', arr)._data, dask_array_type):
425-
summary.append(short_dask_repr(arr))
426-
elif arr._in_memory or arr.size < 1e5:
427-
summary.append(short_array_repr(arr.values))
428-
else:
429-
summary.append(u'[%s values with dtype=%s]' % (arr.size, arr.dtype))
434+
summary.append(short_data_repr(arr))
430435

431436
if hasattr(arr, 'coords'):
432437
if arr.coords:
@@ -463,3 +468,132 @@ def dataset_repr(ds):
463468
summary.append(attrs_repr(ds.attrs))
464469

465470
return u'\n'.join(summary)
471+
472+
473+
def diff_dim_summary(a, b):
    """Describe how the dimensions of ``a`` and ``b`` differ.

    Returns an empty string when ``a.dims == b.dims``; otherwise returns a
    two-line message showing the ``dim_summary`` of each object.
    """
    if a.dims == b.dims:
        return ""

    left, right = dim_summary(a), dim_summary(b)
    return "Differing dimensions:\n ({}) != ({})".format(left, right)
479+
480+
481+
def _diff_mapping_repr(a_mapping, b_mapping, compat,
482+
title, summarizer, col_width=None):
483+
484+
def extra_items_repr(extra_keys, mapping, ab_side):
485+
extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys]
486+
if extra_repr:
487+
header = "{} only on the {} object:".format(title, ab_side)
488+
return [header] + extra_repr
489+
else:
490+
return []
491+
492+
a_keys = set(a_mapping)
493+
b_keys = set(b_mapping)
494+
495+
summary = []
496+
497+
diff_items = []
498+
499+
for k in a_keys & b_keys:
500+
try:
501+
# compare xarray variable
502+
compatible = getattr(a_mapping[k], compat)(b_mapping[k])
503+
is_variable = True
504+
except AttributeError:
505+
# compare attribute value
506+
compatible = a_mapping[k] == b_mapping[k]
507+
is_variable = False
508+
509+
if not compatible:
510+
temp = [summarizer(k, vars[k], col_width)
511+
for vars in (a_mapping, b_mapping)]
512+
513+
if compat == 'identical' and is_variable:
514+
attrs_summary = []
515+
516+
for m in (a_mapping, b_mapping):
517+
attr_s = "\n".join([summarize_attr(ak, av)
518+
for ak, av in m[k].attrs.items()])
519+
attrs_summary.append(attr_s)
520+
521+
temp = ["\n".join([var_s, attr_s]) if attr_s else var_s
522+
for var_s, attr_s in zip(temp, attrs_summary)]
523+
524+
diff_items += [ab_side + s[1:]
525+
for ab_side, s in zip(('L', 'R'), temp)]
526+
527+
if diff_items:
528+
summary += ["Differing {}:".format(title.lower())] + diff_items
529+
530+
summary += extra_items_repr(a_keys - b_keys, a_mapping, "left")
531+
summary += extra_items_repr(b_keys - a_keys, b_mapping, "right")
532+
533+
return "\n".join(summary)
534+
535+
536+
# Specializations of ``_diff_mapping_repr`` with ``title`` and ``summarizer``
# pre-bound. Callers still supply ``a_mapping``, ``b_mapping`` and ``compat``,
# and may pass ``col_width``.

# Differences between two coordinate mappings.
diff_coords_repr = functools.partial(_diff_mapping_repr,
                                     title="Coordinates",
                                     summarizer=summarize_coord)


# Differences between two data-variable mappings.
diff_data_vars_repr = functools.partial(_diff_mapping_repr,
                                        title="Data variables",
                                        summarizer=summarize_datavar)


# Differences between two attribute mappings.
diff_attrs_repr = functools.partial(_diff_mapping_repr,
                                    title="Attributes",
                                    summarizer=summarize_attr)
549+
550+
551+
def _compat_to_str(compat):
552+
if compat == "equals":
553+
return "equal"
554+
else:
555+
return compat
556+
557+
558+
def diff_array_repr(a, b, compat):
    """Build the report explaining how two array objects differ.

    Used for DataArray, Variable and IndexVariable.

    Parameters
    ----------
    a, b : array objects
        The "left" and "right" objects. Their ``dims`` and ``data`` are
        always compared; ``coords`` are compared when ``a`` has that
        attribute; ``attrs`` are compared only for ``compat='identical'``.
    compat : str
        Comparison method name, 'equals' or 'identical'.

    Returns
    -------
    str
        Newline-joined report. Sections with nothing to report are
        omitted, so the result has no blank or trailing empty lines.
    """
    sections = ["Left and right {} objects are not {}"
                .format(type(a).__name__, _compat_to_str(compat))]

    sections.append(diff_dim_summary(a, b))

    if not array_equiv(a.data, b.data):
        temp = [wrap_indent(short_array_repr(obj), start=' ')
                for obj in (a, b)]
        diff_data_repr = [ab_side + "\n" + ab_data_repr
                          for ab_side, ab_data_repr in zip(('L', 'R'), temp)]
        sections += ["Differing values:"] + diff_data_repr

    if hasattr(a, 'coords'):
        col_width = _calculate_col_width(set(a.coords) | set(b.coords))
        sections.append(diff_coords_repr(a.coords, b.coords, compat,
                                         col_width=col_width))

    if compat == 'identical':
        sections.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    # The helpers above return "" when they have nothing to report; joining
    # those would leave stray blank lines in the assertion message.
    return "\n".join([section for section in sections if section])
581+
582+
583+
def diff_dataset_repr(a, b, compat):
    """Build the report explaining how two Dataset objects differ.

    Parameters
    ----------
    a, b : dataset objects
        The "left" and "right" objects. Their dimensions, ``coords`` and
        ``data_vars`` are always compared; ``attrs`` are compared only for
        ``compat='identical'``.
    compat : str
        Comparison method name, 'equals' or 'identical'.

    Returns
    -------
    str
        Newline-joined report. Sections with nothing to report are
        omitted, so the result has no blank or trailing empty lines.
    """
    sections = ["Left and right {} objects are not {}"
                .format(type(a).__name__, _compat_to_str(compat))]

    # One shared column width, computed over the variables of both objects,
    # keeps the coordinate and data-variable sections aligned.
    col_width = _calculate_col_width(
        set(_get_col_items(a.variables) + _get_col_items(b.variables)))

    sections.append(diff_dim_summary(a, b))
    sections.append(diff_coords_repr(a.coords, b.coords, compat,
                                     col_width=col_width))
    sections.append(diff_data_vars_repr(a.data_vars, b.data_vars, compat,
                                        col_width=col_width))

    if compat == 'identical':
        sections.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    # The helpers above return "" when they have nothing to report; joining
    # those would leave stray blank lines in the assertion message.
    return "\n".join([section for section in sections if section])

xarray/testing.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import numpy as np
55

66
from xarray.core import duck_array_ops
7+
from xarray.core import formatting
78

89

910
def _decode_string_data(data):
@@ -49,8 +50,10 @@ def assert_equal(a, b):
4950
import xarray as xr
5051
__tracebackhide__ = True # noqa: F841
5152
assert type(a) == type(b) # noqa
52-
if isinstance(a, (xr.Variable, xr.DataArray, xr.Dataset)):
53-
assert a.equals(b), '{}\n{}'.format(a, b)
53+
if isinstance(a, (xr.Variable, xr.DataArray)):
54+
assert a.equals(b), formatting.diff_array_repr(a, b, 'equals')
55+
elif isinstance(a, xr.Dataset):
56+
assert a.equals(b), formatting.diff_dataset_repr(a, b, 'equals')
5457
else:
5558
raise TypeError('{} not supported by assertion comparison'
5659
.format(type(a)))
@@ -76,11 +79,13 @@ def assert_identical(a, b):
7679
import xarray as xr
7780
__tracebackhide__ = True # noqa: F841
7881
assert type(a) == type(b) # noqa
79-
if isinstance(a, xr.DataArray):
82+
if isinstance(a, xr.Variable):
83+
assert a.identical(b), formatting.diff_array_repr(a, b, 'identical')
84+
elif isinstance(a, xr.DataArray):
8085
assert a.name == b.name
81-
assert_identical(a._to_temp_dataset(), b._to_temp_dataset())
86+
assert a.identical(b), formatting.diff_array_repr(a, b, 'identical')
8287
elif isinstance(a, (xr.Dataset, xr.Variable)):
83-
assert a.identical(b), '{}\n{}'.format(a, b)
88+
assert a.identical(b), formatting.diff_dataset_repr(a, b, 'identical')
8489
else:
8590
raise TypeError('{} not supported by assertion comparison'
8691
.format(type(a)))

xarray/tests/test_formatting.py

+117
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, division, print_function
33

4+
from textwrap import dedent
5+
46
import numpy as np
57
import pandas as pd
68

9+
import xarray as xr
710
from xarray.core import formatting
811
from xarray.core.pycompat import PY3
912

@@ -190,6 +193,120 @@ def test_attribute_repr(self):
190193
assert u'\n' not in newlines
191194
assert u'\t' not in tabs
192195

196+
def test_diff_array_repr(self):
    """Check the report produced by ``formatting.diff_array_repr``.

    Covers a pair of DataArrays compared with 'identical' and a pair of
    Variables compared with 'equals'.
    """
    # Left DataArray: two dimensions, coordinates 'x' and 'y', two attributes.
    da_a = xr.DataArray(
        np.array([[1, 2, 3], [4, 5, 6]], dtype='int64'),
        dims=('x', 'y'),
        coords={'x': np.array(['a', 'b'], dtype='U1'),
                'y': np.array([1, 2, 3], dtype='int64')},
        attrs={'units': 'm', 'description': 'desc'})

    # Right DataArray: one dimension, different 'x' values, an extra 'label'
    # coordinate, no 'y' coordinate, and a different 'units' attribute.
    da_b = xr.DataArray(
        np.array([1, 2], dtype='int64'),
        dims='x',
        coords={'x': np.array(['a', 'c'], dtype='U1'),
                'label': ('x', np.array([1, 2], dtype='int64'))},
        attrs={'units': 'kg'})

    expected = dedent("""\
    Left and right DataArray objects are not identical
    Differing dimensions:
    (x: 2, y: 3) != (x: 2)
    Differing values:
    L
    array([[1, 2, 3],
    [4, 5, 6]], dtype=int64)
    R
    array([1, 2], dtype=int64)
    Differing coordinates:
    L * x (x) <U1 'a' 'b'
    R * x (x) <U1 'a' 'c'
    Coordinates only on the left object:
    * y (y) int64 1 2 3
    Coordinates only on the right object:
    label (x) int64 1 2
    Differing attributes:
    L units: m
    R units: kg
    Attributes only on the left object:
    description: desc""")

    actual = formatting.diff_array_repr(da_a, da_b, 'identical')
    try:
        assert actual == expected
    except AssertionError:
        # depending on platform, dtype may not be shown in numpy array repr
        assert actual == expected.replace(", dtype=int64", "")

    # Variables: the left one is 1-D with an attribute, the right one is 2-D.
    va = xr.Variable('x', np.array([1, 2, 3], dtype='int64'),
                     {'title': 'test Variable'})
    vb = xr.Variable(('x', 'y'),
                     np.array([[1, 2, 3], [4, 5, 6]], dtype='int64'))

    # With 'equals' the expected report has no attribute section, even though
    # only ``va`` carries a 'title' attribute.
    expected = dedent("""\
    Left and right Variable objects are not equal
    Differing dimensions:
    (x: 3) != (x: 2, y: 3)
    Differing values:
    L
    array([1, 2, 3], dtype=int64)
    R
    array([[1, 2, 3],
    [4, 5, 6]], dtype=int64)""")

    actual = formatting.diff_array_repr(va, vb, 'equals')
    try:
        assert actual == expected
    except AssertionError:
        # same platform-dependent dtype fallback as above
        assert actual == expected.replace(", dtype=int64", "")
262+
263+
def test_diff_dataset_repr(self):
    """Check the report produced by ``formatting.diff_dataset_repr``.

    Compares two Datasets with 'identical'; they differ in dimensions,
    coordinates, data variables and attributes.
    """
    # Left Dataset: two data variables, coordinates 'x' and 'y', two attributes.
    ds_a = xr.Dataset(
        data_vars={
            'var1': (('x', 'y'),
                     np.array([[1, 2, 3], [4, 5, 6]], dtype='int64')),
            'var2': ('x', np.array([3, 4], dtype='int64'))
        },
        coords={'x': np.array(['a', 'b'], dtype='U1'),
                'y': np.array([1, 2, 3], dtype='int64')},
        attrs={'units': 'm', 'description': 'desc'}
    )

    # Right Dataset: 'var1' is 1-D, 'var2' is missing, 'x' has different
    # values plus a 'source' attribute, and 'label' replaces 'y'.
    ds_b = xr.Dataset(
        data_vars={'var1': ('x', np.array([1, 2], dtype='int64'))},
        coords={
            'x': ('x', np.array(['a', 'c'], dtype='U1'), {'source': 0}),
            'label': ('x', np.array([1, 2], dtype='int64'))
        },
        attrs={'units': 'kg'}
    )

    # The 'source: 0' line under "R * x" is the attribute of the right-hand
    # 'x' coordinate.
    expected = dedent("""\
    Left and right Dataset objects are not identical
    Differing dimensions:
    (x: 2, y: 3) != (x: 2)
    Differing coordinates:
    L * x (x) <U1 'a' 'b'
    R * x (x) <U1 'a' 'c'
    source: 0
    Coordinates only on the left object:
    * y (y) int64 1 2 3
    Coordinates only on the right object:
    label (x) int64 1 2
    Differing data variables:
    L var1 (x, y) int64 1 2 3 4 5 6
    R var1 (x) int64 1 2
    Data variables only on the left object:
    var2 (x) int64 3 4
    Differing attributes:
    L units: m
    R units: kg
    Attributes only on the left object:
    description: desc""")

    actual = formatting.diff_dataset_repr(ds_a, ds_b, 'identical')
    assert actual == expected
309+
193310

194311
def test_set_numpy_options():
195312
original_options = np.get_printoptions()

0 commit comments

Comments
 (0)