Skip to content

dataset __repr__ updates #5580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Aug 21, 2021
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ Breaking changes
pre-existing array values. This is a safer default than the prior ``mode="a"``,
and allows for higher performance writes (:pull:`5252`).
By `Stephan Hoyer <https://github.com/shoyer>`_.
- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``attrs``, ``coords``,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that .attrs is a standard Python dict, so there's no custom repr:

Suggested change
- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``attrs``, ``coords``,
- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``coords``

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, there is an attrs_repr in xarray/core/formatting.py.
But I already changed the whats_new entry, so I'll probably have to revert that again.

and ``data_vars`` ignore ``xarray.set_options(display_max_rows=...)`` and
show the full output when called directly as, e.g., ``ds.data_vars`` or
``print(ds.data_vars)`` (:issue:`5545`, :pull:`5580`).
By `Stefan Bender <https://github.com/st-bender>`_.

Deprecations
~~~~~~~~~~~~
Expand Down
14 changes: 7 additions & 7 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,14 +377,12 @@ def _mapping_repr(
):
if col_width is None:
col_width = _calculate_col_width(mapping)
if max_rows is None:
max_rows = OPTIONS["display_max_rows"]
summary = [f"{title}:"]
if mapping:
len_mapping = len(mapping)
if not _get_boolean_with_default(expand_option_name, default=True):
summary = [f"{summary[0]} ({len_mapping})"]
elif len_mapping > max_rows:
elif max_rows is not None and len_mapping > max_rows:
summary = [f"{summary[0]} ({max_rows}/{len_mapping})"]
first_rows = max_rows // 2 + max_rows % 2
items = list(mapping.items())
Expand Down Expand Up @@ -416,7 +414,7 @@ def _mapping_repr(
)


def coords_repr(coords, col_width=None):
def coords_repr(coords, col_width=None, max_rows=None):
if col_width is None:
col_width = _calculate_col_width(_get_col_items(coords))
return _mapping_repr(
Expand All @@ -425,6 +423,7 @@ def coords_repr(coords, col_width=None):
summarizer=summarize_coord,
expand_option_name="display_expand_coords",
col_width=col_width,
max_rows=max_rows,
)


Expand Down Expand Up @@ -542,21 +541,22 @@ def dataset_repr(ds):
summary = ["<xarray.{}>".format(type(ds).__name__)]

col_width = _calculate_col_width(_get_col_items(ds.variables))
max_rows = OPTIONS["display_max_rows"]

dims_start = pretty_print("Dimensions:", col_width)
summary.append("{}({})".format(dims_start, dim_summary(ds)))

if ds.coords:
summary.append(coords_repr(ds.coords, col_width=col_width))
summary.append(coords_repr(ds.coords, col_width=col_width, max_rows=max_rows))

unindexed_dims_str = unindexed_dims_repr(ds.dims, ds.coords)
if unindexed_dims_str:
summary.append(unindexed_dims_str)

summary.append(data_vars_repr(ds.data_vars, col_width=col_width))
summary.append(data_vars_repr(ds.data_vars, col_width=col_width, max_rows=max_rows))

if ds.attrs:
summary.append(attrs_repr(ds.attrs))
summary.append(attrs_repr(ds.attrs, max_rows=max_rows))

return "\n".join(summary)

Expand Down
35 changes: 27 additions & 8 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,41 +509,60 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr):
long_name = "long_name"
a = np.core.defchararray.add(long_name, np.arange(0, n_vars).astype(str))
b = np.core.defchararray.add("attr_", np.arange(0, n_attr).astype(str))
c = np.core.defchararray.add("coord", np.arange(0, n_vars).astype(str))
attrs = {k: 2 for k in b}
coords = dict(time=np.array([0, 1]))
coords = {_c: np.array([0, 1]) for _c in c}
data_vars = dict()
for v in a:
for (v, _c) in zip(a, coords.items()):
data_vars[v] = xr.DataArray(
name=v,
data=np.array([3, 4]),
dims=["time"],
coords=coords,
dims=[_c[0]],
coords=dict([_c]),
)
ds = xr.Dataset(data_vars)
ds.attrs = attrs

with xr.set_options(display_max_rows=display_max_rows):

# Parse the data_vars print and show only data_vars rows:
summary = formatting.data_vars_repr(ds.data_vars).split("\n")
summary = formatting.dataset_repr(ds).split("\n")
summary = [v for v in summary if long_name in v]

# The length should be less than or equal to display_max_rows:
len_summary = len(summary)
data_vars_print_size = min(display_max_rows, len_summary)
assert len_summary == data_vars_print_size

summary = formatting.data_vars_repr(ds.data_vars).split("\n")
summary = [v for v in summary if long_name in v]
# The length should be equal to the number of data variables
len_summary = len(summary)
assert len_summary == n_vars

summary = formatting.coords_repr(ds.coords).split("\n")
summary = [v for v in summary if "coord" in v]
# The length should be equal to the number of coordinates (one per data variable)
len_summary = len(summary)
assert len_summary == n_vars

summary = formatting.attrs_repr(ds.attrs).split("\n")
summary = [v for v in summary if "attr_" in v]
# The length should be equal to the number of attributes
len_summary = len(summary)
assert len_summary == n_attr
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to that, I don't think we need to test this, because attrs_repr will only ever be called by dataset_repr / array_repr; on its own, the standard Python dict's repr will be used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's used to format the Attributes section of Dataset, DataArray and Variable objects; repr(ds.attrs) will call dict.__repr__.

Yes, you are right, I missed that.


with xr.set_options(
display_expand_coords=False,
display_expand_data_vars=False,
display_expand_attrs=False,
):
actual = formatting.dataset_repr(ds)
coord_s = ", ".join([f"{c}: {len(v)}" for c, v in coords.items()])
expected = dedent(
f"""\
<xarray.Dataset>
Dimensions: (time: 2)
Coordinates: (1)
Dimensions: ({coord_s})
Coordinates: (40)
Data variables: ({n_vars})
Attributes: ({n_attr})"""
)
Expand Down