-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
DataArray: propagate index coordinates with non-array dimensions #10116
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f93b45a
01fc5fd
75b9ae5
b647df7
90ce0f9
ebcfa22
f27f2d0
528f356
27a1e38
695fb86
eea90cf
bc578d8
5e8093b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -130,22 +130,59 @@ | |||||
T_XarrayOther = TypeVar("T_XarrayOther", bound="DataArray" | Dataset) | ||||||
|
||||||
|
||||||
def _check_coords_dims(shape, coords, dim): | ||||||
sizes = dict(zip(dim, shape, strict=True)) | ||||||
for k, v in coords.items(): | ||||||
if any(d not in dim for d in v.dims): | ||||||
def check_dataarray_coords( | ||||||
shape: tuple[int, ...], coords: Coordinates, dims: tuple[Hashable, ...] | ||||||
): | ||||||
benbovy marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
"""Check that ``coords`` dimension names and sizes do not conflict with | ||||||
array ``shape`` and dimensions ``dims``. | ||||||
|
||||||
If a coordinate is associated with an index, the coordinate may have any | ||||||
arbitrary dimension(s) as long as the index's dimensions (i.e., the union of | ||||||
the dimensions of all coordinates associated with this index) intersects the | ||||||
array dimensions. | ||||||
|
||||||
If a coordinate has no index, then its dimensions much match (or be a subset | ||||||
of) the array dimensions. Scalar coordinates are also allowed. | ||||||
|
||||||
""" | ||||||
indexes = coords.xindexes | ||||||
skip_check_coord_name: set[Hashable] = set() | ||||||
skip_check_dim_size: set[Hashable] = set() | ||||||
|
||||||
# check dimension names | ||||||
for name, var in coords.items(): | ||||||
if name in skip_check_coord_name: | ||||||
continue | ||||||
elif name in indexes: | ||||||
index_dims = indexes.get_all_dims(name) | ||||||
if any(d in dims for d in index_dims): | ||||||
# can safely skip checking index's non-array dimensions | ||||||
# and index's other coordinates since those must be all | ||||||
# included in the dataarray so the index is not corrupted | ||||||
skip_check_coord_name.update(indexes.get_all_coords(name)) | ||||||
skip_check_dim_size.update(d for d in index_dims if d not in dims) | ||||||
raise_error = False | ||||||
else: | ||||||
raise_error = True | ||||||
else: | ||||||
raise_error = any(d not in dims for d in var.dims) | ||||||
if raise_error: | ||||||
raise ValueError( | ||||||
f"coordinate {k} has dimensions {v.dims}, but these " | ||||||
f"coordinate {name} has dimensions {var.dims}, but these " | ||||||
"are not a subset of the DataArray " | ||||||
f"dimensions {dim}" | ||||||
f"dimensions {dims}" | ||||||
) | ||||||
Comment on lines
+152
to
174
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm having a hard time following the logic in this loop. Could you please separate it into two separate loops:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably even better checking indexes and non-indexed coordinates in two separate loops, as you suggests in #10116 (comment). |
||||||
|
||||||
for d, s in v.sizes.items(): | ||||||
if s != sizes[d]: | ||||||
# check dimension sizes | ||||||
sizes = dict(zip(dims, shape, strict=True)) | ||||||
|
||||||
for name, var in coords.items(): | ||||||
for d, s in var.sizes.items(): | ||||||
if d not in skip_check_dim_size and s != sizes[d]: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I understand correctly, the only case in which a dimension is in In that case, I think we could simplify this by checking
Suggested change
|
||||||
raise ValueError( | ||||||
f"conflicting sizes for dimension {d!r}: " | ||||||
f"length {sizes[d]} on the data but length {s} on " | ||||||
f"coordinate {k!r}" | ||||||
f"coordinate {name!r}" | ||||||
) | ||||||
|
||||||
|
||||||
|
@@ -212,8 +249,6 @@ def _infer_coords_and_dims( | |||||
var.dims = (dim,) | ||||||
new_coords[dim] = var.to_index_variable() | ||||||
|
||||||
_check_coords_dims(shape, new_coords, dims_tuple) | ||||||
|
||||||
return new_coords, dims_tuple | ||||||
|
||||||
|
||||||
|
@@ -487,6 +522,7 @@ def __init__( | |||||
|
||||||
if not isinstance(coords, Coordinates): | ||||||
coords = create_coords_with_default_indexes(coords) | ||||||
check_dataarray_coords(data.shape, coords, dims) | ||||||
indexes = dict(coords.xindexes) | ||||||
coords = {k: v.copy() for k, v in coords.variables.items()} | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1210,10 +1210,20 @@ def _construct_dataarray(self, name: Hashable) -> DataArray: | |
needed_dims = set(variable.dims) | ||
|
||
coords: dict[Hashable, Variable] = {} | ||
temp_indexes = self.xindexes | ||
# preserve ordering | ||
for k in self._variables: | ||
if k in self._coord_names and set(self._variables[k].dims) <= needed_dims: | ||
coords[k] = self._variables[k] | ||
if k in self._coord_names: | ||
benbovy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if ( | ||
k not in coords | ||
and k in temp_indexes | ||
and set(temp_indexes.get_all_dims(k)) & needed_dims | ||
): | ||
# add all coordinates of each index that shares at least one dimension | ||
# with the dimensions of the extracted variable | ||
coords.update(temp_indexes.get_all_coords(k)) | ||
Comment on lines
+1217
to
+1224
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Coud this use a separate loop after the existing loop instead? e.g., for k in self._indexes:
if k in coords:
coords.update(self.xindexes.get_all_coords(k)) Or if we allow indexes without a coordinate of the same name: for k in self._indexes:
if set(self.xindexes.get_all_dims(k)) & needed_dims:
coords.update(self.xindexes.get_all_coords(k)) Ideally, I would like the logic here to be just as simple as the words describing how it works, so a comment is not necessary! |
||
elif set(self._variables[k].dims) <= needed_dims: | ||
coords[k] = self._variables[k] | ||
|
||
indexes = filter_indexes_from_coords(self._indexes, set(coords)) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -330,10 +330,13 @@ def _assert_indexes_invariants_checks( | |
k: type(v) for k, v in indexes.items() | ||
} | ||
|
||
index_vars = { | ||
k for k, v in possible_coord_variables.items() if isinstance(v, IndexVariable) | ||
} | ||
assert indexes.keys() <= index_vars, (set(indexes), index_vars) | ||
if check_default: | ||
index_vars = { | ||
k | ||
for k, v in possible_coord_variables.items() | ||
if isinstance(v, IndexVariable) | ||
} | ||
assert indexes.keys() <= index_vars, (set(indexes), index_vars) | ||
|
||
# check pandas index wrappers vs. coordinate data adapters | ||
for k, index in indexes.items(): | ||
|
@@ -399,9 +402,14 @@ def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool): | |
da.dims, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are the variables in We should add one more invariant: something like "a dimension in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! The tests I added compare Coordinates objects, which thus skip this invariant check that we indeed need to update. |
||
{k: v.dims for k, v in da._coords.items()}, | ||
) | ||
assert all( | ||
isinstance(v, IndexVariable) for (k, v) in da._coords.items() if v.dims == (k,) | ||
), {k: type(v) for k, v in da._coords.items()} | ||
|
||
if check_default_indexes: | ||
assert all( | ||
isinstance(v, IndexVariable) | ||
for (k, v) in da._coords.items() | ||
if v.dims == (k,) | ||
), {k: type(v) for k, v in da._coords.items()} | ||
|
||
for k, v in da._coords.items(): | ||
_assert_variable_invariants(v, k) | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.