Skip to content

Commit 47be135

Browse files
committed
transform numpy object-dtype strings (vlen) to numpy unicode strings
1 parent 0b66dbd commit 47be135

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

xarray/conventions.py

+9
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,15 @@ def decode_cf_variable(
389389
if decode_times:
390390
var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name)
391391

392+
# when the encoding requests ``str`` but the data was read as object dtype (vlen strings), cast it to a numpy unicode array
393+
if (
394+
"dtype" in var.encoding
395+
and var.encoding["dtype"] == str
396+
and original_dtype == object
397+
):
398+
original_dtype = var.encoding["dtype"]
399+
var = var.astype(var.encoding["dtype"])
400+
392401
dimensions, data, attributes, encoding = variables.unpack_for_decoding(var)
393402
# TODO(shoyer): convert everything below to use coders
394403

xarray/tests/test_backends.py

+1
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,7 @@ def test_encoding_kwarg_vlen_string(self) -> None:
13601360
with self.roundtrip(original, save_kwargs=kwargs) as actual:
13611361
assert actual["x"].encoding["dtype"] is str
13621362
assert_identical(actual, expected)
1363+
assert actual["x"].dtype == "<U3"
13631364

13641365
def test_roundtrip_string_with_fill_value_vlen(self) -> None:
13651366
values = np.array(["ab", "cdef", np.nan], dtype=object)

0 commit comments

Comments
 (0)