@@ -864,12 +864,13 @@ def test_roundtrip_empty_vlen_string_array(self) -> None:
864
864
assert check_vlen_dtype (original ["a" ].dtype ) == str
865
865
with self .roundtrip (original ) as actual :
866
866
assert_identical (original , actual )
867
- assert object == actual ["a" ].dtype
868
- assert actual ["a" ].dtype == original ["a" ].dtype
869
- # only check metadata for capable backends
870
- # eg. NETCDF3 based backends do not roundtrip metadata
871
- if actual ["a" ].dtype .metadata is not None :
872
- assert check_vlen_dtype (actual ["a" ].dtype ) == str
867
+ if np .issubdtype (actual ["a" ].dtype , object ):
868
+ # only check metadata for capable backends
869
+ # eg. NETCDF3 based backends do not roundtrip metadata
870
+ if actual ["a" ].dtype .metadata is not None :
871
+ assert check_vlen_dtype (actual ["a" ].dtype ) == str
872
+ else :
873
+ assert actual ["a" ].dtype == np .dtype ("<U1" )
873
874
874
875
@pytest .mark .parametrize (
875
876
"decoded_fn, encoded_fn" ,
@@ -1374,32 +1375,39 @@ def test_write_groups(self) -> None:
1374
1375
with self .open (tmp_file , group = "data/2" ) as actual2 :
1375
1376
assert_identical (data2 , actual2 )
1376
1377
1377
- def test_encoding_kwarg_vlen_string (self ) -> None :
1378
- for input_strings in [[b"foo" , b"bar" , b"baz" ], ["foo" , "bar" , "baz" ]]:
1379
- original = Dataset ({"x" : input_strings })
1380
- expected = Dataset ({"x" : ["foo" , "bar" , "baz" ]})
1381
- kwargs = dict (encoding = {"x" : {"dtype" : str }})
1382
- with self .roundtrip (original , save_kwargs = kwargs ) as actual :
1383
- assert actual ["x" ].encoding ["dtype" ] is str
1384
- assert_identical (actual , expected )
1385
-
1386
- def test_roundtrip_string_with_fill_value_vlen (self ) -> None :
1378
+ @pytest .mark .parametrize (
1379
+ "input_strings, is_bytes" ,
1380
+ [
1381
+ ([b"foo" , b"bar" , b"baz" ], True ),
1382
+ (["foo" , "bar" , "baz" ], False ),
1383
+ (["foó" , "bár" , "baź" ], False ),
1384
+ ],
1385
+ )
1386
+ def test_encoding_kwarg_vlen_string (
1387
+ self , input_strings : list [str ], is_bytes : bool
1388
+ ) -> None :
1389
+ original = Dataset ({"x" : input_strings })
1390
+
1391
+ expected_string = ["foo" , "bar" , "baz" ] if is_bytes else input_strings
1392
+ expected = Dataset ({"x" : expected_string })
1393
+ kwargs = dict (encoding = {"x" : {"dtype" : str }})
1394
+ with self .roundtrip (original , save_kwargs = kwargs ) as actual :
1395
+ assert actual ["x" ].encoding ["dtype" ] == "<U3"
1396
+ assert actual ["x" ].dtype == "<U3"
1397
+ assert_identical (actual , expected )
1398
+
1399
+ @pytest .mark .parametrize ("fill_value" , ["XXX" , "" , "bár" ])
1400
+ def test_roundtrip_string_with_fill_value_vlen (self , fill_value : str ) -> None :
1387
1401
values = np .array (["ab" , "cdef" , np .nan ], dtype = object )
1388
1402
expected = Dataset ({"x" : ("t" , values )})
1389
1403
1390
- # netCDF4-based backends don't support an explicit fillvalue
1391
- # for variable length strings yet.
1392
- # https://github.com/Unidata/netcdf4-python/issues/730
1393
- # https://github.com/h5netcdf/h5netcdf/issues/37
1394
- original = Dataset ({"x" : ("t" , values , {}, {"_FillValue" : "XXX" })})
1395
- with pytest .raises (NotImplementedError ):
1396
- with self .roundtrip (original ) as actual :
1397
- assert_identical (expected , actual )
1404
+ original = Dataset ({"x" : ("t" , values , {}, {"_FillValue" : fill_value })})
1405
+ with self .roundtrip (original ) as actual :
1406
+ assert_identical (expected , actual )
1398
1407
1399
1408
original = Dataset ({"x" : ("t" , values , {}, {"_FillValue" : "" })})
1400
- with pytest .raises (NotImplementedError ):
1401
- with self .roundtrip (original ) as actual :
1402
- assert_identical (expected , actual )
1409
+ with self .roundtrip (original ) as actual :
1410
+ assert_identical (expected , actual )
1403
1411
1404
1412
def test_roundtrip_character_array (self ) -> None :
1405
1413
with create_tmp_file () as tmp_file :
0 commit comments