@@ -866,12 +866,13 @@ def test_roundtrip_empty_vlen_string_array(self) -> None:
866866 assert check_vlen_dtype (original ["a" ].dtype ) == str
867867 with self .roundtrip (original ) as actual :
868868 assert_identical (original , actual )
869- assert object == actual ["a" ].dtype
870- assert actual ["a" ].dtype == original ["a" ].dtype
871- # only check metadata for capable backends
872- # eg. NETCDF3 based backends do not roundtrip metadata
873- if actual ["a" ].dtype .metadata is not None :
874- assert check_vlen_dtype (actual ["a" ].dtype ) == str
869+ if np .issubdtype (actual ["a" ].dtype , object ):
870+ # only check metadata for capable backends
871+ # e.g. NETCDF3-based backends do not roundtrip metadata
872+ if actual ["a" ].dtype .metadata is not None :
873+ assert check_vlen_dtype (actual ["a" ].dtype ) == str
874+ else :
875+ assert actual ["a" ].dtype == np .dtype ("<U1" )
875876
876877 @pytest .mark .parametrize (
877878 "decoded_fn, encoded_fn" ,
@@ -1376,32 +1377,39 @@ def test_write_groups(self) -> None:
13761377 with self .open (tmp_file , group = "data/2" ) as actual2 :
13771378 assert_identical (data2 , actual2 )
13781379
1379- def test_encoding_kwarg_vlen_string (self ) -> None :
1380- for input_strings in [[b"foo" , b"bar" , b"baz" ], ["foo" , "bar" , "baz" ]]:
1381- original = Dataset ({"x" : input_strings })
1382- expected = Dataset ({"x" : ["foo" , "bar" , "baz" ]})
1383- kwargs = dict (encoding = {"x" : {"dtype" : str }})
1384- with self .roundtrip (original , save_kwargs = kwargs ) as actual :
1385- assert actual ["x" ].encoding ["dtype" ] is str
1386- assert_identical (actual , expected )
1387-
1388- def test_roundtrip_string_with_fill_value_vlen (self ) -> None :
1380+ @pytest .mark .parametrize (
1381+ "input_strings, is_bytes" ,
1382+ [
1383+ ([b"foo" , b"bar" , b"baz" ], True ),
1384+ (["foo" , "bar" , "baz" ], False ),
1385+ (["foó" , "bár" , "baź" ], False ),
1386+ ],
1387+ )
1388+ def test_encoding_kwarg_vlen_string (
1389+ self , input_strings : list [str ], is_bytes : bool
1390+ ) -> None :
1391+ original = Dataset ({"x" : input_strings })
1392+
1393+ expected_string = ["foo" , "bar" , "baz" ] if is_bytes else input_strings
1394+ expected = Dataset ({"x" : expected_string })
1395+ kwargs = dict (encoding = {"x" : {"dtype" : str }})
1396+ with self .roundtrip (original , save_kwargs = kwargs ) as actual :
1397+ assert actual ["x" ].encoding ["dtype" ] == "<U3"
1398+ assert actual ["x" ].dtype == "<U3"
1399+ assert_identical (actual , expected )
1400+
1401+ @pytest .mark .parametrize ("fill_value" , ["XXX" , "" , "bár" ])
1402+ def test_roundtrip_string_with_fill_value_vlen (self , fill_value : str ) -> None :
13891403 values = np .array (["ab" , "cdef" , np .nan ], dtype = object )
13901404 expected = Dataset ({"x" : ("t" , values )})
13911405
1392- # netCDF4-based backends don't support an explicit fillvalue
1393- # for variable length strings yet.
1394- # https://github.com/Unidata/netcdf4-python/issues/730
1395- # https://github.com/h5netcdf/h5netcdf/issues/37
1396- original = Dataset ({"x" : ("t" , values , {}, {"_FillValue" : "XXX" })})
1397- with pytest .raises (NotImplementedError ):
1398- with self .roundtrip (original ) as actual :
1399- assert_identical (expected , actual )
1406+ original = Dataset ({"x" : ("t" , values , {}, {"_FillValue" : fill_value })})
1407+ with self .roundtrip (original ) as actual :
1408+ assert_identical (expected , actual )
14001409
14011410 original = Dataset ({"x" : ("t" , values , {}, {"_FillValue" : "" })})
1402- with pytest .raises (NotImplementedError ):
1403- with self .roundtrip (original ) as actual :
1404- assert_identical (expected , actual )
1411+ with self .roundtrip (original ) as actual :
1412+ assert_identical (expected , actual )
14051413
14061414 def test_roundtrip_character_array (self ) -> None :
14071415 with create_tmp_file () as tmp_file :
0 commit comments