7 changes: 7 additions & 0 deletions doc/whats-new.rst
@@ -21,6 +21,13 @@ New Features
Breaking changes
~~~~~~~~~~~~~~~~

- When writing to NetCDF files with groups, Xarray no longer redefines dimensions
that have the same size in parent groups (:issue:`10241`). This conforms to the
`CF Conventions for group scope <https://cfconventions.org/cf-conventions/cf-conventions.html#_scope>`_,
but may require adjustments to code that consumes NetCDF files produced by
Xarray.
By `Stephan Hoyer <https://github.com/shoyer>`_.


Deprecations
~~~~~~~~~~~~
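In practice, the entry above means that a variable in a child group may now refer to a dimension that is only defined in one of its ancestor groups. Below is a minimal sketch of how downstream reading code could resolve such dimensions with netCDF4-python; the file path, the group name, and the resolve_dimension helper are illustrative, not part of xarray. Note that netCDF4-python already resolves inherited dimensions when reporting a variable's shape, so the manual walk mostly matters for code that inspects group.dimensions directly.

# Sketch only: resolving a dimension that may live in an ancestor group.
# "example.nc" and the group name "child" are hypothetical.
import netCDF4 as nc4


def resolve_dimension(group, name):
    """Return the Dimension `name`, searching this group and then its ancestors."""
    while group is not None:
        if name in group.dimensions:
            return group.dimensions[name]
        group = group.parent  # parent is None once we step past the root
    raise KeyError(name)


with nc4.Dataset("example.nc") as root:
    child = root.groups["child"]
    for var in child.variables.values():
        sizes = {d: len(resolve_dimension(child, d)) for d in var.dimensions}
        print(var.name, sizes)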
22 changes: 20 additions & 2 deletions xarray/backends/common.py
@@ -256,6 +256,20 @@ def find_root_and_group(ds):
return ds, group


def collect_ancestor_dimensions(group) -> dict[str, int]:
"""Returns dimensions defined in parent groups.

If dimensions are defined in multiple ancestors, use the size of the closest
ancestor.
"""
dims = {}
while (group := group.parent) is not None:
for k, v in group.dimensions.items():
if k not in dims:
dims[k] = len(v)
return dims
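
For illustration, the sketch below shows the closest-ancestor rule on a small nested file built with netCDF4-python. The file name and group layout are made up, and collect_ancestor_dimensions is an internal helper rather than public API.

# Illustrative only: the closest ancestor's size wins when a name repeats.
import netCDF4 as nc4
from xarray.backends.common import collect_ancestor_dimensions

with nc4.Dataset("nested.nc", "w") as root:
    root.createDimension("x", 2)            # x defined at the root
    a = root.createGroup("a")
    a.createDimension("x", 3)                # x redefined in /a
    b = a.createGroup("b")                   # /a/b defines nothing itself
    print(collect_ancestor_dimensions(b))    # {'x': 3} -- /a is the closest ancestor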


def datatree_from_dict_with_io_cleanup(groups_dict: Mapping[str, Dataset]) -> DataTree:
"""DataTree.from_dict with file clean-up."""
try:
@@ -308,6 +322,9 @@ class AbstractDataStore:
def get_dimensions(self): # pragma: no cover
raise NotImplementedError()

def get_parent_dimensions(self): # pragma: no cover
return {}

def get_attrs(self): # pragma: no cover
raise NotImplementedError()

@@ -563,21 +580,22 @@ def set_dimensions(self, variables, unlimited_dims=None):
if unlimited_dims is None:
unlimited_dims = set()

parent_dims = self.get_parent_dimensions()
existing_dims = self.get_dimensions()

dims = {}
for v in unlimited_dims: # put unlimited_dims first
dims[v] = None
for v in variables.values():
dims.update(dict(zip(v.dims, v.shape, strict=True)))
dims |= v.sizes

for dim, length in dims.items():
if dim in existing_dims and length != existing_dims[dim]:
raise ValueError(
"Unable to update size for existing dimension"
f"{dim!r} ({length} != {existing_dims[dim]})"
)
elif dim not in existing_dims:
elif dim not in existing_dims and length != parent_dims.get(dim):
is_unlimited = dim in unlimited_dims
self.set_dimension(dim, length, is_unlimited)

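Condensed, the rule applied per dimension in set_dimensions is: raise on a size conflict inside the current group, skip creation when the nearest ancestor already defines the name with the same size, and otherwise define the dimension locally (including when an ancestor defines it with a different size, in which case the child shadows it). A schematic restatement follows; the function name is hypothetical, not xarray API.

# Schematic restatement of the branch in set_dimensions above.
def should_define_locally(dim, length, existing_dims, parent_dims):
    if dim in existing_dims:
        if length != existing_dims[dim]:
            raise ValueError(f"size conflict for dimension {dim!r}")
        return False  # already defined here with the right size
    # Reuse the ancestor definition only when the size matches exactly;
    # a differing ancestor size means this group gets its own definition.
    return length != parent_dims.get(dim)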
4 changes: 4 additions & 0 deletions xarray/backends/h5netcdf_.py
@@ -16,6 +16,7 @@
WritableCFDataStore,
_normalize_path,
_open_remote_file,
collect_ancestor_dimensions,
datatree_from_dict_with_io_cleanup,
find_root_and_group,
)
@@ -287,6 +288,9 @@ def get_attrs(self):
def get_dimensions(self):
return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())

def get_parent_dimensions(self):
return FrozenDict(collect_ancestor_dimensions(self.ds))

def get_encoding(self):
return {
"unlimited_dims": {
4 changes: 4 additions & 0 deletions xarray/backends/netCDF4_.py
@@ -16,6 +16,7 @@
T_PathFileOrDataStore,
WritableCFDataStore,
_normalize_path,
collect_ancestor_dimensions,
datatree_from_dict_with_io_cleanup,
find_root_and_group,
robust_getitem,
@@ -518,6 +519,9 @@ def get_attrs(self):
def get_dimensions(self):
return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())

def get_parent_dimensions(self):
return FrozenDict(collect_ancestor_dimensions(self.ds))

def get_encoding(self):
return {
"unlimited_dims": {
11 changes: 11 additions & 0 deletions xarray/tests/test_backends.py
@@ -1651,6 +1651,17 @@ def test_write_groups(self) -> None:
with self.open(tmp_file, group="data/2") as actual2:
assert_identical(data2, actual2)

def test_child_group_with_inconsistent_dimensions(self) -> None:
base = Dataset(coords={"x": [1, 2]})
child = Dataset(coords={"x": [1, 2, 3]})
with create_tmp_file() as tmp_file:
self.save(base, tmp_file)
self.save(child, tmp_file, group="child", mode="a")
with self.open(tmp_file) as actual_base:
assert_identical(base, actual_base)
with self.open(tmp_file, group="child") as actual_child:
assert_identical(child, actual_child)
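
This test exercises the case where inheritance must not apply: the child's x has a different size than the parent's, so the dimension is still defined inside the child group. Inspecting such a file with netCDF4-python would show roughly the following; the path is illustrative.

# Illustrative inspection of a file written as in the test above.
import netCDF4 as nc4

with nc4.Dataset("tmp.nc") as root:
    print({k: len(v) for k, v in root.dimensions.items()})                   # {'x': 2}
    print({k: len(v) for k, v in root.groups["child"].dimensions.items()})   # {'x': 3}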

@pytest.mark.parametrize(
"input_strings, is_bytes",
[
17 changes: 17 additions & 0 deletions xarray/tests/test_backends_datatree.py
@@ -265,6 +265,23 @@ def test_write_subgroup(self, tmpdir):
assert_equal(original_dt, roundtrip_dt)
assert_identical(expected_dt, roundtrip_dt)

@requires_netCDF4
def test_no_redundant_dimensions(self, tmpdir):
# regression test for https://github.com/pydata/xarray/issues/10241
original_dt = DataTree.from_dict(
{
"/": xr.Dataset(coords={"x": [1, 2, 3]}),
"/child": xr.Dataset({"foo": ("x", [4, 5, 6])}),
}
)
filepath = tmpdir / "test.zarr"
original_dt.to_netcdf(filepath, engine=self.engine)

root = nc4.Dataset(str(filepath))
child = root.groups["child"]
assert list(root.dimensions) == ["x"]
assert list(child.dimensions) == []
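
For completeness, a hedged sketch of reading such a file back through xarray: DataTree inherits coordinates from parent nodes, so the child dataset still lines foo up against the root's x even though no dimension is written into the child group. The path and engine below are illustrative.

# Sketch only: round-tripping a file written as in the test above.
import xarray as xr

tree = xr.open_datatree("test.nc", engine="netcdf4")
print(tree["child"].to_dataset())   # foo is dimensioned by x, inherited from the root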


@requires_netCDF4
class TestNetCDF4DatatreeIO(DatatreeIOBase):