From 11f36d25b0f76e3b370f8824b8ed32f272f6d859 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 01:34:34 -0400 Subject: [PATCH 01/12] working regex --- datatree/datatree.py | 77 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index f2618d2a..42cc5b1d 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -4,6 +4,7 @@ import itertools from collections import OrderedDict from html import escape +import re from typing import ( TYPE_CHECKING, Any, @@ -79,6 +80,13 @@ T_Path = Union[str, NodePath] +_SYMBOLIC_NODE_NAME = r"\w+" +_SYMBOLIC_NODEPATH = ( + f"\/?{_SYMBOLIC_NODE_NAME}(\/{_SYMBOLIC_NODE_NAME})*\/?" +) +_SYMBOLIC_REORDERING = f"^{_SYMBOLIC_NODEPATH}->{_SYMBOLIC_NODEPATH}$" + + def _coerce_to_dataset(data: Dataset | DataArray | None) -> Dataset: if isinstance(data, DataArray): ds = data.to_dataset() @@ -1302,6 +1310,48 @@ def match(self, pattern: str) -> DataTree: } return DataTree.from_dict(matching_nodes, name=self.root.name) + def reorder(self, order: str) -> DataTree: + """ + Reorder levels of all nodes in this subtree by rearranging the parts of each of their paths. + + In general this operation will preserve the depth of every node (and hence depth of the whole subtree), + but will not preserve the width at any level. + + Parameters + ---------- + order: str + String specifying symbolically how to reorder levels of each path, for example: + 'a/b/c -> b/c/a' + + Generally must be of the form: + '{OLD_ORDER} -> {NEW_ORDER}' + where OLD_ORDER = 'a/b/***/y/z', representing a symbolic ordering of the parts of the node path, + and NEW_ORDER = 'z/a/***/b/y', representing an arbitrary re-ordering of the same number of parts. + (Here the triple asterisk stands in for an arbitrary number of parts.) + + Symbols must be unique, and each symbol in the old order must have a corresponding entry in the new order, + so the number of symbols must be the same in the new order as in the old order. + + By default paths will be re-ordered starting at the root. To re-order at the leaves instead, an ellipsis can + be pre-prended, e.g. '.../a/b -> .../b/a'. The ellipsis can be present in the new order, old order, both, + or neither. (Ellipses will have no effect on a node which has a depth equal to the number of symbols.) + + Returns + ------- + reordered: DataTree + DataTree object where each node has the same depth as it did originally. + + Examples + -------- + """ + old_order, new_order = _parse_order(order) + old_dict = self.to_dict() + + # TODO should we amputate the subtree then re-attach? + reordered_dict = {_reorder_path(node.path, old_order, new_order): node.ds for node in self.subtree} + + return DataTree.from_dict(reordered_dict) + def map_over_subtree( self, func: Callable, @@ -1477,7 +1527,7 @@ def to_netcdf( Note that unlimited_dims may also be set via ``dataset.encoding["unlimited_dims"]``. kwargs : - Addional keyword arguments to be passed to ``xarray.Dataset.to_netcdf`` + Additional keyword arguments to be passed to ``xarray.Dataset.to_netcdf`` """ from .io import _datatree_to_netcdf @@ -1529,3 +1579,28 @@ def to_zarr( def plot(self): raise NotImplementedError + + +def _parse_order(ordering: str) -> Tuple[List[str], List[str]]: + """Parse a reordering string of the form 'a/b -> b/a'.""" + if not re.match(_SYMBOLIC_REORDERING, ordering): + raise ValueError(f"Not a valid symbolic ordering: {ordering}") + + in_txt, out_txt = ordering.split("->") + old_order = re.findall(_SYMBOLIC_NODE_NAME, in_txt) + new_order = re.findall(_SYMBOLIC_NODE_NAME, out_txt) + + return old_order, new_order + +def _reorder_path(path: str, old_order: List[str], new_order: List[str]) -> str: + """Re-orders the parts of the given path from old_order to match new_order.""" + + parts = NodePath(path).parts + if len(old_order) > len(parts): + raise ValueError(f"node {path} only has depth {len(parts)}") + + new_order_indices = [new_order.index(el) for el in old_order] + + reordered_parts = [parts[i] for i in new_order_indices] + + return str(NodePath(reordered_parts)) From d3318a8343c660e5ab54be6b1a997dbe2013064f Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 01:49:04 -0400 Subject: [PATCH 02/12] tidy --- datatree/datatree.py | 71 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 42cc5b1d..796e545a 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -2,9 +2,9 @@ import copy import itertools +import re from collections import OrderedDict from html import escape -import re from typing import ( TYPE_CHECKING, Any, @@ -81,12 +81,36 @@ _SYMBOLIC_NODE_NAME = r"\w+" -_SYMBOLIC_NODEPATH = ( - f"\/?{_SYMBOLIC_NODE_NAME}(\/{_SYMBOLIC_NODE_NAME})*\/?" -) +_SYMBOLIC_NODEPATH = f"\/?{_SYMBOLIC_NODE_NAME}(\/{_SYMBOLIC_NODE_NAME})*\/?" _SYMBOLIC_REORDERING = f"^{_SYMBOLIC_NODEPATH}->{_SYMBOLIC_NODEPATH}$" +def _parse_ordering(ordering: str) -> Tuple[List[str], List[str]]: + """Parse a reordering string of the form 'a/b -> b/a'.""" + if not re.match(_SYMBOLIC_REORDERING, ordering): + raise ValueError(f"Not a valid symbolic ordering: {ordering}") + + in_txt, out_txt = ordering.split("->") + old_order = re.findall(_SYMBOLIC_NODE_NAME, in_txt) + new_order = re.findall(_SYMBOLIC_NODE_NAME, out_txt) + + return old_order, new_order + + +def _reorder_path(path: str, old_order: List[str], new_order: List[str]) -> str: + """Re-orders the parts of the given path from old_order to match new_order.""" + + parts = NodePath(path).parts + if len(old_order) > len(parts): + raise ValueError(f"node {path} only has depth {len(parts)}") + + new_order_indices = [new_order.index(el) for el in old_order] + + reordered_parts = [parts[i] for i in new_order_indices] + + return str(NodePath(reordered_parts)) + + def _coerce_to_dataset(data: Dataset | DataArray | None) -> Dataset: if isinstance(data, DataArray): ds = data.to_dataset() @@ -1310,7 +1334,7 @@ def match(self, pattern: str) -> DataTree: } return DataTree.from_dict(matching_nodes, name=self.root.name) - def reorder(self, order: str) -> DataTree: + def reorder(self, ordering: str) -> DataTree: """ Reorder levels of all nodes in this subtree by rearranging the parts of each of their paths. @@ -1319,7 +1343,7 @@ def reorder(self, order: str) -> DataTree: Parameters ---------- - order: str + ordering: str String specifying symbolically how to reorder levels of each path, for example: 'a/b/c -> b/c/a' @@ -1344,11 +1368,13 @@ def reorder(self, order: str) -> DataTree: Examples -------- """ - old_order, new_order = _parse_order(order) - old_dict = self.to_dict() + old_order, new_order = _parse_ordering(ordering) - # TODO should we amputate the subtree then re-attach? - reordered_dict = {_reorder_path(node.path, old_order, new_order): node.ds for node in self.subtree} + # only re-order the subtree, and return a new copy, to avoid messing up parents of this node + reordered_dict = { + _reorder_path(node.path.relative_to(self), old_order, new_order): node.ds + for node in self.subtree + } return DataTree.from_dict(reordered_dict) @@ -1579,28 +1605,3 @@ def to_zarr( def plot(self): raise NotImplementedError - - -def _parse_order(ordering: str) -> Tuple[List[str], List[str]]: - """Parse a reordering string of the form 'a/b -> b/a'.""" - if not re.match(_SYMBOLIC_REORDERING, ordering): - raise ValueError(f"Not a valid symbolic ordering: {ordering}") - - in_txt, out_txt = ordering.split("->") - old_order = re.findall(_SYMBOLIC_NODE_NAME, in_txt) - new_order = re.findall(_SYMBOLIC_NODE_NAME, out_txt) - - return old_order, new_order - -def _reorder_path(path: str, old_order: List[str], new_order: List[str]) -> str: - """Re-orders the parts of the given path from old_order to match new_order.""" - - parts = NodePath(path).parts - if len(old_order) > len(parts): - raise ValueError(f"node {path} only has depth {len(parts)}") - - new_order_indices = [new_order.index(el) for el in old_order] - - reordered_parts = [parts[i] for i in new_order_indices] - - return str(NodePath(reordered_parts)) From 8af09d0ea171555b323a9aaa72a70ec78ef2cd69 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 11:25:30 -0400 Subject: [PATCH 03/12] input validation tests passing --- datatree/datatree.py | 59 +++++++++++++++++++++++++++------ datatree/tests/test_datatree.py | 38 +++++++++++++++++++++ 2 files changed, 87 insertions(+), 10 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 796e545a..4246151e 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -85,16 +85,55 @@ _SYMBOLIC_REORDERING = f"^{_SYMBOLIC_NODEPATH}->{_SYMBOLIC_NODEPATH}$" -def _parse_ordering(ordering: str) -> Tuple[List[str], List[str]]: - """Parse a reordering string of the form 'a/b -> b/a'.""" +def _parse_symbolic_ordering(ordering: str) -> Tuple[List[str], List[str]]: + """Parse a symbolic reordering string of the form 'a/b -> b/a'.""" if not re.match(_SYMBOLIC_REORDERING, ordering): - raise ValueError(f"Not a valid symbolic ordering: {ordering}") + raise ValueError(f"Invalid symbolic reordering: {ordering}") in_txt, out_txt = ordering.split("->") - old_order = re.findall(_SYMBOLIC_NODE_NAME, in_txt) - new_order = re.findall(_SYMBOLIC_NODE_NAME, out_txt) + old_symbolic_order = re.findall(_SYMBOLIC_NODE_NAME, in_txt) + new_symbolic_order = re.findall(_SYMBOLIC_NODE_NAME, out_txt) + + # Check number of symbols is the same on both sides + if len(old_symbolic_order) != len(new_symbolic_order): + raise ValueError( + "Invalid symbolic reordering. The depth of the symbolic path on each side must be equal, " + f"but the left has {len(old_symbolic_order)} parts and the right has {len(new_symbolic_order)}" + f" parts." + ) - return old_order, new_order + # Check every symbol appears only once + if len(set(old_symbolic_order)) < len(old_symbolic_order): + # TODO + repeated_symbols = ... + raise ValueError( + "Invalid symbolic reordering. Each symbol must appear only once on each side, " + f"but the symbols {repeated_symbols} appear more than once in the left-hand side." + ) + if len(set(new_symbolic_order)) < len(new_symbolic_order): + # TODO + repeated_symbols = ... + raise ValueError( + "Invalid symbolic reordering. Each symbol must appear only once on each side, " + f"but the symbols {repeated_symbols} appear more than once in the right-hand side." + ) + + # Check every symbol appears on both sides + all_symbols = set(old_symbolic_order).union(set(new_symbolic_order)) + if len(set(old_symbolic_order)) < len(all_symbols): + unmatched_symbols = all_symbols - set(old_symbolic_order) + raise ValueError( + "Invalid symbolic reordering. Every symbol must be present on both sides, but" + f"the symbols {unmatched_symbols} are only present on the right-hand side." + ) + if len(set(new_symbolic_order)) < len(all_symbols): + unmatched_symbols = all_symbols - set(new_symbolic_order) + raise ValueError( + "Invalid symbolic reordering. Every symbol must be present on both sides, but" + f"the symbols {unmatched_symbols} are only present on the left-hand side." + ) + + return old_symbolic_order, new_symbolic_order def _reorder_path(path: str, old_order: List[str], new_order: List[str]) -> str: @@ -105,9 +144,7 @@ def _reorder_path(path: str, old_order: List[str], new_order: List[str]) -> str: raise ValueError(f"node {path} only has depth {len(parts)}") new_order_indices = [new_order.index(el) for el in old_order] - reordered_parts = [parts[i] for i in new_order_indices] - return str(NodePath(reordered_parts)) @@ -1368,11 +1405,13 @@ def reorder(self, ordering: str) -> DataTree: Examples -------- """ - old_order, new_order = _parse_ordering(ordering) + old_symbolic_order, new_symbolic_order = _parse_symbolic_ordering(ordering) # only re-order the subtree, and return a new copy, to avoid messing up parents of this node reordered_dict = { - _reorder_path(node.path.relative_to(self), old_order, new_order): node.ds + _reorder_path( + node.relative_to(self), old_symbolic_order, new_symbolic_order + ): node.ds for node in self.subtree } diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index 9dd601c8..8646b30e 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -645,6 +645,44 @@ def test_assign(self): dtt.assert_equal(result, expected) +class TestReorder: + @pytest.mark.parametrize( + "in_dict, order, expected", + [ + ({"a": None}, "a -> a", {"a": None}), + ({"a/b": None}, "a/b -> b/a", {"b/a": None}), + ], + ) + def test_reorder(self, in_dict, order, expected): + dt = DataTree.from_dict(in_dict) + out_dict = dt.reorder("a/b -> b/a").to_dict() + assert out_dict == expected + + def test_invalid_order(self): + dt = DataTree.from_dict({"A/B/C": None}) + + with pytest.raises(ValueError, match="Invalid symbolic reordering"): + dt.reorder("a") + + with pytest.raises(ValueError, match="Invalid symbolic reordering"): + dt.reorder("a->") + + with pytest.raises(ValueError, match="depth of the symbolic path on each side must be equal"): + dt.reorder("a->a/b") + + with pytest.raises(ValueError, match="must be present on both sides"): + dt.reorder("a->b") + + with pytest.raises(ValueError, match="must appear only once"): + dt.reorder("a/a/b->b/a/a") + + def test_not_deep_enough(self): + dt = DataTree.from_dict({"A/B/C": None}) + + with pytest.raises(ValueError, match="node X only has depth Y"): + dt.reorder("a/b->b/a") + + class TestPipe: def test_noop(self, create_test_datatree): dt = create_test_datatree() From bac89b2a00aa2de676bcff795fbac60c3bdfcd63 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 25 Oct 2023 15:28:31 +0000 Subject: [PATCH 04/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- datatree/tests/test_datatree.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index 8646b30e..bce8440b 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -667,7 +667,9 @@ def test_invalid_order(self): with pytest.raises(ValueError, match="Invalid symbolic reordering"): dt.reorder("a->") - with pytest.raises(ValueError, match="depth of the symbolic path on each side must be equal"): + with pytest.raises( + ValueError, match="depth of the symbolic path on each side must be equal" + ): dt.reorder("a->a/b") with pytest.raises(ValueError, match="must be present on both sides"): From d525aad90cef8931e4ffadb2ba5f282a6fac17ac Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 11:51:13 -0400 Subject: [PATCH 05/12] whatsnew --- docs/source/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/whats-new.rst b/docs/source/whats-new.rst index eb2c0340..5d5765f8 100644 --- a/docs/source/whats-new.rst +++ b/docs/source/whats-new.rst @@ -23,6 +23,9 @@ v0.0.13 (unreleased) New Features ~~~~~~~~~~~~ +- New :py:meth:`DataTree.reorder` method for re-ordering levels of all nodes in the tree according to a + symbolic pattern such as ``a/b->b/a``. (:pull:`271`) + By `Tom Nicholas `_. - New :py:meth:`DataTree.match` method for glob-like pattern matching of node paths. (:pull:`267`) By `Tom Nicholas `_. - Indicate which node caused the problem if error encountered while applying user function using :py:func:`map_over_subtree` From 9faa3c726085ffc1c9337214ee6359be75de42a7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 11:54:42 -0400 Subject: [PATCH 06/12] add to API --- docs/source/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/api.rst b/docs/source/api.rst index 54a98639..1efab089 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -104,6 +104,7 @@ For manipulating, traversing, navigating, or mapping over the tree structure. DataTree.pipe DataTree.match DataTree.filter + DataTree.reorder DataTree Contents ----------------- From 4bfec94ea8c08d08f030efe12675d12cb10e1da7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 13:28:35 -0400 Subject: [PATCH 07/12] check for hollowness --- datatree/datatree.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 4246151e..69866d41 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1373,9 +1373,11 @@ def match(self, pattern: str) -> DataTree: def reorder(self, ordering: str) -> DataTree: """ - Reorder levels of all nodes in this subtree by rearranging the parts of each of their paths. + Reorder levels of all leaf nodes in this subtree by rearranging the parts of each of their paths. - In general this operation will preserve the depth of every node (and hence depth of the whole subtree), + Raises an error on non-hollow trees. + + In general this operation will preserve the depth of each leaf node (and hence depth of the whole subtree), but will not preserve the width at any level. Parameters @@ -1405,6 +1407,10 @@ def reorder(self, ordering: str) -> DataTree: Examples -------- """ + if not self.is_hollow: + # TODO can we relax this restriction to only raising if a data-filled node would be moved? + raise ValueError("Only hollow trees can be unambiguously reordered.") + old_symbolic_order, new_symbolic_order = _parse_symbolic_ordering(ordering) # only re-order the subtree, and return a new copy, to avoid messing up parents of this node @@ -1412,7 +1418,7 @@ def reorder(self, ordering: str) -> DataTree: _reorder_path( node.relative_to(self), old_symbolic_order, new_symbolic_order ): node.ds - for node in self.subtree + for node in self.leaves } return DataTree.from_dict(reordered_dict) From 96cfd4f9ebf5569265deed435c85efea5b4a3af4 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 14:42:42 -0400 Subject: [PATCH 08/12] better error messages --- datatree/datatree.py | 48 ++++++++++++++++----------------- datatree/tests/test_datatree.py | 12 ++++----- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 07a38046..80f02383 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -102,35 +102,30 @@ def _parse_symbolic_ordering(ordering: str) -> Tuple[List[str], List[str]]: f" parts." ) - # Check every symbol appears only once - if len(set(old_symbolic_order)) < len(old_symbolic_order): - # TODO - repeated_symbols = ... + # Check every symbol appears on both sides + unmatched_symbols = set(old_symbolic_order).symmetric_difference(new_symbolic_order) + if unmatched_symbols: raise ValueError( - "Invalid symbolic reordering. Each symbol must appear only once on each side, " - f"but the symbols {repeated_symbols} appear more than once in the left-hand side." - ) - if len(set(new_symbolic_order)) < len(new_symbolic_order): - # TODO - repeated_symbols = ... - raise ValueError( - "Invalid symbolic reordering. Each symbol must appear only once on each side, " - f"but the symbols {repeated_symbols} appear more than once in the right-hand side." + "Invalid symbolic reordering. Every symbol must be present on both sides, but " + f"the symbols {unmatched_symbols} are only present on one side." ) - # Check every symbol appears on both sides - all_symbols = set(old_symbolic_order).union(set(new_symbolic_order)) - if len(set(old_symbolic_order)) < len(all_symbols): - unmatched_symbols = all_symbols - set(old_symbolic_order) + # Check each symbol appears only once on each side + repeated_symbols_in_old_order = set( + sym for sym in old_symbolic_order if old_symbolic_order.count(sym) > 1 + ) + if repeated_symbols_in_old_order: raise ValueError( - "Invalid symbolic reordering. Every symbol must be present on both sides, but" - f"the symbols {unmatched_symbols} are only present on the right-hand side." + "Invalid symbolic reordering. Each symbol must appear only once on each side, " + f"but the symbols {repeated_symbols_in_old_order} appear more than once in the left-hand side." ) - if len(set(new_symbolic_order)) < len(all_symbols): - unmatched_symbols = all_symbols - set(new_symbolic_order) + repeated_symbols_in_new_order = set( + sym for sym in new_symbolic_order if new_symbolic_order.count(sym) > 1 + ) + if repeated_symbols_in_new_order: raise ValueError( - "Invalid symbolic reordering. Every symbol must be present on both sides, but" - f"the symbols {unmatched_symbols} are only present on the left-hand side." + "Invalid symbolic reordering. Each symbol must appear only once on each side, " + f"but the symbols {repeated_symbols_in_new_order} appear more than once in the right-hand side." ) return old_symbolic_order, new_symbolic_order @@ -141,7 +136,10 @@ def _reorder_path(path: str, old_order: List[str], new_order: List[str]) -> str: parts = NodePath(path).parts if len(old_order) > len(parts): - raise ValueError(f"node {path} only has depth {len(parts)}") + raise ValueError( + f"Node {path} only has depth {len(parts)}, " + f"but the reordering requires depth >= {len(old_order)}" + ) new_order_indices = [new_order.index(el) for el in old_order] reordered_parts = [parts[i] for i in new_order_indices] @@ -1416,6 +1414,8 @@ def reorder(self, ordering: str) -> DataTree: # TODO can we relax this restriction to only raising if a data-filled node would be moved? raise ValueError("Only hollow trees can be unambiguously reordered.") + # TODO do we require the root to have a name if we are to reorder from the root? + old_symbolic_order, new_symbolic_order = _parse_symbolic_ordering(ordering) # only re-order the subtree, and return a new copy, to avoid messing up parents of this node diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index b2164dd8..c7e6833b 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -682,17 +682,17 @@ def test_invalid_order(self): ): dt.reorder("a->a/b") - with pytest.raises(ValueError, match="must be present on both sides"): + with pytest.raises(ValueError, match="only present on one side"): dt.reorder("a->b") - with pytest.raises(ValueError, match="must appear only once"): - dt.reorder("a/a/b->b/a/a") + with pytest.raises(ValueError, match="symbols {'a'} appear more than once"): + dt.reorder("a/a/b->a/b/b") def test_not_deep_enough(self): - dt = DataTree.from_dict({"A/B/C": None}) + dt = DataTree.from_dict({"A": None}) - with pytest.raises(ValueError, match="node X only has depth Y"): - dt.reorder("a/b->b/a") + with pytest.raises(ValueError, match="Node A only has depth 1"): + dt.reorder("a/b/c->c/b/a") class TestPipe: From 1a458241c823352da335b17cef0304689d7a8699 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 16:08:04 -0400 Subject: [PATCH 09/12] works for cases where path length = symboic depth --- datatree/datatree.py | 2 +- datatree/tests/test_datatree.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 80f02383..07b65492 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -143,7 +143,7 @@ def _reorder_path(path: str, old_order: List[str], new_order: List[str]) -> str: new_order_indices = [new_order.index(el) for el in old_order] reordered_parts = [parts[i] for i in new_order_indices] - return str(NodePath(reordered_parts)) + return str(NodePath(*reordered_parts)) def _coerce_to_dataset(data: Dataset | DataArray | None) -> Dataset: diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index c7e6833b..3bb595a7 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -657,16 +657,18 @@ def test_assign(self): class TestReorder: @pytest.mark.parametrize( - "in_dict, order, expected", + "in_dict, reordering, expected_dict", [ - ({"a": None}, "a -> a", {"a": None}), - ({"a/b": None}, "a/b -> b/a", {"b/a": None}), + ({"A": xr.Dataset()}, "a->a", {"A": xr.Dataset()}), + ({"A/B": xr.Dataset()}, "a/b->b/a", {"B/A": xr.Dataset()}), + ({"A/B/C": xr.Dataset()}, "a/b/c->c/b/a", {"C/B/A": xr.Dataset()}), ], ) - def test_reorder(self, in_dict, order, expected): + def test_reorder(self, in_dict, reordering, expected_dict): dt = DataTree.from_dict(in_dict) - out_dict = dt.reorder("a/b -> b/a").to_dict() - assert out_dict == expected + result = dt.reorder(reordering) + expected = DataTree.from_dict(expected_dict) + dtt.assert_equal(result, expected) def test_invalid_order(self): dt = DataTree.from_dict({"A/B/C": None}) From 650905513f271e65e440f07650e726629ac03892 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 16:10:13 -0400 Subject: [PATCH 10/12] test that error raised if tree not hollow --- datatree/tests/test_datatree.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index 3bb595a7..c5d24e3b 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -690,12 +690,17 @@ def test_invalid_order(self): with pytest.raises(ValueError, match="symbols {'a'} appear more than once"): dt.reorder("a/a/b->a/b/b") - def test_not_deep_enough(self): + def test_invalid_tree(self): dt = DataTree.from_dict({"A": None}) with pytest.raises(ValueError, match="Node A only has depth 1"): dt.reorder("a/b/c->c/b/a") + dt = DataTree.from_dict({"A": xr.Dataset({'t': 1}), "A/B": None}) + + with pytest.raises(ValueError, match="Only hollow trees"): + dt.reorder("a/b->b/a") + class TestPipe: def test_noop(self, create_test_datatree): From 5e429c48ceb9e71bbb939aca3a92915c61fc62d8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 16:17:16 -0400 Subject: [PATCH 11/12] satisfy pre-commit --- datatree/datatree.py | 2 +- datatree/tests/test_datatree.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 07b65492..a3315594 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -81,7 +81,7 @@ _SYMBOLIC_NODE_NAME = r"\w+" -_SYMBOLIC_NODEPATH = f"\/?{_SYMBOLIC_NODE_NAME}(\/{_SYMBOLIC_NODE_NAME})*\/?" +_SYMBOLIC_NODEPATH = rf"\/?{_SYMBOLIC_NODE_NAME}(\/{_SYMBOLIC_NODE_NAME})*\/?" _SYMBOLIC_REORDERING = f"^{_SYMBOLIC_NODEPATH}->{_SYMBOLIC_NODEPATH}$" diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index c5d24e3b..211cbaf7 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -696,7 +696,7 @@ def test_invalid_tree(self): with pytest.raises(ValueError, match="Node A only has depth 1"): dt.reorder("a/b/c->c/b/a") - dt = DataTree.from_dict({"A": xr.Dataset({'t': 1}), "A/B": None}) + dt = DataTree.from_dict({"A": xr.Dataset({"t": 1}), "A/B": None}) with pytest.raises(ValueError, match="Only hollow trees"): dt.reorder("a/b->b/a") From 045ab30634f5e2348cbebfd9f9f21e880a024cd0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 25 Oct 2023 22:55:42 -0400 Subject: [PATCH 12/12] add example to docstring --- datatree/datatree.py | 32 ++++++++++++++++++++++++++++++- datatree/tests/test_datatree.py | 5 +++++ docs/source/hierarchical-data.rst | 4 ++-- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index a3315594..ec7ffb4b 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -1409,6 +1409,32 @@ def reorder(self, ordering: str) -> DataTree: Examples -------- + >>> dt = DataTree.from_dict( + ... {"A/B1": xr.Dataset({"x": 1}), "A/B2": xr.Dataset({"x": 2})} + ... ) + >>> dt + DataTree('None', parent=None) + └── DataTree('A') + ├── DataTree('B1') + │ Dimensions: () + │ Data variables: + │ x int64 1 + └── DataTree('B2') + Dimensions: () + Data variables: + x int64 2 + >>> dt.reorder("a/b->b/a") + DataTree('None', parent=None) + ├── DataTree('B1') + │ └── DataTree('A') + │ Dimensions: () + │ Data variables: + │ x int64 1 + └── DataTree('B2') + └── DataTree('A') + Dimensions: () + Data variables: + x int64 2 """ if not self.is_hollow: # TODO can we relax this restriction to only raising if a data-filled node would be moved? @@ -1423,9 +1449,13 @@ def reorder(self, ordering: str) -> DataTree: _reorder_path( node.relative_to(self), old_symbolic_order, new_symbolic_order ): node.ds - for node in self.leaves + for node in self.leaves # hollow trees are defined entirely by their leaves } + if self.depth > len(new_symbolic_order): + # TODO implement this + raise NotImplementedError() + return DataTree.from_dict(reordered_dict) def map_over_subtree( diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index 211cbaf7..1629a308 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -662,6 +662,11 @@ class TestReorder: ({"A": xr.Dataset()}, "a->a", {"A": xr.Dataset()}), ({"A/B": xr.Dataset()}, "a/b->b/a", {"B/A": xr.Dataset()}), ({"A/B/C": xr.Dataset()}, "a/b/c->c/b/a", {"C/B/A": xr.Dataset()}), + ( + {"A/B1": xr.Dataset({"x": 1}), "A/B2": xr.Dataset({"x": 2})}, + "a/b->b/a", + {"B1/A": xr.Dataset({"x": 1}), "B2/A": xr.Dataset({"x": 2})}, + ), ], ) def test_reorder(self, in_dict, reordering, expected_dict): diff --git a/docs/source/hierarchical-data.rst b/docs/source/hierarchical-data.rst index 51bcea56..c1ca1ba9 100644 --- a/docs/source/hierarchical-data.rst +++ b/docs/source/hierarchical-data.rst @@ -25,11 +25,11 @@ Many real-world datasets are composed of multiple differing components, and it can often be be useful to think of these in terms of a hierarchy of related groups of data. Examples of data which one might want organise in a grouped or hierarchical manner include: -- Simulation data at multiple resolutions, +- Simulation data at multiple resolutions, or using multiple models, - Observational data about the same system but from multiple different types of sensors, - Mixed experimental and theoretical data, - A systematic study recording the same experiment but with different parameters, -- Heterogenous data, such as demographic and metereological data, +- Heterogeneous data, such as demographic and meteorological data, or even any combination of the above.