Skip to content

Commit e2236be

Browse files
authored
Generalise support for netcdf load+save of "special" attributes (#6566)
* First workings. * Tweaks, add tests for StashHandler. * Fix existing tests for more restricted attribute content. * Fix existing tests for more restricted attribute content. * Implement generalised handled attribute loading. * Fix type handling in attribute handlers, but catch+warn errors in load/save code, not handlers. * Catch+warn conversion errors in loading; GRIB_PARAM is local even without split-attrs. * Add integration tests for managed attribute load+save. * Add special testcases for stash alternate names. * Use thread-safe nc.Dataset wrappers for tests. * Place all load/save warnings into proper IrisUserWarning subcategories. * Added specific tests for UkmoProcessFlags and GribParam attribute handlers. * Exclude 'um_stash_source' from roundtrip attribute testing. * Minor changes to match existing tested behaviours. * Further small test and behaviour alignment. * Save a 'STASH' attribute when stash translation fails. * Make iris-grib optional for tests. * Confine split-attrs usage. * Remove some redundant code and obsolete tests. * Don't need typing-derived Dict/List/Tuple. * Firm up handler typing. * Remove redundant arg in handler.decode_attribute method. Fixup * Rationalise access to the iris/netcdf attribute names of handlers. * Add whatsnew.
1 parent 41876b4 commit e2236be

File tree

13 files changed

+1100
-93
lines changed

13 files changed

+1100
-93
lines changed

docs/src/whatsnew/latest.rst

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ This document explains the changes made to Iris for this release
7272
real data but will leave the :class:`iris.MeshCoord` (and attached mesh) lazy. (:issue:`4757`, :pull:`6405`)
7373

7474
#. `@pp-mo`_ made it possible for the reference surfaces of derived coordinates, like orography, to be lazy.
75-
(:pull: 6517).
75+
(:pull:`6517`).
7676

7777
#. `@HGWright`_ and `@pp-mo`_ enabled correct loading and saving of the bounds of CF
7878
parametric coordinates (that is, Iris derived coordinates). This was previously
@@ -84,6 +84,11 @@ This document explains the changes made to Iris for this release
8484
to also include any auxiliary coordinates and ancillary variables with the same ``shape``.
8585
(:issue:`6539`, :pull:`6552`)
8686

87+
#. `@pp-mo`_ added support for saving and loading the special ``GRIB_PARAM`` attributes to netcdf, as used
88+
by iris-grib to record the exact grib-file encoding of phenomenon types. This means that data sourced
89+
from GRIB files can be freely saved and re-loaded to netcdf without loss of information.
90+
(`Issue Iris-grib#596 <https://github.com/SciTools/iris-grib/issues/596>`__, :pull:`6566`).
91+
8792

8893
🐛 Bugs Fixed
8994
=============
@@ -185,6 +190,10 @@ This document explains the changes made to Iris for this release
185190
#. `@DarkVoyager11`_ added a round trip integration test for NetCDF calendar attributes.
186191
(:issue:`2985`, :pull:`6562`)
187192

193+
#. `@pp-mo`_ made a unified mechanism for 'managed' cube attributes: ones which get
194+
converted between an iris-internal and an in-file form for saving/loading to netcdf,
195+
such as STASH objects in a STASH attribute.
196+
(:pull:`6566`).
188197

189198

190199
.. comment

lib/iris/fileformats/_nc_load_rules/actions.py

Lines changed: 55 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444

4545
from iris.config import get_logger
4646
import iris.fileformats.cf
47-
import iris.fileformats.pp as pp
4847
from iris.loading import LOAD_PROBLEMS, LoadProblems
4948
import iris.warnings
5049

@@ -533,51 +532,66 @@ def action_build_auxiliary_coordinate(engine, auxcoord_fact):
533532

534533

535534
@action_function
536-
def action_ukmo_stash(engine):
537-
"""Convert 'ukmo stash' cf property into a cube attribute."""
538-
rule_name = "fc_attribute_ukmo__um_stash_source"
539-
var = engine.cf_var
540-
attr_name = "ukmo__um_stash_source"
541-
attr_value = getattr(var, attr_name, None)
542-
if attr_value is None:
543-
attr_name = "um_stash_source" # legacy form
544-
attr_value = getattr(var, attr_name, None)
545-
if attr_value is None:
546-
rule_name += "(NOT-TRIGGERED)"
547-
else:
548-
# No helper routine : just do it
549-
try:
550-
stash_code = pp.STASH.from_msi(attr_value)
551-
except (TypeError, ValueError):
552-
engine.cube.attributes[attr_name] = attr_value
553-
msg = (
554-
"Unable to set attribute STASH as not a valid MSI "
555-
f'string "mXXsXXiXXX", got "{attr_value}"'
556-
)
557-
logger.debug(msg)
558-
else:
559-
engine.cube.attributes["STASH"] = stash_code
560-
535+
def action_managed_attribute(engine, attr_name, attr_value):
536+
"""Record a managed attribute, as successfully translated."""
537+
rule_name = f"fc_special_attribute__{attr_name}"
538+
engine.cube.attributes[attr_name] = attr_value
561539
return rule_name
562540

563541

564542
@action_function
565-
def action_ukmo_processflags(engine):
566-
"""Convert 'ukmo process flags' cf property into a cube attribute."""
567-
rule_name = "fc_attribute_ukmo__process_flags"
568-
var = engine.cf_var
569-
attr_name = "ukmo__process_flags"
570-
attr_value = getattr(var, attr_name, None)
571-
if attr_value is None:
572-
rule_name += "(NOT-TRIGGERED)"
573-
else:
574-
# No helper routine : just do it
575-
flags = [x.replace("_", " ") for x in attr_value.split(" ")]
576-
engine.cube.attributes["ukmo__process_flags"] = tuple(flags)
577-
543+
def action_unmanaged_attribute(engine, attr_name, attr_value):
544+
"""Record the original attribute, when translation of a managed one failed."""
545+
rule_name = f"fc_special_attribute__fallback__{attr_name}"
546+
engine.cube.attributes[attr_name] = attr_value
578547
return rule_name
579548

580549

550+
def action_all_managed_attributes(engine):
    """Check for and convert all 'managed' (handler-translated) attributes.

    For each registered attribute handler, look on the current CF data variable
    (``engine.cf_var``) for any of the file attribute names which that handler
    recognises.  The first one found is decoded and stored on the cube under
    the handler's iris attribute name.

    If more than one of a handler's file names is present, a warning is issued
    and only the first is used.

    If decoding raises ValueError or TypeError, a warning is issued and the
    original file attribute is stored on the cube unchanged, under its
    original file name.

    """
    # Local import, as in the original code.
    from iris.fileformats.netcdf._attribute_handlers import ATTRIBUTE_HANDLERS

    var = engine.cf_var
    for handler in ATTRIBUTE_HANDLERS.values():
        # Each handler can have several match names, but ideally only 0 or 1 appears !
        iris_name = handler.iris_name
        matches = []
        for match_name in handler.netcdf_names:
            match_value = getattr(var, match_name, None)
            if match_value is not None:
                matches.append((match_name, match_value))

        if not matches:
            # Nothing in the file for this handler.
            continue

        if len(matches) > 1:
            msg = (
                f"Multiple file attributes would set the iris '.{iris_name}' cube "
                "attribute:"
                + "".join(f"\n {name!r}: {val!r}" for name, val in matches)
                + "\n- only the first of these is actioned."
            )
            warnings.warn(msg, category=_WarnComboLoadIgnoring)

        # Take the first as priority
        input_name, input_value = matches[0]
        try:
            iris_value = handler.decode_attribute(input_value)
            # process as a rule
            action_managed_attribute(engine, iris_name, iris_value)

        except (ValueError, TypeError):
            # N.B. report 'input_name', the attribute which was actually read :
            # the loop variable 'match_name' is just the *last* name checked.
            msg = (
                f"Invalid content for managed attribute name {input_name!r} "
                f"= {input_value!r}: The attribute is retained untranslated, which "
                "may not re-save correctly."
            )
            warnings.warn(msg, category=iris.warnings.IrisLoadWarning)

            # ALSO record the attribute on the cube since, now it has been fetched
            # by the CF interpreting code, it will be discounted from inclusion.
            # Since translation failed, record as original name=value.
            action_unmanaged_attribute(engine, input_name, input_value)
593+
594+
581595
@action_function
582596
def action_build_cell_measure(engine, cellm_fact):
583597
"""Convert a CFCellMeasureVariable into a cube cell-measure."""
@@ -693,10 +707,9 @@ def run_actions(engine):
693707
for auxcoord_fact in auxcoord_facts:
694708
action_build_auxiliary_coordinate(engine, auxcoord_fact)
695709

696-
# Detect + process and special 'ukmo' attributes
710+
# Detect + process any special-handling attributes
697711
# Run on every cube : they choose themselves whether to trigger.
698-
action_ukmo_stash(engine)
699-
action_ukmo_processflags(engine)
712+
action_all_managed_attributes(engine)
700713

701714
# cell measures
702715
cellm_facts = engine.fact_list("cell_measure")
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
# Copyright Iris contributors
2+
#
3+
# This file is part of Iris and is released under the BSD license.
4+
# See LICENSE in the root of the repository for full licensing details.
5+
"""NetCDF attribute translations for Iris attributes with special convenience types.
6+
7+
These are things which are stored differently in an Iris cube attribute from how they
8+
are actually stored in a netcdf file. E.G. a STASH code is stored as a special object,
9+
but in a file it is just a string.
10+
11+
These conversions are intended to be automatic and lossless, like a serialization.
12+
13+
At present, there are 3 of these :
14+
* "STASH": records/controls the exact file encoding of data loaded from or saved to
15+
UM file formats (PP/FF).
16+
* "GRIB_PARAM": does the same for GRIB data (using iris_grib).
17+
* "ukmo__process_flags": internally a tuple of strings, but stored as a single string
18+
of space-separated flags, with any spaces inside a flag replaced by underscores.
19+
20+
"""
21+
22+
from abc import ABCMeta, abstractmethod
23+
from typing import Any
24+
25+
from iris.fileformats.pp import STASH
26+
27+
28+
class AttributeHandler(metaclass=ABCMeta):
29+
#: The user-visible attribute name used within Iris, which identifies attributes
30+
# which we should attempt to encode with this coder.
31+
_IrisIdentifyingName: str = ""
32+
#: The storage name(s) which identify this type of data in actual files, which thus
33+
# identify attributes which we should attempt to decode with this coder.
34+
# NOTES:
35+
# (1) for load, in (presumably extremely rare) case of multiples appearing, "the"
36+
# internal attribute is taken from the earliest appearing name: The other values
37+
# are lost, and a warning will be issued.
38+
# (2) for save, the attribute name is dynamically determined by the "encode" call.
39+
# On translation failure, however, we assume it is the last name listed -- since
40+
# it is so for StashHandler, the only one it currently matters for.
41+
_NetcdfIdentifyingNames: list[str] = []
42+
43+
@property
44+
def iris_name(self) -> str:
45+
"""Provide the iris attribute name which this handler deals with.
46+
47+
Read-only access to the information configured at the class-level.
48+
"""
49+
return self._IrisIdentifyingName
50+
51+
@property
52+
def netcdf_names(self) -> list[str]:
53+
"""Provide the netcdf attribute name(s) which this handler deals with.
54+
55+
Read-only access to the information configured at the class-level.
56+
"""
57+
# N.B. return a list copy to avoid any possibility of in-place change !
58+
return list(self._NetcdfIdentifyingNames)
59+
60+
@property
61+
def _primary_nc_name(self):
62+
"""The "usual" file attribute name."""
63+
# N.B. for now, this only matters for STASH, so take the *last* name: the first
64+
# name is dominant on load, but that is the 'legacy' version.
65+
return self._NetcdfIdentifyingNames[-1]
66+
67+
@abstractmethod
68+
def encode_object(self, content: Any) -> tuple[str, str]:
69+
"""Encode an object as an attribute name and value.
70+
71+
We already do change the name of STASH attributes to "um_stash_source" on save
72+
(as-of Iris 3.12). This structure also allows that we might produce different
73+
names for different codes.
74+
75+
The 'content' may be a custom object or string equivalent, depending on what
76+
specific implementation allows.
77+
78+
This should raise TypeError or ValueError if 'content' is unsuitable.
79+
"""
80+
pass
81+
82+
@abstractmethod
83+
def decode_attribute(self, attr_value: Any) -> Any:
84+
"""Decode an attribute value into the appropriate attribute object.
85+
86+
The 'value' is typically a string, but possibly other attribute content types,
87+
depending on the specific implementation.
88+
89+
This should raise TypeError or ValueError if 'value' is unsuitable.
90+
"""
91+
pass
92+
93+
94+
class StashHandler(AttributeHandler):
95+
"""Convert STASH object attribute to/from a netcdf string attribute."""
96+
97+
_IrisIdentifyingName = "STASH"
98+
# Note: two possible in-file attribute names, the first one is a 'legacy' version
99+
# but takes priority in a conflict.
100+
_NetcdfIdentifyingNames = ["ukmo__um_stash_source", "um_stash_source"]
101+
102+
def encode_object(self, stash: Any) -> tuple[str, str]:
103+
if isinstance(stash, STASH):
104+
stash_object = stash
105+
elif isinstance(stash, str):
106+
# Attempt to convert as an MSI string to a STASH object.
107+
# NB this will normalise the content.
108+
stash_object = STASH.from_msi(stash)
109+
else:
110+
msg = (
111+
f"Invalid STASH attribute can not be written to netcdf file: {stash!r}. "
112+
"Can only be a 'iris.fileformats.pp.STASH' object, or a string of the "
113+
"form 'mXXsXXiXXX', where XX are decimal numbers."
114+
)
115+
raise TypeError(msg)
116+
117+
msi_string = str(stash_object) # convert to standard MSI string representation
118+
# We always write "um_stash_source", not the legacy one.
119+
return self._primary_nc_name, msi_string
120+
121+
def decode_attribute(self, attr_value: Any) -> Any:
122+
# In this case the attribute name does not matter.
123+
from iris.fileformats.pp import STASH
124+
125+
attr_value = str(attr_value)
126+
return STASH.from_msi(attr_value)
127+
128+
129+
class UkmoProcessFlagsHandler(AttributeHandler):
130+
"""Convert ukmo__process_flags tuple attribute to/from a netcdf string attribute."""
131+
132+
_IrisIdentifyingName = "ukmo__process_flags"
133+
_NetcdfIdentifyingNames = ["ukmo__process_flags"]
134+
135+
def encode_object(self, value: Any) -> tuple[str, str]:
136+
if not isinstance(value, tuple) or any(
137+
not isinstance(elem, str) for elem in value
138+
):
139+
msg = (
140+
f"Invalid 'ukmo__process_flags' attribute : {value!r}. "
141+
"Must be a tuple of str."
142+
)
143+
raise TypeError(msg)
144+
145+
def value_fix(value):
146+
value = value.replace(" ", "_")
147+
if value == "":
148+
# Special handling for an empty string entry, which otherwise upsets
149+
# the split/join process.
150+
value = "<EMPTY>"
151+
return value
152+
153+
value = " ".join([value_fix(x) for x in value])
154+
return self._primary_nc_name, value
155+
156+
def decode_attribute(self, attr_value: Any) -> Any:
157+
# In this case the attribute name does not matter.
158+
attr_value = str(attr_value)
159+
160+
def value_unfix(value):
161+
value = value.replace("_", " ")
162+
if value == "<EMPTY>":
163+
# A special placeholder flagging where the original was an empty string.
164+
value = ""
165+
return value
166+
167+
if attr_value == "":
168+
# This is basically a fix for the odd behaviour of 'str.split'.
169+
flags = []
170+
else:
171+
flags = [value_unfix(x) for x in attr_value.split(" ")]
172+
173+
return tuple(flags)
174+
175+
176+
class GribParamHandler(AttributeHandler):
177+
"""Convert iris_grib GRIB_PARAM object attribute to/from a netcdf string attribute.
178+
179+
Use the mechanisms in iris_grib.
180+
"""
181+
182+
_IrisIdentifyingName = "GRIB_PARAM"
183+
_NetcdfIdentifyingNames = ["GRIB_PARAM"]
184+
185+
def encode_object(self, iris_value: Any) -> Any:
186+
# 'iris_value' is typically an
187+
# iris_grib.grib_phenom_translation._gribcode.GenericConcreteGRIBCode
188+
# Not typing this, as we need iris_grib to remain an optional import.
189+
from iris_grib.grib_phenom_translation._gribcode import (
190+
GenericConcreteGRIBCode,
191+
GRIBCode,
192+
)
193+
194+
if isinstance(iris_value, GenericConcreteGRIBCode):
195+
gribcode = iris_value
196+
else:
197+
# Create a gribcode from that.
198+
# N.B. (1) implicitly uses str() to convert the arg
199+
# N.B. (2) can fail : let it, caller deals with this !
200+
gribcode = GRIBCode(iris_value)
201+
202+
# The correct file attribute is the repr of a GRIBCode object.
203+
grib_string = repr(gribcode)
204+
return self._primary_nc_name, grib_string
205+
206+
def decode_attribute(self, attr_value: Any) -> Any:
207+
from iris_grib.grib_phenom_translation._gribcode import GRIBCode
208+
209+
# As above, a str() conversion is implied here.
210+
result = GRIBCode(attr_value)
211+
return result
212+
213+
214+
# Define the available attribute handlers.
215+
ATTRIBUTE_HANDLERS: dict[str, AttributeHandler] = {}
216+
217+
218+
def _add_handler(handler: AttributeHandler):
219+
ATTRIBUTE_HANDLERS[handler._IrisIdentifyingName] = handler
220+
221+
222+
# Always include the "STASH" and "ukmo__process_flags" handlers.
223+
_add_handler(StashHandler())
224+
_add_handler(UkmoProcessFlagsHandler())
225+
226+
try:
227+
import iris_grib # noqa: F401
228+
229+
# If iris-grib is available, also include the "GRIB_PARAM" handler.
230+
_add_handler(GribParamHandler())
231+
232+
except ImportError:
233+
pass

0 commit comments

Comments
 (0)