From 975ca7a95e8555849056954633d0df9d10d67813 Mon Sep 17 00:00:00 2001
From: Orsiris de Jong
Date: Fri, 18 Jul 2025 15:39:14 +0200
Subject: [PATCH 1/3] Add nvme-cli 2.3 compat

Signed-off-by: Orsiris de Jong
---
 nvme_metrics.py | 42 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/nvme_metrics.py b/nvme_metrics.py
index c7dbaec..5f27918 100755
--- a/nvme_metrics.py
+++ b/nvme_metrics.py
@@ -131,6 +131,21 @@
 }
 
 
+def nvme_has_verbose():
+    """
+    Old nvme-cli versions like 2.3 on Debian 12 don't have --verbose for smart-log command
+    We need to check if --verbose is supported. This command will report usage to stderr
+    Consider we have a recent version if the nvme binary cannot be launched
+    """
+    try:
+        result = subprocess.run(["nvme", "smart-log", "--help"], check=False, capture_output=True)
+    except OSError:
+        # check=False never raises CalledProcessError; only a failed launch ends up here
+        return True
+    # Captured output is bytes; search both streams in case usage is printed to stdout
+    return b"--verbose" in result.stdout + result.stderr
+
+
 def exec_nvme(*args):
     """
     Execute nvme CLI tool with specified arguments and return captured stdout result. Set LC_ALL=C
@@ -141,7 +156,7 @@ def exec_nvme(*args):
     return subprocess.check_output(cmd, stderr=subprocess.PIPE, env=dict(os.environ, LC_ALL="C"))
 
 
-def exec_nvme_json(*args):
+def exec_nvme_json(*args, has_verbose):
     """
     Execute nvme CLI tool with specified arguments and return parsed JSON output.
     """
@@ -149,7 +164,12 @@ def exec_nvme_json(*args):
     # be verbose. Older versions of nvme-cli optionally produced verbose output if the --verbose
     # flag was specified. In order to avoid having to handle two different JSON schemas, always
     # add the --verbose flag.
-    output = exec_nvme(*args, "--output-format", "json", "--verbose")
+    # Note2: nvme-cli 2.3 that ships with Debian 12 has no verbose parameter for smart-log command only
+
+    if "smart-log" in args and not has_verbose:
+        output = exec_nvme(*args, "--output-format", "json")
+    else:
+        output = exec_nvme(*args, "--output-format", "json", "--verbose")
     return json.loads(output)
 
 
@@ -161,7 +181,8 @@ def main():
         cli_version = "unknown"
     metrics["nvmecli"].labels(cli_version).set(1)
 
-    device_list = exec_nvme_json("list")
+    has_verbose = nvme_has_verbose()
+    device_list = exec_nvme_json("list", has_verbose=has_verbose)
 
     for device in device_list["Devices"]:
         for subsys in device["Subsystems"]:
@@ -187,7 +208,9 @@ def main():
                     # FIXME: The smart-log should only need to be fetched once per controller, not
                     # per namespace. However, in order to preserve legacy metric labels, fetch it
                     # per namespace anyway. Most consumer grade SSDs will only have one namespace.
-                    smart_log = exec_nvme_json("smart-log", os.path.join("/dev", device_name))
+                    smart_log = exec_nvme_json(
+                        "smart-log", os.path.join("/dev", device_name), has_verbose=has_verbose
+                    )
 
                     # Various counters in the NVMe specification are 128-bit, which would have to
                     # discard resolution if converted to a JSON number (i.e., float64_t). Instead,
@@ -208,9 +231,14 @@ def main():
                     metrics["avail_spare"].labels(device_name).set(smart_log["avail_spare"] / 100)
                     metrics["spare_thresh"].labels(device_name).set(smart_log["spare_thresh"] / 100)
                     metrics["percent_used"].labels(device_name).set(smart_log["percent_used"] / 100)
-                    metrics["critical_warning"].labels(device_name).set(
-                        smart_log["critical_warning"]["value"]
-                    )
+                    if has_verbose:
+                        metrics["critical_warning"].labels(device_name).set(
+                            smart_log["critical_warning"]["value"]
+                        )
+                    else:
+                        metrics["critical_warning"].labels(device_name).set(
+                            smart_log["critical_warning"]
+                        )
                     metrics["media_errors"].labels(device_name).inc(int(smart_log["media_errors"]))
                     metrics["num_err_log_entries"].labels(device_name).inc(
                         int(smart_log["num_err_log_entries"])

From fd7ac9a7311304e750a2eeec51249759736f6f4c Mon Sep 17 00:00:00 2001
From: deajan
Date: Tue, 23 Sep 2025 13:01:31 +0200
Subject: [PATCH 2/3] Fix linter message for too long comment line

---
 nvme_metrics.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nvme_metrics.py b/nvme_metrics.py
index 5f27918..9cb8f94 100755
--- a/nvme_metrics.py
+++ b/nvme_metrics.py
@@ -164,7 +164,8 @@ def exec_nvme_json(*args, has_verbose):
     # be verbose. Older versions of nvme-cli optionally produced verbose output if the --verbose
     # flag was specified. In order to avoid having to handle two different JSON schemas, always
     # add the --verbose flag.
-    # Note2: nvme-cli 2.3 that ships with Debian 12 has no verbose parameter for smart-log command only
+    # Note2: nvme-cli 2.3 that ships with Debian 12 has
+    # no verbose parameter for smart-log command only
 
     if "smart-log" in args and not has_verbose:
         output = exec_nvme(*args, "--output-format", "json")

From 24ab676d2ed98d890a45821db58f662887c5b315 Mon Sep 17 00:00:00 2001
From: deajan
Date: Tue, 23 Sep 2025 13:14:14 +0200
Subject: [PATCH 3/3] Fix error when no nvme devices are found

---
 nvme_metrics.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/nvme_metrics.py b/nvme_metrics.py
index 9cb8f94..71bf108 100755
--- a/nvme_metrics.py
+++ b/nvme_metrics.py
@@ -167,10 +167,20 @@ def exec_nvme_json(*args, has_verbose):
     # Note2: nvme-cli 2.3 that ships with Debian 12 has
     # no verbose parameter for smart-log command only
 
-    if "smart-log" in args and not has_verbose:
-        output = exec_nvme(*args, "--output-format", "json")
-    else:
-        output = exec_nvme(*args, "--output-format", "json", "--verbose")
+    try:
+        if "smart-log" in args and not has_verbose:
+            output = exec_nvme(*args, "--output-format", "json")
+        else:
+            output = exec_nvme(*args, "--output-format", "json", "--verbose")
+    except subprocess.CalledProcessError as exc:
+        try:
+            error = json.loads(exc.output).get("error", "")
+        except (json.JSONDecodeError, AttributeError) as parse_exc:
+            raise ValueError("Cannot parse nvme binary output") from parse_exc
+        if "Failed to scan topology" in str(error):
+            return {"Devices": []}
+        # Any other failure is a real error; propagate it instead of re-parsing a dict below
+        raise
     return json.loads(output)
 
 