From e43e215b0da6747257960e216e9e04bf533bca7b Mon Sep 17 00:00:00 2001 From: Giacomo Marciani Date: Tue, 12 Aug 2025 16:47:17 -0400 Subject: [PATCH] [Bugfix] Fix logic in function are_queues_updated?. The function was assuming to be called only during cluster update, leading to cluster creation failure if it gets called on cluster creation. The reason is that it was assuming that the file containing the previous cluster config always exists, which is not the case on cluster creation. --- .../libraries/update.rb | 5 +- .../spec/unit/libraries/update_spec.rb | 77 +++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/cookbooks/aws-parallelcluster-slurm/libraries/update.rb b/cookbooks/aws-parallelcluster-slurm/libraries/update.rb index d4747d502c..a2df691e76 100644 --- a/cookbooks/aws-parallelcluster-slurm/libraries/update.rb +++ b/cookbooks/aws-parallelcluster-slurm/libraries/update.rb @@ -19,10 +19,13 @@ require 'timeout' # Verify if Scheduling section of cluster configuration and compute node bootstrap_timeout have been updated +# If the previous cluster config file does not exists, it assumes that queues have not been updated. def are_queues_updated? require 'yaml' config = YAML.safe_load(File.read(node['cluster']['cluster_config_path'])) - previous_config = YAML.safe_load(File.read(node['cluster']['previous_cluster_config_path'])) + previous_cluster_config_path = node['cluster']['previous_cluster_config_path'] + return false unless File.exist?(previous_cluster_config_path) + previous_config = YAML.safe_load(File.read(previous_cluster_config_path)) config["Scheduling"] != previous_config["Scheduling"] or is_compute_node_bootstrap_timeout_updated?(previous_config, config) end diff --git a/cookbooks/aws-parallelcluster-slurm/spec/unit/libraries/update_spec.rb b/cookbooks/aws-parallelcluster-slurm/spec/unit/libraries/update_spec.rb index c4e9c8297b..22a8f28f3d 100644 --- a/cookbooks/aws-parallelcluster-slurm/spec/unit/libraries/update_spec.rb +++ b/cookbooks/aws-parallelcluster-slurm/spec/unit/libraries/update_spec.rb @@ -59,4 +59,81 @@ } include_examples "the correct method", changeset, true end + + describe "aws-parallelcluster-slurm:libraries:are_queues_updated" do + CLUSTER_CONFIG_PATH = "/CLUSTER_CONFIG_PATH".freeze + PREVIOUS_CLUSTER_CONFIG_PATH = "/PREVIOUS_CLUSTER_CONFIG_PATH".freeze + + let(:node) do + { + "cluster" => { + "cluster_config_path" => CLUSTER_CONFIG_PATH, + "previous_cluster_config_path" => PREVIOUS_CLUSTER_CONFIG_PATH, + }, + } + end + + shared_examples "the correct result" do |config, previous_config, file_exists, expected_result| + it "returns #{expected_result}" do + allow(File).to receive(:exist?).with(PREVIOUS_CLUSTER_CONFIG_PATH).and_return(file_exists) + allow(File).to receive(:read).with(CLUSTER_CONFIG_PATH).and_return(YAML.dump(config)) + if file_exists + allow(File).to receive(:read).with(PREVIOUS_CLUSTER_CONFIG_PATH).and_return(YAML.dump(previous_config)) + end + result = are_queues_updated? + expect(result).to eq(expected_result) + end + end + + context "when previous cluster config file does not exist" do + config = { + "Scheduling" => { "SlurmQueues" => [] }, + } + previous_config = nil + include_examples "the correct result", config, previous_config, false, false + end + + context "when Scheduling sections are identical and bootstrap timeout unchanged" do + config = { + "Scheduling" => { "SlurmQueues" => [{ "Name" => "queue1" }] }, + } + previous_config = { + "Scheduling" => { "SlurmQueues" => [{ "Name" => "queue1" }] }, + } + include_examples "the correct result", config, previous_config, true, false + end + + context "when Scheduling sections are identical and bootstrap timeout is updated" do + config = { + "Scheduling" => { "SlurmQueues" => [{ "Name" => "queue1" }] }, + "DevSettings" => { "Timeouts" => { "ComputeNodeBootstrapTimeout" => 3600 } }, + } + previous_config = { + "Scheduling" => { "SlurmQueues" => [{ "Name" => "queue1" }] }, + "DevSettings" => { "Timeouts" => { "ComputeNodeBootstrapTimeout" => 1800 } }, + } + include_examples "the correct result", config, previous_config, true, true + end + + context "when Scheduling sections are identical and bootstrap timeout changes from default to explicit value" do + config = { + "Scheduling" => { "SlurmQueues" => [{ "Name" => "queue1" }] }, + "DevSettings" => { "Timeouts" => { "ComputeNodeBootstrapTimeout" => 3600 } }, + } + previous_config = { + "Scheduling" => { "SlurmQueues" => [{ "Name" => "queue1" }] }, + } + include_examples "the correct result", config, previous_config, true, true + end + + context "when Scheduling sections are different" do + config = { + "Scheduling" => { "SlurmQueues" => [{ "Name" => "queue2" }] }, + } + previous_config = { + "Scheduling" => { "SlurmQueues" => [{ "Name" => "queue1" }] }, + } + include_examples "the correct result", config, previous_config, true, true + end + end end