From e0cc721a3fad2c3475de35921b11fe29d2a14b63 Mon Sep 17 00:00:00 2001
From: Helena Greebe
Date: Tue, 15 Jul 2025 10:54:44 -0400
Subject: [PATCH 1/2] Delete cluster name state file whenever slurm accounting is configured or updated

---
 .../recipes/config/config_slurm_accounting.rb | 9 +++++++++
 .../recipes/update/clear_slurm_accounting.rb | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_slurm_accounting.rb b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_slurm_accounting.rb
index 57c304b531..c9d8a97491 100644
--- a/cookbooks/aws-parallelcluster-slurm/recipes/config/config_slurm_accounting.rb
+++ b/cookbooks/aws-parallelcluster-slurm/recipes/config/config_slurm_accounting.rb
@@ -88,6 +88,15 @@
     retry_delay 10
   end unless kitchen_test? || (node['cluster']['node_type'] == "ExternalSlurmDbd")
 
+  bash "Remove existing cluster name state file" do # declared ahead of the "bootstrap slurm database" resource below
+    user 'root'
+    group 'root'
+    code <<-CLUSTERSTATE # plain rm (no -f): relies on the only_if guard to avoid failing on a missing file
+      rm /var/spool/slurm.state/clustername
+    CLUSTERSTATE
+    only_if { ::File.exist?('/var/spool/slurm.state/clustername') } # skip the resource when there is nothing to delete
+  end
+
   bash "bootstrap slurm database" do
     user 'root'
     group 'root'
diff --git a/cookbooks/aws-parallelcluster-slurm/recipes/update/clear_slurm_accounting.rb b/cookbooks/aws-parallelcluster-slurm/recipes/update/clear_slurm_accounting.rb
index 9aca50d359..4d64a3677e 100644
--- a/cookbooks/aws-parallelcluster-slurm/recipes/update/clear_slurm_accounting.rb
+++ b/cookbooks/aws-parallelcluster-slurm/recipes/update/clear_slurm_accounting.rb
@@ -23,3 +23,12 @@
   supports restart: false
   action %i(disable stop)
 end
+
+bash "Remove existing cluster name state file" do # declared after the resource above that uses action %i(disable stop)
+  user 'root'
+  group 'root'
+  code <<-CLUSTERSTATE # plain rm (no -f): relies on the only_if guard to avoid failing on a missing file
+    rm /var/spool/slurm.state/clustername
+  CLUSTERSTATE
+  only_if { ::File.exist?('/var/spool/slurm.state/clustername') } # skip the resource when there is nothing to delete
+end

From 5620a52bec9cb28fb4cfd02501a11f12eacd26c8 Mon Sep 17 00:00:00 2001
From: Helena Greebe
Date: Wed, 16 Jul 2025 09:47:41 -0400
Subject: [PATCH 2/2] Add spec test to cover the deletion of the cluster id state

---
 .../recipes/clear_slurm_accounting_spec.rb | 28 +++++++++++++++++++
 .../recipes/config_slurm_accounting_spec.rb | 4 +++
 2 files changed, 32 insertions(+)
 create mode 100644 cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/clear_slurm_accounting_spec.rb

diff --git a/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/clear_slurm_accounting_spec.rb b/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/clear_slurm_accounting_spec.rb
new file mode 100644
index 0000000000..6a186241e2
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/clear_slurm_accounting_spec.rb
@@ -0,0 +1,28 @@
+require 'spec_helper'
+
+describe 'aws-parallelcluster-slurm::clear_slurm_accounting' do # unit spec for the clear_slurm_accounting recipe
+  for_all_oses do |platform, version|
+    context "on #{platform}#{version}" do
+      cached(:chef_run) do
+        runner = runner(platform: platform, version: version) do |node|
+          mock_file_exists("/var/spool/slurm.state/clustername", true) # presumably stubs File.exist? so the recipe's only_if passes; helper is defined outside this patch
+          node.override['cluster']['slurmdbd_service_enabled'] = true
+        end
+        runner.converge(described_recipe)
+      end
+      cached(:node) { chef_run.node }
+
+      it 'stops the slurm database daemon' do
+        is_expected.to disable_service("slurmdbd")
+      end
+
+      it 'deletes the Slurm database password update script' do
+        is_expected.to delete_file("#{node['cluster']['scripts_dir']}/slurm/update_slurm_database_password.sh")
+      end
+
+      it 'Removes existing cluster name state file' do # matches the bash resource by its exact name
+        is_expected.to run_bash("Remove existing cluster name state file")
+      end
+    end
+  end
+end
diff --git a/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/config_slurm_accounting_spec.rb b/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/config_slurm_accounting_spec.rb
index 822d692d2b..43c48d7937 100644
--- a/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/config_slurm_accounting_spec.rb
+++ b/cookbooks/aws-parallelcluster-slurm/spec/unit/recipes/config_slurm_accounting_spec.rb
@@ -10,6 +10,7 @@
               allow_any_instance_of(Object).to receive(:are_mount_or_unmount_required?).and_return(false)
               allow_any_instance_of(Object).to receive(:dig).and_return(true)
               RSpec::Mocks.configuration.allow_message_expectations_on_nil = true
+              mock_file_exists("/var/spool/slurm.state/clustername", true) # presumably stubs File.exist? so the recipe's only_if passes; helper is defined outside this patch
               node.override['cluster']['slurmdbd_service_enabled'] = enable_service
             end
             runner.converge(described_recipe)
@@ -70,6 +71,9 @@
               )
           end
           if enable_service == "true"
+            it 'Removes existing cluster name state file' do # only asserted in the enable_service == "true" branch
+              is_expected.to run_bash("Remove existing cluster name state file")
+            end
             it 'starts the slurm database daemon' do
               is_expected.to enable_service("slurmdbd")
               is_expected.to start_service("slurmdbd")