#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate that DenyOnLimit is enforced on QoS and
#          Association limits.
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2015 SchedMD LLC
# Written by Nathan Yee
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
source ./globals_accounting

set test_id              21.35
set test_acct            "test$test_id\_acct"
set test_qos             "test$test_id\_qos"
set file_in              "test$test_id.sc"
set max_wall_val         2
set job_id               0
set exit_code            0
set save_billing_weights ""
set tres_cpu_mult        2

# Limit value applied for each TRES type (the billing value is additionally
# multiplied by tres_cpu_mult inside check_limit)
array set max_tres_type {
	billing 2
	cpu     2
	node    1
}

# sacctmgr limit names, indexed by the outer loop counter "i"
array set lim_type_type {
	0 MaxTres
	1 MaxTresPerUser
	2 MaxTresMin
}

# Values used to reset the QoS limits
array set reset_qos_val {
	MaxCpus                   -1
	MaxNode                   -1
	MaxJobs                   -1
	MaxSubmitJobs             -1
	MaxCpuMin                 -1
	MaxWall                   -1
	MaxCpusPerUser            -1
	MaxNodesPerUser           -1
	MaxTRESMinsPerJob=billing -1
	MaxTRESPerJob=billing     -1
	MaxTRESPerUser=billing    -1
}

# Values used to reset the association limits
array set reset_assoc_val {
	MaxCpus                   -1
	MaxNode                   -1
	MaxJobs                   -1
	MaxSubmitJobs             -1
	MaxCpuMin                 -1
	MaxWall                   -1
	MaxTRESMinsPerJob=billing -1
	MaxTRESPerJob=billing     -1
}

print_header $test_id

#
# Apply one TRES limit to an entity, then submit a job that asks for one
# more than the limit and verify that sbatch refuses it.
#
# type - sacctmgr entity kind to modify ("qos" or "account")
# name - name of that entity
#
# Besides its arguments the proc reads the globals "i" (key into
# lim_type_type) and "option" (key into max_tres_type); both are set by the
# loops that drive the test further down in this file.
#
# If the job is not refused, the job is cancelled, cleanup is run and the
# test exits with status 1. Otherwise the QoS and association limits are
# reset before returning.
#
proc check_limit { type name } {
	global sacctmgr sbatch lim_type_type i option max_tres_type
	global job_id test_qos test_acct reset_qos_val reset_assoc_val
	global file_in number exit_code tres_cpu_mult

	set limit_name $lim_type_type($i)
	set type_value $max_tres_type($option)
	if {$option eq "billing"} {
		# The partition bills tres_cpu_mult per CPU, so scale the limit
		set type_value [expr {$type_value * $tres_cpu_mult}]
	}

	set modified 0
	spawn $sacctmgr -i mod $type $name set $limit_name=$option=$type_value
	expect {
		-re "Modified" {
			set modified 1
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: sacctmgr is not responding\n"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$modified != 1} {
		send_user "\nFAILURE: sacctmgr did not modify $limit_name=$option=$type_value\n"
		set exit_code 1
	}

	# Request one more than the configured limit: nodes via -N, everything
	# else via the task count -n
	set tmp [expr {$max_tres_type($option) + 1}]
	if {$option eq "node"} {
		set flag "-N$tmp"
	} else {
		set flag "-n$tmp"
	}

	set match 0
	spawn $sbatch $flag -t1 -o/dev/null --account=$test_acct $file_in
	expect {
		-re "Job violates accounting/QOS policy" {
			set match 1
			exp_continue
		}
		-re "Node count specification invalid" {
			send_user "\nWARNING: Test exceeds available node count\n"
			set match 1
			exp_continue
		}
		-re "Submitted batch job ($number)" {
			set job_id $expect_out(1,string)
			set exit_code 1
		}
		timeout {
			send_user "\nFAILURE: sbatch is not responding\n"
			set exit_code 1
		}
		eof {
			wait
		}
	}
	if {$match != 1 || $job_id} {
		send_user "\nFAILURE: job should have been killed upon submission\n"
		cancel_job $job_id
		cleanup
		exit 1
	}

	# Reset limits
	mod_qos $test_qos [array get reset_qos_val]
	mod_acct $test_acct "" "" [array get reset_assoc_val]
}

#
# Remove everything the test created: the test account, the test QoS and the
# batch script. The partition's TRESBillingWeights are restored when a
# previous value was saved in save_billing_weights.
#
# NOTE(review): when the partition had no TRESBillingWeights before the test,
# save_billing_weights stays empty and the value set by this test is left in
# place - confirm whether that is intended.
#
proc cleanup { } {
	global sacctmgr file_in bin_rm test_acct test_qos exit_code
	global save_billing_weights scontrol

	# Delete test account and qos
	spawn $sacctmgr -i delete account $test_acct
	expect {
		timeout {
			send_user "\nFAILURE: sacctmgr is not responding\n"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	spawn $sacctmgr -i delete qos $test_qos
	expect {
		timeout {
			send_user "\nFAILURE: sacctmgr is not responding\n"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if {$save_billing_weights ne ""} {
		spawn $scontrol update partitionname=[default_partition] TRESBillingWeights=$save_billing_weights
		expect {
			-re "error" {
				send_user "\nFAILURE: failed to reset TRESBillingWeights\n"
				set exit_code 1
			}
			timeout {
				send_user "\nFAILURE: scontrol is not responding\n"
				set exit_code 1
			}
			eof {
				wait
			}
		}
	}

	# Remove test script
	exec $bin_rm -f $file_in
}

#
# Skip the test when the configuration cannot exercise it
#
if { [test_account_storage] == 0 } {
	send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n"
	exit 0
} elseif { [test_enforce_limits] == 0 } {
	send_user "\nWARNING: This test can't be run without a usable AccountingStorageEnforce\n"
	exit 0
}
if { [test_limits_enforced] == 0 } {
	send_user "\nWARNING: This test can't be run without enforcing limits\n"
	exit 0
}
if {[test_super_user] == 0} {
	send_user "\nWARNING Test can only be ran as SlurmUser\n"
	exit 0
}

# Clean up any vestigial data
cleanup

make_bash_script $file_in " sleep 2"

# Add the test QoS. "added" must start at 0 so that a missing "Adding QOS"
# confirmation is actually detected below.
set added 0
spawn $sacctmgr -i create qos $test_qos
expect {
	-re "Adding QOS" {
		set added 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sacctmgr is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$added != 1} {
	send_user "\nFAILURE: test QoS $test_qos was not created\n"
	cleanup
	exit 1
}

# Add the test account, attached to the test QoS
set match 0
spawn $sacctmgr -i create account $test_acct qos=$test_qos
expect {
	-re "Associations" {
		set match 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sacctmgr is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 1} {
	send_user "\nFAILURE: test account $test_acct was not created\n"
	cleanup
	exit 1
}

# Add the current user to the test account
spawn $sacctmgr -i create user name=[get_my_user_name] account=$test_acct
expect {
	timeout {
		send_user "\nFAILURE: sacctmgr is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Set DenyOnLimit qos flag
set modified 0
spawn $sacctmgr -i mod qos $test_qos set flag=DenyOnLimit
expect {
	-re "Modified qos" {
		set modified 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sacctmgr is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$modified != 1} {
	# Without the flag the rest of the test is meaningless; exit_code is
	# checked before the limit tests start
	send_user "\nFAILURE: sacctmgr did not set flag=DenyOnLimit on QoS $test_qos\n"
	set exit_code 1
}

# Remember the partition's current billing weights so cleanup can restore them
spawn $scontrol show part [default_partition]
expect {
	-re "TRESBillingWeights=(\\S+)" {
		set save_billing_weights $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Bill tres_cpu_mult per CPU for the duration of the test
spawn $scontrol update partitionname=[default_partition] tresbillingweights=cpu=$tres_cpu_mult
expect {
	-re "error" {
		send_user "\nFAILURE: failed to set TRESBillingWeights\n"
		set exit_code 1
	}
	timeout {
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

if {$exit_code != 0} {
	cleanup
	exit 1
}

#
# Test every limit name in lim_type_type against every TRES in max_tres_type.
# "i" and "option" are read as globals by check_limit. The association is
# only tested for the first limit name (index 0).
#
for {set i 0} {$i < 3} {incr i} {
	foreach option [array names max_tres_type] {
		# Check DenyOnLimit for QoS
		send_user "\n==== Testing QoS ====\n"
		check_limit "qos" $test_qos

		if {$i == 0} {
			# Check DenyOnLimit for Association
			send_user "\n==== Testing Association ====\n"
			check_limit "account" $test_acct
		}
	}
}

#
# Test Max Wall
#
set modified 0
spawn $sacctmgr -i mod qos $test_qos set maxwall=$max_wall_val
expect {
	-re "Modified qos" {
		set modified 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sacctmgr is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$modified != 1} {
	send_user "\nFAILURE: sacctmgr did not modify maxwall=$max_wall_val\n"
	set exit_code 1
}

# Ask for one minute more than the wall limit; the submission must be refused
set match 0
spawn $sbatch -N1 -t[expr {$max_wall_val + 1}] -o/dev/null --account=$test_acct $file_in
expect {
	-re "Job violates accounting/QOS policy" {
		set match 1
		exp_continue
	}
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		set exit_code 1
	}
	timeout {
		send_user "\nFAILURE: sbatch is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 1 || $job_id} {
	send_user "\nFAILURE: job should have been killed upon submittion\n"
	set exit_code 1
}

# Cancel the submitted job
cancel_job $job_id

cleanup

if {$exit_code == 0} {
	print_success $test_id
} else {
	send_user "\nFAILURE: test $test_id\n"
}
# Propagate the result to the caller instead of always ending with status 0
exit $exit_code