#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate that srun -N uses the MaxNode and GrpNode limit in
#          QoS and that the first limit on the GrpNode and MaxNode
#          limit is used in an association.
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2011-2014 SchedMD LLC
# Written by Nathan Yee
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals
source ./globals_accounting

set test_id     "1.74"
set node_cnt    0
set cluster     [get_cluster_name]
set node_name   ""
set user        ""
set acct        "test$test_id\_acct"
set acct_c1     "test$test_id\_acct_c_1"
set acct_c2     "test$test_id\_acct_c_2"
set qos         "test$test_id\_qos"
set access_err  0
set exit_code   0

# Option arrays passed (as key/value lists) to mod_qos and mod_acct.
# The association array always carries the test QoS.
array set mod_qos_vals {}
array set mod_acct_desc_vals {}
array set mod_acct_vals {}
array set mod_acct_assoc_vals {}
set mod_acct_assoc_vals(qos) $qos

print_header $test_id

# Skip (exit with the still-zero exit code) when the configuration cannot
# support this test.
if { [test_using_slurmdbd] == 0 } {
	send_user "\nWARNING: This test can't be run without AccountStorageType=slurmdbd\n"
	exit $exit_code
} elseif { [test_enforce_limits] == 0 } {
	send_user "\nWARNING: This test can't be run without AccountingStorageEnforce=limits\n"
	exit $exit_code
} elseif { [test_enforce_qos_set] == 0 } {
	send_user "\nWARNING: This test can't be run without AccountingStorageEnforce=qos\n"
	exit $exit_code
}
if {[string compare [check_accounting_admin_level] "Administrator"]} {
	send_user "\nThis test can't be run without being an Accounting administrator.\n"
	exit $exit_code
}

set user [get_my_user_name]

#
# cleanup - remove the three test accounts and the test QoS.
#
# Sets the global exit_code to 1 when the QoS removal reports failure.
#
proc cleanup { } {
	global acct acct_c1 acct_c2 exit_code qos

	remove_acct "" "$acct,$acct_c1,$acct_c2"
	if {[remove_qos "$qos"] != 0} {
		send_user "\nWARNING: not authorized to perform this test\n"
		# Assign to the variable itself; "set $exit_code 1" would create
		# a variable named after exit_code's current value instead.
		set exit_code 1
	}
}

#
# srun_test - run "srun -N1-$node_cnt" under the given account and verify
# how many tasks actually ran.
#
#   exp_cnt - number of tasks expected to report a job id
#   account - account name passed to srun via -A
#
# Sets the global exit_code to 1 on timeout, on a missing job id (when
# tasks were expected), on a job that does not reach DONE, or on a task
# count mismatch.
#
proc srun_test {exp_cnt account} {
	global srun exit_code node_cnt number bin_printenv

	set job_id 0
	set count  0

	spawn $srun -l -t1 -A $account -N1-$node_cnt $bin_printenv SLURM_JOB_ID
	expect {
		-re "$number: ($number)" {
			incr count
			set job_id $expect_out(1,string)
			exp_continue
		}
		-re "($number)-($number):($number)" {
			# NOTE: POE format (a task-id range covers several tasks)
			incr count [expr {$expect_out(2,string) - $expect_out(1,string) + 1}]
			set job_id $expect_out(3,string)
			exp_continue
		}
		-re "$number:($number)" {
			# NOTE: POE format
			incr count
			set job_id $expect_out(1,string)
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: srun is not responding\n"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if {$job_id == 0} {
		# No job id is only acceptable when no task was expected to run
		if {$exp_cnt != 0} {
			send_user "\nFAILURE: did not get job id\n"
			set exit_code 1
		}
	} else {
		if {[wait_for_job $job_id DONE] != 0} {
			send_user "\nFAILURE: waiting for job $job_id to complete\n"
			set exit_code 1
		}
	}

	if {$count != $exp_cnt} {
		send_user "\nFAILURE: incorrect number of tasks were run ($count != $exp_cnt)\n"
		set exit_code 1
	}
}

#
# add_child - create account "child" under "parent" with the given
# maxnode/grpnode values and add the current user to it.
#
#   parent  - parent account name
#   child   - name of the account to create
#   maxnode - value for the account's maxnode option
#   grpnode - value for the account's grpnode option
#
# Returns 0 on success; on failure increments the global exit_code and
# returns 1.
#
proc add_child {parent child maxnode grpnode} {
	global user exit_code cluster

	set acct_req(cluster) $cluster
	set acct_req(parent)  $parent
	set acct_req(maxnode) $maxnode
	set acct_req(grpnode) $grpnode

	set user_req(cluster) $cluster
	set user_req(account) $child

	if { [add_acct $child [array get acct_req]] } {
		send_user "\nFAILURE: child account was not added\n"
		incr exit_code
		return 1
	}

	if { [add_user $user [array get user_req]] } {
		send_user "\nFAILURE: user was not added to child account\n"
		incr exit_code
		return 1
	}

	return 0
}

# Remove any vestigial test accounts
cleanup

# Setup
set partition [default_partition]
set node_cnt  [available_nodes $partition "idle,alloc,comp"]
if {$node_cnt < 3} {
	send_user "\nWARNING: partition $partition has too few nodes ($node_cnt < 3)\n"
	exit $exit_code
}

# Wait for 3 nodes to be in idle state
if {[wait_for_node $partition idle 3]} {
	send_user "\nWARNING: partition $partition lacks 3 idle nodes\n"
	exit $exit_code
}
set node_cnt [available_nodes $partition idle]

# Add parent account (off root)
if {[add_child "root" $acct -1 -1]} {
	cleanup
	exit 1
}

# Now run test using MaxNode limits of the qos
if {[add_qos $qos ""]} {
	send_user "\nFAILURE: qos was not added\n"
	cleanup
	exit 1
}

# Add child account
if {[add_child $acct $acct_c1 [expr {$node_cnt - 1}] -1]} {
	cleanup
	exit 1
}

# Add another child account
if {[add_child $acct_c1 $acct_c2 [expr {$node_cnt - 2}] -1]} {
	cleanup
	exit 1
}

send_user "############################# Test QoS Limits #################################\n"

if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} {
	send_user "\nFAILURE: account was not modified\n"
	cleanup
	exit 1
}

# base line test
srun_test $node_cnt $acct

set mod_qos_vals(MaxNodes) [expr {$node_cnt - 1}]
if {[mod_qos $qos [array get mod_qos_vals]]} {
	send_user "\nFAILURE: QOS was not modified\n"
	cleanup
	exit 1
}
# Queue the reset of the QoS MaxNodes limit for the next mod_qos call
set mod_qos_vals(MaxNodes) -1

# some times the message takes a little time for some reason
sleep 1
srun_test [expr {$node_cnt - 1}] $acct

# now make sure the maxnodes of the QOS overrides the association
set mod_acct_assoc_vals(maxnodes) 1
if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} {
	send_user "\nFAILURE: account was not modified\n"
	cleanup
	exit 1
}

srun_test [expr {$node_cnt - 1}] $acct

# Reset acct maxnodes
set mod_acct_assoc_vals(maxnodes) -1
if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} {
	send_user "\nFAILURE: account was not modified\n"
	cleanup
	exit 1
}

# Now run test using GrpNode limits of qos
set mod_qos_vals(GrpNodes) [expr {$node_cnt - 1}]
if {[mod_qos $qos [array get mod_qos_vals]]} {
	send_user "\nFAILURE: QOS was not modified\n"
	cleanup
	exit 1
}
sleep 1
srun_test [expr {$node_cnt - 1}] $acct

# now make sure the grpnodes of the QOS overrides the association
set mod_acct_assoc_vals(grpnodes) 1
if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} {
	send_user "\nFAILURE: account was not modified\n"
	cleanup
	exit 1
}

srun_test [expr {$node_cnt - 1}] $acct

# Now make sure maxnodes is the max of the association and grpnodes of the
# QOS doesn't override it.
set mod_acct_assoc_vals(grpnodes) -1
set mod_acct_assoc_vals(maxnodes) 1
if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} {
	send_user "\nFAILURE: account was not modified\n"
	cleanup
	exit 1
}

srun_test 1 $acct

set mod_acct_assoc_vals(maxnodes) -1

send_user "##################### Test limits based on associations #####################\n"

#
# MaxNodes Limit
#
send_user "\nTesting Association MaxNode Limits\n"

set mod_qos_vals(GrpNodes) -1
if {[mod_qos $qos [array get mod_qos_vals]]} {
	send_user "\nFAILURE: QOS was not modified\n"
	cleanup
	exit 1
}

# reset
if {[mod_acct $acct [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]]} {
	send_user "\nFAILURE: account was not modified\n"
	cleanup
	exit 1
}

# Run srun test on parent and child accounts
srun_test $node_cnt $acct

srun_test [expr {$node_cnt - 1}] $acct_c1

srun_test [expr {$node_cnt - 2}] $acct_c2

#
# GrpNodes Limit
#
send_user "\nTesting GrpNode Limits\n"

# Modify child with GrpNode.  Reuse the lowercase keys already present in
# the array so that only one maxnodes/grpnodes value is ever passed.
set mod_acct_assoc_vals(maxnodes) -1
set mod_acct_assoc_vals(grpnodes) [expr {$node_cnt - 1}]
if {[mod_acct $acct_c1 [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} {
	send_user "\nFAILURE: account was not modified\n"
	cleanup
	exit 1
}

# Modify child with GrpNode
set mod_acct_assoc_vals(maxnodes) -1
set mod_acct_assoc_vals(grpnodes) [expr {$node_cnt - 2}]
if {[mod_acct $acct_c2 [array get mod_acct_desc_vals] [array get mod_acct_vals] [array get mod_acct_assoc_vals]] != 0} {
	send_user "\nFAILURE: account was not modified\n"
	cleanup
	exit 1
}

# Run srun test on parent and child accounts
srun_test $node_cnt $acct

srun_test [expr {$node_cnt - 1}] $acct_c1

srun_test [expr {$node_cnt - 2}] $acct_c2

cleanup

if {$exit_code == 0} {
	send_user "\nSUCCESS\n"
} else {
	send_user "\nFAILURE\n"
}

exit $exit_code