#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate --cpu-freq is enforced when using non-numeric values
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2014 SchedMD LLC
# Written by Nathan Yee
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

set test_id      1.75
set file_id      "test$test_id\_id.bash"
set file_in      "test$test_id\.bash"
set file_out     "test$test_id\.out"
set node         ""
set threads      0
set job_id       0
set exit_code    0

# Symbolic frequency levels passed to --cpu-freq. The values are filled in
# by sub_job with the frequency that sacct reports for each level.
array set freq_lvl_1 {
	high         0
	highm1       0
	medium       0
	low          0
}

# Governor names passed to --cpu-freq. Filled in by sub_job the same way.
array set freq_lvl_2 {
	conservative 0
	ondemand     0
	performance  0
	powersave    0
}

print_header $test_id

#
# Configuration prerequisites: each failed check prints a warning and ends
# the test with exit status 0 (skipped, not failed).
#
if { [test_using_slurmdbd] == 0 } {
	send_user "\nWARNING: This test can't be run without AccountStorageType=slurmdbd\n"
	exit 0
}

if {![string compare [test_proctrack] "linuxproc"]} {
	send_user "\nWARNING: this test cannot run on ProctrackType of linuxproc\n"
	exit 0
}

if {![string compare [get_job_acct_type] "none"]} {
	send_user "\nWARNING: this test cannot run on JobAcctGatherType of none\n"
	exit 0
}

if {[slurmd_user_root] == 0} {
	send_user "\nWARNING: This test is incompatible with SlurmdUser != root\n"
	exit 0
}

if {[test_cpu_affinity_or_cgroup] == 0} {
	send_user "\nWARNING: This test requires some form of task affinity\n"
	exit 0
}

if {![test_accting_steps]} {
	send_user "\nWARNING: This test can not be run with nosteps or nojobs "
	send_user "(AccountingStorageEnforce)\n"
	exit 0
}

#
# sub_job - run one srun job per key of the given table with
#           --cpu-freq=<key> and record the frequency sacct reports for it.
#
# freq:    flat key/value list (as produced by "array get") whose keys are
#          the --cpu-freq option values to test
#
# Returns: flat key/value list with the same keys. Each value is the number
#          parsed from "sacct -oavecpufreq" (divided by 1000 when the unit
#          suffix is "M"), or -1 when srun answered "not allowed" for an
#          option that is not contained in avail_governors. A value that
#          is still 0 after sacct ran is reported as a failure.
#
# Side effects: sets the global timeout to 120, overwrites the global
#          job_id, sets the global exit_code to 1 on failures and exits the
#          whole test if srun never reports a job id.
#
proc sub_job { freq } {
	global srun sacct node threads job_id number wait_for_job float timeout exit_code
	global alpha_numeric_under file_id avail_governors

	set timeout 120
	array set this_freq $freq

	foreach option [array names this_freq] {
		send_user "\n======= TESTING FREQUENCY/GOVERNOR $option =======\n"
		set skip false
		set job_id 0
		spawn $srun -t1 --cpu-freq=$option -n$threads -w$node ./$file_id
		expect {
			-re "not allowed" {
				# A rejection is only acceptable for options
				# that are not among the available governors.
				if {[string first $option $avail_governors] == -1} {
					send_user "\nThis error is expected, no worries\n"
					set skip true
				} else {
					send_user "\nFAILURE: This CPU frequency should be valid\n"
					set exit_code 1
				}
				exp_continue
			}
			-re "SLURM_JOB_ID=($number)" {
				set job_id $expect_out(1,string)
				exp_continue
			}
			timeout {
				send_user "\nFAILURE: srun is not responding\n"
				set exit_code 1
			}
			eof {
				wait
			}
		}

		# Mark rejected-but-expected options with -1 so that callers
		# can tell them apart from "no frequency was reported" (0).
		if {$skip} {
			set this_freq($option) -1
			continue
		}

		if {$job_id == 0} {
			send_user "\nFAILURE: srun did not submit job\n"
			exit 1
		}

		if {[wait_for_job $job_id "DONE"] != 0} {
			send_user "\nFAILURE: error waiting for job $job_id to complete\n"
			cancel_job $job_id
			set exit_code 1
		}

		spawn $sacct -j$job_id -oavecpufreq --noheader
		expect {
			-re "($float)($alpha_numeric_under)" {
				set this_freq($option) $expect_out(1,string)
				set tmp $expect_out(2,string)
				# Values with an "M" suffix are scaled down by
				# 1000 to match the GHz unit used when the
				# results are reported.
				if {[string compare $tmp "M"] == 0} {
					set this_freq($option) [expr {$this_freq($option) / 1000.0}]
				}
				exp_continue
			}
			timeout {
				send_user "\nFAILURE: sacct is not responding\n"
				set exit_code 1
			}
			eof {
				wait
			}
		}

		if {$this_freq($option) == 0} {
			send_user "\nFAILURE: did not get cpu frequency for $option\n"
			set exit_code 1
		}
	}

	return [array get this_freq]
}

# file_id: job step script that prints its job id and then sleeps.
# file_in: batch script that dumps the governors offered by cpu0.
make_bash_script $file_id "echo SLURM_JOB_ID=\$SLURM_JOB_ID; $bin_sleep 30"
make_bash_script $file_in "cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors"

# Identify a node that we can use and available governors
spawn $sbatch -N1 -t1 --exclusive -o $file_out ./$file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sbatch is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	send_user "\nFAILURE: sbatch did not submit job\n"
	exit 1
}

if {[wait_for_job $job_id "DONE"] != 0} {
	send_user "\nFAILURE: waiting for job to complete\n"
	cancel_job $job_id
	exit 1
}

# The presence of "ondemand" in the batch job output is used as the
# indicator that the CPU frequency of the node can be controlled.
set userspace_governor 0
if {[wait_for_file $file_out] == 0} {
	spawn $bin_cat $file_out
	expect {
		-re "ondemand" {
			set userspace_governor 1
			exp_continue
		}
		eof {
			wait
		}
	}
}
if {$userspace_governor == 0} {
	send_user "\nWARNING: Node configuration prevents direct control over CPU frequency\n"
	exec $bin_rm -f $file_id $file_in $file_out
	exit 0
}

# Look up the node the batch job ran on; the srun jobs are pinned to it.
set match 0
spawn $scontrol show job $job_id
expect {
	-re "NodeList=($alpha_numeric_nodelist)" {
		set node $expect_out(1,string)
		set match 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$match != 1} {
	send_user "\nFAILURE: was not able to get a usable node\n"
	exit 1
}

# Only the second element (threads) is used later, as the srun task count.
lassign [get_node_cpus $node] num_cputot threads
cancel_job $job_id

#
# Test various CPU governor values
#
# NOTE(review): assumes get_cpu_governors is a no-argument helper provided
# by ./globals. It has to be invoked via command substitution; without the
# brackets the variable would hold the literal text "get_cpu_governors".
set avail_governors [get_cpu_governors]
send_user "\nCpuFreqGovernors = $avail_governors\n"

array set freq_lvl_2 [sub_job [array get freq_lvl_2]]

if {($freq_lvl_2(conservative) == 0) ||
    ($freq_lvl_2(ondemand) == 0) ||
    ($freq_lvl_2(performance) == 0) ||
    ($freq_lvl_2(powersave) == 0)} {
	send_user "\nFAILURE: CPU frequency values are invalid\n"
	set exit_code 1
}

#
# Test various CPU frequency values
#
array set freq_lvl_1 [sub_job [array get freq_lvl_1]]

send_user "\n======= Reported frequencies =======\n"
foreach name [array names freq_lvl_1] {
	send_user "$name is $freq_lvl_1($name) GHz\n"
}

# The reported values must be ordered low <= medium <= high and
# highm1 <= high.
if { (($freq_lvl_1(low) > $freq_lvl_1(medium)) ||
      ($freq_lvl_1(medium) > $freq_lvl_1(high)) ||
      ($freq_lvl_1(highm1) > $freq_lvl_1(high)))} {
	send_user "\nFAILURE: CPU frequency values are not valid\n"
	send_user "Test with smaller JobAcctGatherFrequency configuration or longer running jobs\n"
	exit 1
}

# The generated scripts and output file are only removed on success; they
# are left in place after a failure.
if {$exit_code == 0} {
	exec $bin_rm -f $file_id $file_in $file_out
	send_user "\nSUCCESS\n"
}
exit $exit_code