#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validates that the OverTimeLimit value set in the
#          slurm.conf file is enforced
#
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2013 SchedMD LLC
# Written by Nathan Yee
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

set test_id      "test1.73"
set config_dir   ""
set conf_script  "$test_id\_conf_script"
set bad_script   "$test_id\_bad_script"
set good_script  "$test_id\_good_script"
set good_job     "$test_id\_good_job_sc"
set bad_job      "$test_id\_bad_job_sc"
# Expect's timeout must outlast the longest job step run below (sleep 250).
set timeout      [expr $max_job_delay + 500]
set exit_code    0

print_header $test_id

if {[is_super_user] == 0} {
	send_user "\nWARNING: This test can't be run except as SlurmUser\n"
	exit 0
}

#
# Slurm time limit enforcement is performed within one minute of the actual
# job time limit, so a one minute limit job will be killed 60 to 120 seconds
# after initiation. With OverTimeLimit=2, that is moved to 180 to 240 seconds.
#
# The "good" step (130 seconds) therefore finishes before it can be killed,
# while the "bad" step (250 seconds) runs past the extended limit.
#
make_bash_script $good_script "sleep 130"
make_bash_script $good_job "
$srun -t1 -v ./$good_script
echo RC=$\?
"

make_bash_script $bad_script "sleep 250"
make_bash_script $bad_job "
$srun -t1 -v ./$bad_script
echo RC=$\?
"

#
# Run a wrapper script and report the exit code that it echoes.
#
# Arguments:
#   job - name of a script in the current directory; it is expected to
#         print a line of the form "RC=<number>"
#
# Returns:
#   The last value matched by "RC=<number>", or -1 if no such line was seen.
#
# Side effects:
#   Sets the global exit_code to 1 if the spawned script times out.
#
proc check_rc { job } {
	global number exit_code

	set rc -1
	spawn ./$job
	expect {
		-re "RC=($number)" {
			set rc $expect_out(1,string)
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: echo is not responding\n"
			set exit_code 1
		}
		eof {
			wait
		}
	}

	return $rc
}

#
# Get the slurm.conf path
#
set got_config 0
log_user 0
spawn $scontrol show config
expect {
	-re "SLURM_CONF.*= (/.*)/($alpha).*SLURM_VERSION" {
		# Capture 1 is the directory part of the SLURM_CONF path
		set config_dir $expect_out(1,string)
		set got_config 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
log_user 1
if {$got_config == 0} {
	send_user "\nFAILURE: Could not find configuration file location\n"
	exit 1
}

# Get the current working directory
# NOTE: this invokes the value of $bin_pwd as a Tcl command.
set cwd "[$bin_pwd]"

#
# Copy the original slurm.conf file
#
exec $bin_rm -f $cwd/slurm.conf.orig
spawn $bin_cp -v $config_dir/slurm.conf $cwd/slurm.conf.orig
expect {
	timeout {
		send_user "\nFAILURE: slurm.conf was not copied\n"
		exit 1
	}
	eof {
		wait
	}
}
# Never touch the live configuration unless the backup really exists
if {![file exists $cwd/slurm.conf.orig]} {
	send_user "\nFAILURE: slurm.conf was not copied\n"
	exit 1
}

#
# Add OverTimeLimit value to slurm.conf and reconfigure
#
make_bash_script $conf_script "$bin_echo OverTimeLimit=2 >> $config_dir/slurm.conf"
spawn ./$conf_script
expect {
	-re "Permission denied" {
		send_user "\nWARNING: Unable to update slurm.conf\n"
		set exit_code 1
		exp_continue
	}
	eof {
		wait
	}
}

# Confirm that the new setting actually landed at the end of slurm.conf
set match 0
spawn tail $config_dir/slurm.conf
expect {
	-re "OverTimeLimit=2" {
		set match 1
		exp_continue
	}
	eof {
		wait
	}
}
if {$match == 0} {
	send_user "\nWARNING: slurm.conf was not updated\n"
	exit 0
}
reconfigure

# The short job step must complete normally under the extended limit
set rc [check_rc $good_job]
if {$rc != 0} {
	send_user "\nFAILURE: bad job exit code ($rc != 0)\n"
	set exit_code 1
}

# The long job step must still be terminated once the extended limit passes
set rc [check_rc $bad_job]
if {$rc == 0} {
	send_user "\nFAILURE: bad job exit code ($rc == 0)\n"
	set exit_code 1
}

#
# Restore the slurm.conf file back to original values.
# The copy is guarded so that a failure is reported as a test FAILURE
# (and the backup is kept, since cleanup below only runs on success)
# instead of aborting the script with an uncaught Tcl error before
# the daemons are reconfigured.
#
if {[catch {exec $bin_cp $cwd/slurm.conf.orig $config_dir/slurm.conf} restore_err]} {
	send_user "\nFAILURE: slurm.conf was not restored ($restore_err), original saved as $cwd/slurm.conf.orig\n"
	set exit_code 1
}
reconfigure

if {$exit_code == 0} {
	exec $bin_rm -f $good_script $bad_script $good_job $bad_job
	exec $bin_rm -f $conf_script $cwd/slurm.conf.orig
	send_user "\nSUCCESS\n"
}
exit $exit_code