#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate that the mcs plugin (mcs/group) is OK with sbatch
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2015 CEA/DAM/DIF
# Written by Aline Roy
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

set test_id     17.52
set exit_code   0
set cwd         "[$bin_pwd]"
set config_path ""
set tmp_job     "test$test_id\_tmp_job"
set file_in     "test${test_id}.in"
set file_out    "test${test_id}.out"

#
# Rewrite the MCS related settings of slurm.conf and ask the daemons to
# re-read the file.
#
# Existing MCSPlugin, MCSParameters and PrivateData lines are deleted first
# (case-insensitively, tolerating any blanks before the key and before the
# "="), then MCSPlugin=mcs/group, the requested MCSParameters and
# PrivateData=jobs,nodes are appended.
#
# conf_dir   - directory that holds slurm.conf
# mcs_params - value to write for MCSParameters
#
proc set_mcs_conf { conf_dir mcs_params } {
	global bin_sed bin_echo

	foreach key {MCSPlugin MCSParameters PrivateData} {
		# [[:space:]] rather than \s: Tcl's backslash substitution
		# would turn "\s" into a plain "s" before sed ever sees it.
		exec $bin_sed -i "/^\[\[:space:\]\]*$key\[\[:space:\]\]*=/Id" $conf_dir/slurm.conf
	}
	exec $bin_echo -e "\nMCSPlugin=mcs/group" >> $conf_dir/slurm.conf
	exec $bin_echo MCSParameters=$mcs_params >> $conf_dir/slurm.conf
	exec $bin_echo PrivateData=jobs,nodes >> $conf_dir/slurm.conf
	reconfigure
}

print_header $test_id

if {[is_super_user] == 0} {
	send_user "\nWARNING: This test can't be run except as SlurmUser\n"
	exit 0
}

# Locate slurm.conf and keep a copy so it can be restored at the end.
set config_path [get_conf_path]
if { $exit_code != 0 } {
	exit $exit_code
}
copy_conf $config_path $cwd
if { $exit_code != 0 } {
	exit $exit_code
}

send_user "\n---Checking sbatch uses mcs-label only for some jobs (ondemand mode)---\n"

#
# Change the slurm.conf MCSparameters and MCSPlugin
#
# The groups of the current user, joined with "|", form the list of groups
# handed to the mcs/group plugin through MCSParameters.
#
set alpha_numeric_pipe "\[a-zA-Z0-9|\]+"
set groups_name 0
spawn $bin_bash -c "exec groups \| sed \"s/ /\|/g\""
expect {
	-re "($alpha_numeric_pipe)" {
		set groups_name $expect_out(1,string)
	}
	eof {
		wait
	}
}
# Report the value only once it has actually been detected.
send_user "\n groups=$groups_name\n"

set_mcs_conf $config_path "ondemand,select,privatedata:$groups_name"

#
# verify slurm conf parameters MCS
#
set found 0
spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep MCS"
expect {
	-re "MCSPlugin               = mcs/group" {
		send_user "\n MCSPlugin=mcs/group OK\n"
		set found 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$found == 0} {
	send_user "\nFAILURE: MCSPlugin parameter in scontrol show config is not mcs/group\n"
	set exit_code 1
}

###### Check that sbatch fails with a bad mcs-label ######
send_user "\n---Checking sbatch fails with a bad mcs-label ---\n"
set timeout $max_job_delay
make_bash_script $tmp_job "sleep 10"

set job_id 0
spawn $sbatch -N1 --mcs-label=foo -t1 $tmp_job
expect {
	-re "Batch job submission failed: Invalid mcs_label specified" {
		send_user "\nThis error is expected, no worries\n"
		exp_continue
	}
	-re "Submitted batch job ($number)" {
		# The submission should have been rejected; remember the job
		# so it can be reported and cancelled below.
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sbatch not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$job_id != 0} {
	send_user "\nFAILURE: sbatch accepted a job with an invalid mcs-label\n"
	set exit_code 1
	cancel_job $job_id
}

###### Check that sbatch uses mcs-label=group ######
send_user "\n---Checking sbatch uses mcs-label=group---\n"
make_bash_script $tmp_job "sleep 10"

set job_id 0
spawn $sbatch -N1 -o/dev/null --exclusive=mcs -t1 $tmp_job
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sbatch is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	send_user "\nFAILURE: job was not submitted\n"
	set exit_code 1
}

# $alpha_numeric_under has no "|", so only the first name printed by
# "groups" is captured; that is the label the job is expected to carry.
spawn $bin_bash -c "exec groups \| sed \"s/ /\|/g\""
expect {
	-re "($alpha_numeric_under)" {
		set group1 $expect_out(1,string)
	}
	eof {
		wait
	}
}

set found 0
spawn $squeue --jobs=$job_id --noheader -O "mcslabel"
expect {
	-re "(null)" {
		send_user "\nNO MCS-label for this job : this is not expected\n"
		exp_continue
	}
	-re "$group1" {
		send_user "\nMCS-label OK for this job\n"
		set found 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: squeue is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$found == 0} {
	send_user "\nFAILURE: job was submitted with a bad mcs-label\n"
	set exit_code 1
}

cancel_job $job_id

# Same check again, this time asking for the label explicitly.
make_bash_script $tmp_job "sleep 30"

set job_id 0
spawn $sbatch -N1 --mcs-label=$group1 --exclusive=mcs -o/dev/null -t10 $tmp_job
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sbatch is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	send_user "\nFAILURE: job was not submitted\n"
	set exit_code 1
}

set found 0
sleep 5
spawn $squeue --jobs=$job_id --noheader -O "mcslabel"
expect {
	-re "$group1" {
		send_user "\nMCS-label OK for this job\n"
		set found 1
		exp_continue
	}
	-re "Invalid job format specification" {
		send_user "\nFAILURE Invalid job format specification mcslabel\n"
		set exit_code 1
		# Already reported; avoid the generic failure message below.
		set found 1
	}
	timeout {
		send_user "\nFAILURE: squeue is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$found == 0} {
	send_user "\nFAILURE: job was submitted with a bad mcs-label\n"
	set exit_code 1
}

# Find the node the job landed on.
set found 0
set node 0
spawn $squeue --jobs=$job_id --noheader -O "nodelist"
expect {
	-re "($alpha_numeric_under)" {
		set node $expect_out(1,string)
		send_user "\nNode for this job : $node\n"
		set found 1
	}
	timeout {
		send_user "\nFAILURE: squeue is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$found == 0} {
	send_user "\nFAILURE: no node found in squeue command\n"
	set exit_code 1
}

#
# verify MCS of nodes
#
set found 0
spawn -noecho $bin_bash -c "exec $scontrol show node=$node | $bin_grep MCS"
expect {
	-re "MCS_label=$group1" {
		send_user "\n mcs_label OK for node $node\n"
		set found 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$found == 0} {
	send_user "\nFAILURE: job was submitted with node with bad mcs-label\n"
	set exit_code 1
}

cancel_job $job_id

#
# Change the slurm.conf MCSparameters and MCSPlugin
# test with enforced
#
send_user "\n---Checking sbatch uses mcs-label with all jobs (enforced mode)---\n"
set_mcs_conf $config_path "enforced,noselect,privatedata:$groups_name"

###### Check that sbatch uses mcs-label=group ######
send_user "\n---Checking sbatch uses mcs-label=group---\n"
make_bash_script $tmp_job "sleep 10"

# Reset so a failed submission is not hidden by the previous job's id.
set job_id 0
spawn $sbatch -N1 -o/dev/null -t1 $tmp_job
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: sbatch is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	send_user "\nFAILURE: job was not submitted\n"
	set exit_code 1
}

set found 0
sleep 3
spawn $squeue --jobs=$job_id --noheader -O "mcslabel"
expect {
	-re "$group1" {
		send_user "\nMCS-label OK for this job\n"
		set found 1
		exp_continue
	}
	-re "(null)" {
		send_user "\nFAILURE: NO MCS-label for this job\n"
		set exit_code 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: squeue is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$found == 0} {
	send_user "\nFAILURE: job was submitted with a bad mcs-label\n"
	set exit_code 1
}

# Find the node the job landed on.
set found 0
set node 0
spawn $squeue --jobs=$job_id --noheader -O "nodelist"
expect {
	-re "($alpha_numeric_under)" {
		set node $expect_out(1,string)
		send_user "\nNode for this job : $node\n"
		set found 1
	}
	timeout {
		send_user "\nFAILURE: squeue is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$found == 0} {
	send_user "\nFAILURE: no node found in squeue command\n"
	set exit_code 1
}

#
# verify MCS of nodes
#
# With "noselect" the node itself must not carry the job's label.
#
spawn -noecho $bin_bash -c "exec $scontrol show node=$node | $bin_grep MCS"
expect {
	-re "MCS_label=$group1" {
		send_user "\nFAILURE: a mcs_label is found for this job. It was not expected\n"
		set exit_code 1
	}
	-re "MCS_label=N/A" {
		send_user "\nNo mcs_label for this node. It was expected\n"
	}
	timeout {
		send_user "\nFAILURE: scontrol is not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

cancel_job $job_id

# Clean up vestigial files and restore original slurm.conf file
send_user "\nChanging slurm.conf back\n"
exec $bin_cp -v $cwd/slurm.conf.orig $config_path/slurm.conf
reconfigure

if {$exit_code == 0} {
	exec $bin_rm $tmp_job $cwd/slurm.conf.orig
	send_user "\nSUCCESS\n"
}
exit $exit_code