#!/usr/bin/env expect
############################################################################
# Purpose: Test of srun functionality
#          Test of hostfile option (-hostfile).
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2002-2007 The Regents of the University of California.
# Copyright (C) 2008-2009 Lawrence Livermore National Security.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Danny Auble
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################

#
# Test flow:
#   run 0 - plain 3-node srun; records which node each task landed on.
#   run 1 - writes a hostfile listing those nodes in a permuted order and
#           reruns srun with --distribution=arbitrary; every task must land
#           on the node named on its line of the hostfile.
#   run 2 - same check with a second permutation, built from run 1's result.
#
source ./globals

set test_id     "1.52"
set exit_code   0
set num_nodes   2
set num_tasks   2
set idle_nodes  0
set max_nodes   0
set task_count  0
set hostfile    "test$test_id.hostfile"

print_header $test_id

if { [test_front_end] } {
	send_user "\nWARNING: This test incompatible with front-end systems\n"
	exit $exit_code
}

#
# Determine if we have enough nodes to test functionality
#
set def_part [default_partition]
spawn $scontrol show partition $def_part
expect {
	-re "not found" {
		send_user "\nFAILURE: partition $def_part doesn't exist\n"
		exit 1
	}
	-re "MaxNodes=($number)" {
		set max_nodes $expect_out(1,string)
		exp_continue
	}
	-re "MaxNodes=UNLIMITED" {
		# Sentinel value standing in for "no limit"
		set max_nodes 999999
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		exit 1
	}
	eof {
		wait
	}
}

set idle_nodes [available_nodes $def_part idle]
if { ($idle_nodes < 3) || ($max_nodes < 3) } {
	# Not a failure: the test simply can not run on this configuration.
	# Show the sentinel as UNLIMITED rather than as a raw number.
	if { $max_nodes == 999999 } {
		set max_nodes_text "UNLIMITED"
	} else {
		set max_nodes_text $max_nodes
	}
	send_user "WARNING: partition $def_part must have at least 3 idle nodes and MaxNodes >= 3 to run this test on. IDLE:$idle_nodes MaxNodes:$max_nodes_text\n"
	exit $exit_code
}

# Remove any hostfile left behind by an earlier run.
exec $bin_rm -f $hostfile

# node0..node2 hold the node name reported by task 0..2 of the latest srun;
# 0 means "not reported".
set node0 0
set node1 0
set node2 0
# Node order written to the hostfile for the current run (element N is the
# node expected for task N).  Unused during run 0.
set expected {}

set timeout $max_job_delay

for {set i 0} {$i < 3} {incr i} {
	if { $i > 0 } {
		# The permutation is built from the previous run's placement,
		# so all three names must be known.
		if { $node0 == 0 || $node1 == 0 || $node2 == 0 } {
			send_user "\nFAILURE: node names not set from \
				previous srun\n"
			exit 1
		}

		if { $i == 1 } {
			set expected [list $node2 $node0 $node1]
		} else {
			set expected [list $node1 $node0 $node2]
		}

		# One node name per line, in task order.
		set env(SLURM_HOSTFILE) $hostfile
		set file [open $hostfile "w"]
		puts $file [join $expected "\n"]
		close $file
	}

	#
	# execute srun with a specific node count
	#
	set node0 0
	set node1 0
	set node2 0
	set task_cnt 0
	if { $i == 0 } {
		set srun_pid [spawn $srun -N3 -t1 -l $bin_printenv SLURMD_NODENAME]
	} else {
		set srun_pid [spawn $srun -N3 -t1 -l --distribution=arbitrary $bin_printenv SLURMD_NODENAME]
	}
	expect {
		-re "($number): ($alpha_numeric_under)" {
			# srun -l prefixes each output line with the task id
			incr task_cnt
			set task_id $expect_out(1,string)
			if { $task_id == 0 } {
				set node0 $expect_out(2,string)
			} elseif { $task_id == 1 } {
				set node1 $expect_out(2,string)
			} else {
				set node2 $expect_out(2,string)
			}
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: srun not responding\n"
			slow_kill $srun_pid
			set exit_code 1
		}
		eof {
			wait
		}
	}

	if { $task_cnt != 3 } {
		send_user "\nNo worries, test just can not run here\n"
	} elseif { $i > 0 } {
		# Compare each task's node with its hostfile line; report only
		# the first mismatch.
		set actual [list $node0 $node1 $node2]
		for {set task 0} {$task < 3} {incr task} {
			set got  [lindex $actual $task]
			set want [lindex $expected $task]
			if { [string compare $got $want] } {
				send_user "\nFAILURE: task $task not distributed by hostfile ($got != $want)\n"
				set exit_code 1
				break
			}
		}
	}
}

if { $exit_code == 0 } {
	# Keep the hostfile on failure so it can be inspected.
	exec $bin_rm -f $hostfile
	send_user "\nSUCCESS\n"
}
exit $exit_code