#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Validate Slurm debugger infrastructure (--debugger-test option).
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2002-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
############################################################################
source ./globals

set test_id     "1.41"
set exit_code   0
# Number of tasks requested from srun; one " host:" response is expected
# for each of them.
set task_cnt    4
# Number of " host:" responses actually seen in srun's output.
set resp_cnt    0
# Set to 1 once srun's output mentions "timelimit".
set timed_out   0

print_header $test_id

#
# Cannot run the test if OverTimeLimit is set, since we test time limits.
#
set overtimelim [get_over_time_limit]
if {$overtimelim != 0} {
	log_warn "Cannot run this test when OverTimeLimit is set. Exiting now."
	exit 0
}

#
# Submit a slurm job that will execute 'id' on 1 to 2 nodes and over
# task_cnt tasks
#
# Timeout is max_job_delay (to spawn task) +
#		60 (job time limit) +
#		60 (slurmctld time limit check poll interval) +
#		KillWait (120 secs on BlueGene per IBM recommendation)
#
# The expression is braced so that Tcl substitutes $max_job_delay exactly
# once, inside expr, instead of re-parsing its value as expression text.
# NOTE(review): assumes max_job_delay (defined outside this file) is a plain
# number -- confirm against ./globals.
set timeout [expr {$max_job_delay + 60 + 60 + 120}]
set node_cnt 1-2
set srun_pid [spawn $srun -N$node_cnt -n$task_cnt --overcommit --debugger-test -t1 $bin_id]
expect {
	-re "uid=" {
		# Output of 'id' means a task actually ran instead of
		# being held stopped for the debugger.
		send_user "\nFAILURE: task not stopped\n"
		set exit_code 1
		exp_continue
	}
	-re " host:" {
		# Count the responses; compared against task_cnt below.
		incr resp_cnt
		exp_continue
	}
	-re "timelimit" {
		# The job was submitted with -t1, so hitting the time
		# limit is the expected way for it to end.
		set timed_out 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: srun not responding\n"
		slow_kill $srun_pid
		set exit_code 1
	}
	eof {
		# Reap the spawned srun process.
		wait
	}
}

if {$timed_out == 1} {
	send_user "\nEarly termination is expected, no worries.\n"
}

# Every requested task must have produced exactly one " host:" response.
if {$task_cnt != $resp_cnt} {
	send_user "\nFAILURE: Did not get proper number of tasks: "
	send_user "$task_cnt, $resp_cnt\n"
	set exit_code 1
}

if {$exit_code == 0} {
	send_user "\nSUCCESS\n"
}
exit $exit_code