#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test that no files are open in spawned tasks (except stdin,
#          stdout, and stderr) to ensure successful checkpoint/restart.
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2002-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
############################################################################
source ./globals

# Test identity and the scratch files it creates in the current directory.
set test_id     "7.9"
set exit_code   0
set file_in     "test$test_id.input"
set file_out    "test$test_id.output"
set file_prog   "test$test_id.prog"

# Number of times the batch script launches the test program through srun.
set iterations  50

print_header $test_id

#
# Skip (exit 0) when test_accting_steps reports that the test can not run;
# the warning names the nosteps/nojobs settings as the cause.
#
if {![test_accting_steps]} {
	send_user "\nWARNING: This test can not be run with nosteps or nojobs "
	send_user "(AccountingStorageEnforce)\n"
	exit 0
}

#
# Test is incompatible with certain plugins.
#
# Each leaves files open, although we could clear the proctrack related
# files by just closing all files after fd=3.
#
if {[string equal [test_proctrack] "cray_aries"]} {
	log_warn "Test incompatible with proctrack/cray_aries"
	exit $exit_code
}

#
# Delete left-over programs and rebuild them.
# We use our own program to get ulimit values since the output
# of the ulimit program is inconsistent across systems.
#
exec $bin_rm -f $file_prog $file_in $file_out
exec $bin_cc -O -o $file_prog ${file_prog}.c

#
# Build the batch script: run the test program once directly inside the
# batch script, then $iterations more times as srun-spawned tasks.
# The trailing 'fini' marker lets us confirm the script ran to completion.
# The newlines inside the string are significant: they separate the bash
# commands and terminate the body of the for loop.
#
make_bash_script $file_in "
  $bin_echo 'testing within script'
  ./$file_prog
  $bin_echo ' '
  $bin_echo 'testing $iterations sets of spawned tasks'
  for ((i=0; i<$iterations; i++)) ; do
    $srun --mpi=none ./$file_prog
  done
  $bin_echo 'fini'
"

#
# Submit the batch job and capture its job id from sbatch's output
#
set job_id 0
spawn $sbatch --output=$file_out -t1 ./$file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	send_user "\nFAILURE: batch submit failure\n"
	exit 1
}

#
# Wait for job to complete
#
if {[wait_for_job $job_id "DONE"] != 0} {
	send_user "\nFAILURE: waiting for job to complete\n"
	exit 1
}

#
# Inspect the job's output file
#
if {[wait_for_file $file_out] != 0} {
	# Report the reason explicitly so the exit is never silent.
	send_user "\nFAILURE: waiting for output file $file_out\n"
	exit 1
}

# Count every "FAILED" in the output and note whether the closing 'fini'
# marker written by the batch script was seen.
set fini    0
set matches 0
spawn $bin_cat $file_out
expect {
	-re "FAILED" {
		incr matches
		exp_continue
	}
	-re "fini" {
		set fini 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: /bin/cat not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

if {$fini != 1} {
	send_user "\nFAILURE: script never completed\n"
	set exit_code 1
}

if {$matches != 0} {
	# Total runs of the test program: one directly in the batch script
	# plus one per srun iteration.
	set tot [expr {$iterations + 1}]

	# A single failed run is only reported as a warning; more than one
	# fails the test.
	if {$matches <= 1} {
		send_user "\nWARNING: $matches of $tot tests failed\n"
	} else {
		send_user "\nFAILURE: $matches of $tot tests failed\n"
		set exit_code 1
	}
	send_user " This should happen infrequently, typically when\n"
	send_user " JobAcctFrequency is set to a small value and is\n"
	send_user " indicative of a non-checkpointable job. For details,\n"
	send_user " see src/plugins/jobacct/linux/jobacct_linux.c\n"
	send_user " To diagnose, use lsof to examine spawned job.\n"
}

# Remove the scratch files only on success so that failures can be
# investigated afterwards.
if {$exit_code == 0} {
	send_user "\nSUCCESS\n"
	exec $bin_rm -f $file_in $file_prog $file_out
}
exit $exit_code