#! /usr/bin/perl
# @configure_input@
################################################################################
#
# Usage: checkpoint_script <session_id> <job_id> <user_id> <group_id>
#            <checkpoint_dir> <checkpoint_name> <signal_num> <checkpoint_depth>
#
# This script is invoked by pbs_mom to checkpoint a job.
# Errors are logged to syslog
#
################################################################################
use strict;
use warnings;
use Sys::Syslog;

# configure should assign a value to blcr_bindir
# blcr_bindir, if non-empty, is appended to path
# (single quotes so the substituted path is never subject to interpolation)
my $blcr_bindir = '@BLCR_BINDIR@';

# Customize the following script variables

# Customize the PATH to include location of blcr cr_checkpoint command
$ENV{PATH} .= ":$blcr_bindir" if $blcr_bindir;

my $logLevel = 3;    # 0 = none, 1 = fail, 2 = info, 3 = debug

logPrint(2, "Invoked: $0 " . join(' ', @ARGV) . "\n");

my ($sessionId, $jobId, $userId, $groupId, $signalNum, $checkpointDir,
    $checkpointName, $checkpointDepth);
my $usage = "Usage: $0 <session_id> <job_id> <user_id> <group_id> "
    . "<checkpoint_dir> <checkpoint_name> <signal_num> <checkpoint_depth>\n";

# Note that depth is not used in this script but could control a limit to the
# number of checkpoint image files that are preserved on the disk.
#
# Note also that a request was made to identify whether this script was invoked
# by the job's owner or by a system administrator. While this information is
# known to pbs_server, it is not propagated to pbs_mom and thus it is not
# possible to pass this to the script. Therefore, a workaround is to invoke
# qmgr and attempt to set a trivial variable. This will fail if the invoker is
# not a manager.
if (@ARGV == 8) {
    ($sessionId, $jobId, $userId, $groupId, $checkpointDir, $checkpointName,
        $signalNum, $checkpointDepth) = @ARGV;
}
else {
    logDie(1, $usage);
}

# Drop privileges to the job owner.  Groups must be changed before the uid,
# because changing groups requires the privileges we are about to give up.
my $gid = getgrnam($groupId);
logDie(1, "Unable to resolve group id ($groupId)\n") unless defined $gid;
$( = $gid;
# Assigning "gid gid" sets the effective gid AND resets the supplementary
# group list; a bare "$) = $gid" would leave the invoker's groups in place.
$) = "$gid $gid";
# $( and $) read back as space-separated lists whose first entry is the gid.
my ($realGid)      = split ' ', $(;
my ($effectiveGid) = split ' ', $);
logDie(1, "Unable to set gid: $gid")
    unless $realGid == $gid && $effectiveGid == $gid;

my $uid = getpwnam($userId);
logDie(1, "Unable to resolve user id ($userId)\n") unless defined $uid;
$< = $uid;
$> = $uid;
logDie(1, "Unable to set uid: $uid") unless $< == $uid && $> == $uid;

# Change to the checkpoint directory where we want the checkpoint to be created.
# This must happen regardless of the configured log level.
chdir $checkpointDir
    or logDie(1, "Unable to cd to checkpoint dir ($checkpointDir): $!\n");

# Build blcr checkpoint command as an argument list so that no shell is
# involved and the arguments cannot be re-interpreted.
my @cmd = ('cr_checkpoint');
push @cmd, '--signal', $signalNum if $signalNum;
push @cmd, '--tree', $sessionId;
push @cmd, '--file', $checkpointName;
my $cmd = join(' ', @cmd);    # printable form, used in log messages only

# Fork with the child's stdout attached to a pipe; the child also points
# stderr at that pipe so both streams are captured, as "2>&1" would.
my $pid = open(my $pipe, '-|');
logDie(1, "Unable to fork for subcommand ($cmd): $!\n") unless defined $pid;
if ($pid == 0) {
    open(STDERR, '>&', \*STDOUT)
        or die "Unable to redirect stderr: $!\n";
    exec { $cmd[0] } @cmd
        or print "Unable to exec $cmd[0]: $!\n";
    exit 127;
}

my $output = do { local $/; <$pipe> };
$output = '' unless defined $output;
close($pipe);    # reaps the child and stores its wait status in $?
my $status = $?;

# Translate the wait status into an exit code.  A child killed by a signal
# has a zero exit byte, so it must be detected explicitly or it would be
# reported as success.
my $rc;
if ($status == -1) {
    $rc = 127;
}
elsif ($status & 127) {
    $rc = 128 + ($status & 127);
}
else {
    $rc = $status >> 8;
}

if ($rc) {
    logPrint(1, "Subcommand ($cmd) failed with rc=$rc:\n$output");
}
else {
    logPrint(3, "Subcommand ($cmd) yielded rc=$rc:\n$output");
}

exit $rc;

################################################################################
# logPrint($level, $message)
# Write a message to syslog if $level is within the configured $logLevel.
# Levels: 1 = fail (warning), 2 = info, 3 = debug.  Does not die.
################################################################################
sub logPrint
{
    my ($level, $message) = @_;
    my @severity = ('none', 'warning', 'info', 'debug');

    # Level 0 ('none') and out-of-range levels have no syslog priority.
    return if $level < 1 || $level > $#severity;
    return if $logLevel < $level;

    openlog('checkpoint_script', '', 'user');
    # Pass the message as data, not as the format, so a '%' in it is literal.
    syslog($severity[$level], '%s', $message);
    closelog();
    return;
}

################################################################################
# logDie($level, $message)
# Write a message (to syslog) and die
################################################################################
sub logDie
{
    my ($level, $message) = @_;
    logPrint($level, $message);
    die($message);
}