/* * OpenPBS (Portable Batch System) v2.3 Software License * * Copyright (c) 1999-2000 Veridian Information Solutions, Inc. * All rights reserved. * * --------------------------------------------------------------------------- * For a license to use or redistribute the OpenPBS software under conditions * other than those described below, or to purchase support for this software, * please contact Veridian Systems, PBS Products Department ("Licensor") at: * * www.OpenPBS.org +1 650 967-4675 sales@OpenPBS.org * 877 902-4PBS (US toll-free) * --------------------------------------------------------------------------- * * This license covers use of the OpenPBS v2.3 software (the "Software") at * your site or location, and, for certain users, redistribution of the * Software to other sites and locations. Use and redistribution of * OpenPBS v2.3 in source and binary forms, with or without modification, * are permitted provided that all of the following conditions are met. * After December 31, 2001, only conditions 3-6 must be met: * * 1. Commercial and/or non-commercial use of the Software is permitted * provided a current software registration is on file at www.OpenPBS.org. * If use of this software contributes to a publication, product, or * service, proper attribution must be given; see www.OpenPBS.org/credit.html * * 2. Redistribution in any form is only permitted for non-commercial, * non-profit purposes. There can be no charge for the Software or any * software incorporating the Software. Further, there can be no * expectation of revenue generated as a consequence of redistributing * the Software. * * 3. Any Redistribution of source code must retain the above copyright notice * and the acknowledgment contained in paragraph 6, this list of conditions * and the disclaimer contained in paragraph 7. * * 4. 
Any Redistribution in binary form must reproduce the above copyright * notice and the acknowledgment contained in paragraph 6, this list of * conditions and the disclaimer contained in paragraph 7 in the * documentation and/or other materials provided with the distribution. * * 5. Redistributions in any form must be accompanied by information on how to * obtain complete source code for the OpenPBS software and any * modifications and/or additions to the OpenPBS software. The source code * must either be included in the distribution or be available for no more * than the cost of distribution plus a nominal fee, and all modifications * and additions to the Software must be freely redistributable by any party * (including Licensor) without restriction. * * 6. All advertising materials mentioning features or use of the Software must * display the following acknowledgment: * * "This product includes software developed by NASA Ames Research Center, * Lawrence Livermore National Laboratory, and Veridian Information * Solutions, Inc. * Visit www.OpenPBS.org for OpenPBS software support, * products, and information." * * 7. DISCLAIMER OF WARRANTY * * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT * ARE EXPRESSLY DISCLAIMED. * * IN NO EVENT SHALL VERIDIAN CORPORATION, ITS AFFILIATED COMPANIES, OR THE * U.S. 
GOVERNMENT OR ANY OF ITS AGENCIES BE LIABLE FOR ANY DIRECT OR INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This license will be governed by the laws of the Commonwealth of Virginia, * without reference to its choice of law rules. */ #include /* the master config generated by configure */ #include #include #include #include #include #include #include #include #include #include "dis.h" #include "libpbs.h" #include "portability.h" #include #include #include #include #include #include "list_link.h" #include "server_limits.h" #include "attribute.h" #include "resource.h" #include "pbs_job.h" #include "log.h" #include "credential.h" #include "batch_request.h" #include "net_connect.h" #include "svrfunc.h" #include "mom_mach.h" #include "mom_func.h" #include "pbs_error.h" #include "pbs_proto.h" #include "rpp.h" #ifdef ENABLE_CPA #include "pbs_cpa.h" #endif #ifdef PENABLE_LINUX26_CPUSETS #include "pbs_cpuset.h" #endif /* External Functions */ /* External Globals */ extern char *path_epilog; extern char *path_epiloguser; extern char *path_epilogp; extern char *path_epiloguserp; extern char *path_jobs; extern unsigned int default_server_port; extern tlist_head svr_alljobs, mom_polljobs; extern int exiting_tasks; extern char *msg_daemonname; extern int termin_child; extern struct connection svr_conn[]; extern int resc_access_perm; extern char *path_aux; extern int LOGLEVEL; extern char *PJobSubState[]; extern char mom_host[]; extern int PBSNodeCheckProlog; extern int PBSNodeCheckEpilog; /* external prototypes */ u_long resc_used(job *, char *, u_long(*f) (resource *)); static void 
preobit_reply (int); static void obit_reply (int); extern int tm_reply (int, int, tm_event_t); extern u_long addclient (char *); extern void encode_used (job *, tlist_head *); extern void encode_flagged_attrs (job *, tlist_head *); extern void job_nodes (job *); extern int task_recov (job *); extern void mom_server_all_update_stat(void); extern void check_state(int); extern int mom_open_socket_to_jobs_server (job *, char *, void (*) (int)); extern int mark_for_resend (job *); extern void checkpoint_partial(job *pjob); extern void mom_checkpoint_recover(job *pjob); extern void clear_down_mom_servers(); extern int is_mom_server_down(pbs_net_t); extern void set_mom_server_down(pbs_net_t); extern int no_mom_servers_down(); extern char *get_local_script_path(job *pjob, char *base); /* END external prototypes */ /* * catch_child() - the signal handler for SIGCHLD. * * To keep the signal handler simple for * SIGCHLD - just indicate there was one. */ void catch_child( int sig) { termin_child = 1; return; } /* END catch_child() */ hnodent *get_node( job *pjob, tm_node_id nodeid) { int i; vnodent *vp = pjob->ji_vnods; for (i = 0;i < pjob->ji_numvnod;i++, vp++) { if (vp->vn_node == nodeid) { return(vp->vn_host); } } return(NULL); } /* END get_node() */ /** * For all jobs in MOM * ignore job if job's pbs_server is down * for all tasks in job * ignore task if task state is not exiting * if task is master, send kill to all sisters * process TM client obits * if I am sister, do sister stuff and continue * kill_job * contact server and register preobit_reply() * set job substate to JOB_SUBSTATE_PREOBIT * * @see main_loop() - parent * @see scan_for_terminated() * @see post_epilog() * @see preobit_reply() - registered to handle response to preobit * @see send_sisters() - child * @see kill_job() - child * * Obit Overview: * - main_loop() * - scan_for_terminated() * uses waitpid() to detect completed children * First Pass: catches SIGCHLD of job executable to identify when job * tasks 
terminate, issues kill_task(), and marks job task ti_status * as TI_STATE_EXITED which is detected and processed inside of * scan_for_exiting() * Second Pass: catches SIGCHLD for job epilog child and exec's * job's ji_mompost (post_epilog) * * - scan_for_exiting() * called after scan_for_terminated and looks at jobs to identify which * have exiting tasks. Sends kill to all sisters via send_sisters(), * sets job substate to JOB_SUBSTATE_EXITING, issues kill_job, and * then sets job substate to JOB_SUBSTATE_PREOBIT. This routine then * creates the preobit message and sends it to pbs_server. * registers preobit_reply() as socket handler * * - preobit_reply() * o validates server response to preobit message * If the server returns unknown job id (it may have been purged), * then the job is deleted from the mom: mom_deljob -> job_purge, * and that should be it for the job. Otherwise, we fork: * - fork_me() * o parent registers post_epilog in job ji_mompost attribute, sets job * substate to JOB_SUBSTATE_OBIT, and registers post_epilogue handler. * This handler will be invoked when the waitpid in scan_for_terminated * catches a SIGCHLD for the job epilog invoked by the child. 
 * o child runs run_pelog()
 *
 * - post_epilog()
 * sends obit to pbs_server and registers obit_reply() as connection handler
 *
 * - obit_reply()
 * sets job substate to EXITED
 * END OF JOB LIFECYCLE
 *
 * when job completes and process id goes away scan_for_terminated()
 *
 * OVERALL FLOW:
 * - scan_for_terminating() - PHASE I
 * - KILL TASK
 * - scan_for_exiting()
 * - KILL SISTERS
 * - SEND PREOBIT TO PBS_SERVER
 * - preobit_reply() - FORK AND EXEC EPILOG
 * - scan_for_terminating() - PHASE II
 * - post_epilog()
 * - SEND OBIT TO PBS_SERVER
 * - obit_reply()
 *
 * STATE TRANSITIONS:
 * JOB_SUBSTATE_RUNNING  (42)
 * JOB_SUBSTATE_EXITING  (50) - scan_for_exiting()
 * JOB_SUBSTATE_PREOBIT  (57) - scan_for_exiting()
 * JOB_SUBSTATE_OBIT     (58) - preobit_reply()
 */

void scan_for_exiting(void)
  {
  char *id = "scan_for_exiting";

  int  found_one = 0;
  job *nxjob;
  job *pjob;
  task *ptask;
  obitent *pobit;
  int  sock;
  char *cookie;

  /* local prototypes for helpers defined elsewhere in mom */

  u_long gettime(resource *);
  u_long getsize(resource *);
  task *task_find(job *, tm_task_id);
  int im_compose(int, char *, char *, int, tm_event_t, tm_task_id);

  static int ForceObit    = -1;  /* boolean - if TRUE, ObitsAllowed will be enforced */
  static int ObitsAllowed = 1;   /* max obits sent per pass (site-tunable, see below) */

  int NumSisters;

  /*
  ** Look through the jobs.  Each one has its tasks examined
  ** and if the job is EXITING, it meets its fate depending
  ** on whether this is the Mother Superior or not.
  */

  if (LOGLEVEL >= 3)
    {
    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_SERVER,
      id,
      "searching for exiting jobs");
    }

  if (ForceObit == -1)
    {
    /* NOTE:  Allow sites to locally specify obit groupings larger than 1. */
    /*        Remove after 6/1/2008 if no further obit issues are encountered */

    char *ptr;

    if ((ptr = getenv("TORQUEFORCESEND")) != NULL)
      {
      int tmpI;

      tmpI = (int)strtol(ptr, NULL, 10);

      if (tmpI > 0)
        ObitsAllowed = tmpI;

      ForceObit = 1;
      }
    else
      {
      ForceObit = 1;
      }
    }  /* END if (ForceObit == -1) */

  clear_down_mom_servers();

  for (pjob = (job *)GET_NEXT(svr_alljobs);pjob != NULL;pjob = nxjob)
    {
    /* fetch next link first: pjob may be purged inside this iteration */

    nxjob = (job *)GET_NEXT(pjob->ji_alljobs);

    /*
     * Bypass job if it is for a server that we know is down
     */

    if (is_mom_server_down(pjob->ji_qs.ji_un.ji_momt.ji_svraddr))
      {
      if (LOGLEVEL >= 3)
        {
        snprintf(log_buffer, 1024, "not checking job %s - server is down",
                 pjob->ji_qs.ji_jobid);

        log_record(
          PBSEVENT_DEBUG,
          PBS_EVENTCLASS_SERVER,
          id,
          log_buffer);
        }

      continue;
      }

    /*
    ** If a checkpoint with aborts is active,
    ** skip it.  We don't want to report any obits
    ** until we know that the whole thing worked.
    */

    if (pjob->ji_flags & MOM_CHECKPOINT_ACTIVE)
      {
      continue;
      }

    /*
    ** If the job has had an error doing a checkpoint with
    ** abort, the MOM_CHECKPOINT_POST flag will be on.
    */

    if (pjob->ji_flags & MOM_CHECKPOINT_POST)
      {
      checkpoint_partial(pjob);

      continue;
      }

    /* a job without its TM cookie cannot be processed here */

    if (!(pjob->ji_wattr[(int)JOB_ATR_Cookie].at_flags & ATR_VFLAG_SET))
      {
      continue;
      }

    cookie = pjob->ji_wattr[(int)JOB_ATR_Cookie].at_val.at_str;

    /*
    ** Check each EXITED task.  They transition to DEAD here.
    */

    for (
      ptask = (task *)GET_NEXT(pjob->ji_tasks);
      ptask != NULL;
      ptask = (task *)GET_NEXT(ptask->ti_jobtask))
      {
      if (ptask->ti_qs.ti_status != TI_STATE_EXITED)
        continue;

      /*
      ** Check if it is the top shell.
      */

      if (ptask->ti_qs.ti_parenttask == TM_NULL_TASK)
        {
        /* master task is in state TI_STATE_EXITED */

        /* do not overwrite an over-limit exit status already recorded */

        if (pjob->ji_qs.ji_un.ji_momt.ji_exitstat != JOB_EXEC_OVERLIMIT)
          {
          pjob->ji_qs.ji_un.ji_momt.ji_exitstat = ptask->ti_qs.ti_exitstat;
          }

        LOG_EVENT(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          "job was terminated");

        NumSisters = send_sisters(pjob, IM_KILL_JOB);

        if (NumSisters == 0)
          {
          /* no sisters contacted - should be a serial job */

          if (LOGLEVEL >= 3)
            {
            LOG_EVENT(
              PBSEVENT_JOB,
              PBS_EVENTCLASS_JOB,
              pjob->ji_qs.ji_jobid,
              "no sisters contacted - setting job substate to EXITING");
            }

          pjob->ji_qs.ji_substate = JOB_SUBSTATE_EXITING;

          job_save(pjob, SAVEJOB_QUICK);
          }
        else if (LOGLEVEL >= 3)
          {
          snprintf(log_buffer, 1024,
            "master task has exited - sent kill job request to %d sisters",
            NumSisters);

          LOG_EVENT(
            PBSEVENT_JOB,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            log_buffer);
          }
        }  /* END if (ptask->ti_qs.ti_parenttask == TM_NULL_TASK) */

      /*
      ** process any TM client obits waiting.
      */

      pobit = (obitent *)GET_NEXT(ptask->ti_obits);

      while (pobit != NULL)
        {
        hnodent *pnode;

        pnode = get_node(pjob, pobit->oe_info.fe_node);

        /* see if this is me or another MOM */

        /* NOTE(review): get_node() can return NULL for an unknown node id;
         * pnode is dereferenced below without a check - verify fe_node is
         * always a valid vnode of this job */

        if (pjob->ji_nodeid == pnode->hn_node)
          {
          task *tp;

          /* send event to local child */

          tp = task_find(pjob, pobit->oe_info.fe_taskid);

          assert(tp != NULL);

          if (tp->ti_fd != -1)
            {
            tm_reply(tp->ti_fd, IM_ALL_OKAY, pobit->oe_info.fe_event);

            diswsi(tp->ti_fd, ptask->ti_qs.ti_exitstat);

            DIS_tcp_wflush(tp->ti_fd);
            }
          }
        else if (pnode->hn_stream != -1)
          {
          /*
          ** Send a response over to MOM
          ** whose child sent the request.
          */

          im_compose(
            pnode->hn_stream,
            pjob->ji_qs.ji_jobid,
            cookie,
            IM_ALL_OKAY,
            pobit->oe_info.fe_event,
            pobit->oe_info.fe_taskid);

          diswsi(pnode->hn_stream, ptask->ti_qs.ti_exitstat);

          rpp_flush(pnode->hn_stream);
          }

        delete_link(&pobit->oe_next);

        free(pobit);

        pobit = (obitent *)GET_NEXT(ptask->ti_obits);
        }  /* END while (pobit) */

      ptask->ti_fd = -1;

      ptask->ti_qs.ti_status = TI_STATE_DEAD;

      if (LOGLEVEL >= 3)
        {
        LOG_EVENT(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          "task is dead");
        }

      task_save(ptask);
      }  /* END for (ptask) */

    /*
    ** Look to see if the job has terminated.  If it is
    ** in any state other than EXITING continue on.
    */

    if ((pjob->ji_qs.ji_substate != JOB_SUBSTATE_EXITING) &&
        (pjob->ji_qs.ji_substate != JOB_SUBSTATE_NOTERM_REQUE))
      {
      if (LOGLEVEL >= 3)
        {
        snprintf(log_buffer, 1024,
          "job is in non-exiting substate %s, no obit sent at this time",
          PJobSubState[pjob->ji_qs.ji_substate]);

        LOG_EVENT(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          log_buffer);
        }

      if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_EXITED)
        {
        /* This is quasi odd. If we are in an EXITED substate
           then we already sent the obit to the server and
           it replied. But we have not received a
           PBS_BATCH_DeleteJob request from the server.
           If we have tasks to complete continue. But if
           there are no tasks left to run we need to delete
           the job.*/

        ptask = (task *)GET_NEXT(pjob->ji_tasks);

        if (ptask == NULL)
          mom_deljob(pjob);
        }

      continue;
      }

    /*
    ** Look to see if I am a regular sister.  If so,
    ** check to see if there is an obit event to
    ** send back to mother superior.
    ** Otherwise, I need to wait for her to send a KILL_JOB
    ** so I can send the obit (unless she died).
    */

    if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0)
      {
      int stream;

      stream = (pjob->ji_hosts == NULL) ?
               -1 :
               pjob->ji_hosts[0].hn_stream;

      /*
      ** Check to see if I'm still in touch with
      ** the mother superior.  If not, I'm just going to
      ** get rid of this job.
      */

      if (stream == -1)
        {
        if (LOGLEVEL >= 3)
          {
          LOG_EVENT(
            PBSEVENT_JOB,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            "connection to server lost - no obit sent - job will be purged");
          }

        if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_NOTERM_REQUE)
          {
          kill_job(pjob, SIGKILL, id, "connection to server lost - no obit sent");
          }

        job_purge(pjob);

        continue;
        }

      /*
      ** No event waiting for sending info to MS
      ** so I'll just sit tight.
      */

      if (pjob->ji_obit == TM_NULL_EVENT)
        {
        if (LOGLEVEL >= 3)
          {
          LOG_EVENT(
            PBSEVENT_JOB,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            "obit method not specified for job - no obit sent");
          }

        continue;
        }

      /*
      ** Check to see if any tasks are running.
      */

      ptask = (task *)GET_NEXT(pjob->ji_tasks);

      while (ptask != NULL)
        {
        if (ptask->ti_qs.ti_status == TI_STATE_RUNNING)
          break;

        ptask = (task *)GET_NEXT(ptask->ti_jobtask);
        }

      /* Still somebody there so don't send it yet. */

      if (ptask != NULL)
        {
        if (LOGLEVEL >= 3)
          {
          LOG_EVENT(
            PBSEVENT_JOB,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            "one or more running tasks found - no obit sent");
          }

        continue;
        }

      /* run parallel epilogs; interactive jobs suppress epilog I/O */

      if ((pjob->ji_wattr[(int)JOB_ATR_interactive].at_flags & ATR_VFLAG_SET) &&
          pjob->ji_wattr[(int)JOB_ATR_interactive].at_val.at_long)
        {
        if (run_pelog(PE_EPILOGUSER, path_epiloguserp, pjob, PE_IO_TYPE_NULL) != 0)
          {
          log_err(-1, id, "user parallel epilog failed");
          }

        if (run_pelog(PE_EPILOG, path_epilogp, pjob, PE_IO_TYPE_NULL) != 0)
          {
          log_err(-1, id, "parallel epilog failed");
          }
        }
      else
        {
        if (run_pelog(PE_EPILOGUSER, path_epiloguserp, pjob, PE_IO_TYPE_STD) != 0)
          {
          log_err(-1, id, "parallel user epilog failed");
          }

        if (run_pelog(PE_EPILOG, path_epilogp, pjob, PE_IO_TYPE_STD) != 0)
          {
          log_err(-1, id, "parallel epilog failed");
          }
        }

      /*
      ** No tasks running ... format and send a
      ** reply to the mother superior and get rid of
      ** the job.
      */

      im_compose(
        stream,
        pjob->ji_qs.ji_jobid,
        cookie,
        IM_ALL_OKAY,
        pjob->ji_obit,
        TM_NULL_TASK);

      diswul(stream, resc_used(pjob, "cput", gettime));

      diswul(stream, resc_used(pjob, "mem", getsize));

      diswul(stream, resc_used(pjob, "vmem", getsize));

      rpp_flush(stream);

      if (LOGLEVEL >= 6)
        {
        LOG_EVENT(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          "all tasks complete - purging job as sister");
        }

      DBPRT(("all tasks complete - purging job as sister (%s)\n",
             pjob->ji_qs.ji_jobid));

      job_purge(pjob);

      continue;
      }  /* END if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0) */

    /*
     * At this point, we know we are Mother Superior for this
     * job which is EXITING.  Time for it to die.
     */

    pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_Suspend;

    if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_NOTERM_REQUE)
      kill_job(pjob, SIGKILL, id, "local task termination detected");
    else
      {
      /* requeue without termination: mark running tasks EXITED instead */

      ptask = (task *)GET_NEXT(pjob->ji_tasks);

      while (ptask != NULL)
        {
        if (ptask->ti_qs.ti_status == TI_STATE_RUNNING)
          {
          if (LOGLEVEL >= 4)
            {
            log_record(
              PBSEVENT_JOB,
              PBS_EVENTCLASS_JOB,
              pjob->ji_qs.ji_jobid,
              "kill_job found a task to kill");
            }

          if (pjob->ji_qs.ji_un.ji_momt.ji_exitstat != 0)
            ptask->ti_qs.ti_exitstat = pjob->ji_qs.ji_un.ji_momt.ji_exitstat;
          else
            ptask->ti_qs.ti_exitstat = 0;  /* assume successful completion */

          ptask->ti_qs.ti_status = TI_STATE_EXITED;

          task_save(ptask);
          }

        ptask = (task *)GET_NEXT(ptask->ti_jobtask);
        }  /* END while (ptask != NULL) */
      }

#ifdef ENABLE_CPA
    if (CPADestroyPartition(pjob) != 0)
      continue;

#endif

    delete_link(&pjob->ji_jobque); /* unlink for poll list */

    /*
     * + Open connection to the Server (for the Job Obituary)
     * + Set the connection to call obit_reply when the reply
     *   arrives.
     * + fork child process, parent looks for more terminated jobs.
     * Child:
     * + Run the epilogue script (if one)
     * + Send the Job Obit Request (notice).
     */

    sock = mom_open_socket_to_jobs_server(pjob, id, preobit_reply);

    if (sock < 0)
      {
      if ((errno == EINPROGRESS) ||
          (errno == ETIMEDOUT) ||
          (errno == EINTR))
        {
        /* NOTE(review): log_buffer is composed here but never passed to
         * log_record/log_err, so this message is silently dropped */

        sprintf(log_buffer, "connect to server unsuccessful after 5 seconds - will retry");
        }

      /*
       * continue through the jobs loop since we can have jobs for multiple
       * servers.  Keep track that this server is down so we don't try to
       * process any more jobs for it.  We will leave it's exiting_tasks set
       * so Mom will retry Obit when server is available
       */

      set_mom_server_down(pjob->ji_qs.ji_un.ji_momt.ji_svraddr);

      continue;
      }  /* END if (sock < 0) */

    if (LOGLEVEL >= 2)
      {
      log_record(
        PBSEVENT_DEBUG,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "sending preobit jobstat");
      }

    pjob->ji_qs.ji_substate = JOB_SUBSTATE_PREOBIT;

#ifdef TREMOVEME
    if (ForceObit == 0)
      {
      if (found_one++ >= ObitsAllowed)
        {
        /* do not exceed max obits per iteration limit */

        break;
        }
      }

#endif /* TREMOVEME */

    /* send the pre-obit job stat request */

    DIS_tcp_setup(sock);

    if (encode_DIS_ReqHdr(sock, PBS_BATCH_StatusJob, pbs_current_user) ||
        encode_DIS_Status(sock, pjob->ji_qs.ji_jobid, NULL) ||
        encode_DIS_ReqExtend(sock, NULL))
      {
      /* FAILURE */

      log_record(
        PBSEVENT_DEBUG,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "failed creating preobit message");

      return;
      }

    DIS_tcp_wflush(sock);

    if (found_one++ >= ObitsAllowed)
      {
      /* do not exceed max obits per iteration limit */

      break;
      }
    }  /* END for (pjob) */

  if ((pjob == NULL) && (no_mom_servers_down()))
    {
    /* search finished */

    exiting_tasks = 0; /* went through all jobs */
    }

  return;
  }  /* END scan_for_exiting() */


/**
 * Send obit to server.
* * @see scan_for_terminated() - calls post_epilog() via ji_mompost job attribute * @see mom_open_socket_to_jobs_server() - child * @see obit_reply() - registered handler for obit connection * * @see scan_for_exiting() for Obit overview */ int post_epilogue( job *pjob, /* I */ int ev) /* I exit value (only used to determine if retrying obit) */ { char id[] = "post_epilogue"; int sock; struct batch_request *preq; if (LOGLEVEL >= 2) { sprintf(log_buffer, "preparing obit message for job %s", pjob->ji_qs.ji_jobid); LOG_EVENT( PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, id, log_buffer); } /* open new connection - register obit_reply as handler */ sock = mom_open_socket_to_jobs_server(pjob, id, obit_reply); if (sock < 0) { /* FAILURE */ if ((errno == EINTR) || (errno == ETIMEDOUT) || (errno == EINPROGRESS)) { /* transient failure - server/network up but busy... retry */ int retrycount; for (retrycount = 0;retrycount < 2;retrycount++) { sock = mom_open_socket_to_jobs_server(pjob, id, obit_reply); if (sock >= 0) break; } /* END for (retrycount) */ } if (sock < 0) { /* We are trying to send obit, but failed - where is this retried? * Answer: In the main_loop examine_all_jobs_to_resend() tries * every so often to send the obit. This would work for recovered * jobs also. 
*/ if (ev != MOM_OBIT_RETRY) { mark_for_resend(pjob); } return(1); } } /* send the job obiturary notice to the server */ preq = alloc_br(PBS_BATCH_JobObit); if (preq == NULL) { /* FAILURE */ sprintf(log_buffer, "cannot allocate memory for obit message"); LOG_EVENT( PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, id, log_buffer); return(1); } CLEAR_HEAD(preq->rq_ind.rq_jobobit.rq_attr); resc_access_perm = ATR_DFLAG_RDACC; encode_used(pjob, &preq->rq_ind.rq_jobobit.rq_attr); encode_flagged_attrs(pjob, &preq->rq_ind.rq_jobobit.rq_attr); strcpy(preq->rq_ind.rq_jobobit.rq_jid, pjob->ji_qs.ji_jobid); if (pjob->ji_job_is_being_rerun) { pjob->ji_qs.ji_un.ji_momt.ji_exitstat = 0; } preq->rq_ind.rq_jobobit.rq_status = pjob->ji_qs.ji_un.ji_momt.ji_exitstat; if (LOGLEVEL > 5) { sprintf(log_buffer, "job id %s exit status %d", preq->rq_ind.rq_jobobit.rq_jid, preq->rq_ind.rq_jobobit.rq_status); LOG_EVENT( PBSEVENT_DEBUG, PBSEVENT_JOB, id, log_buffer); } DIS_tcp_setup(sock); if (encode_DIS_ReqHdr(sock, PBS_BATCH_JobObit, pbs_current_user) || encode_DIS_JobObit(sock, preq) || encode_DIS_ReqExtend(sock, 0)) { /* FAILURE */ sprintf(log_buffer, "cannot create obit message for job %s", pjob->ji_qs.ji_jobid); LOG_EVENT( PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, id, log_buffer); close(sock); free_br(preq); return(1); } DIS_tcp_wflush(sock); /* does flush close sock? */ free_br(preq); /* SUCCESS */ /* Who closes sock and unsets pjob->ji_momhandle? * Answer: This gets done in the message reply handler, obit_reply. */ log_record( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, "obit sent to server"); return(0); } /* END post_epilog() */ /** * preobit_reply * * @see scan_for_exiting() - registers this routine as handler * @see mom_deljob() - child * @see run_pelog() - child * * This function is a message handler that is hooked to a server connection. * The connection is established in scan_for_exiting() where all jobs * are examined. 
A socket connection to the server is opened, an obit
 * message is sent to the server, and then at some later time, the server
 * sends back a reply and we end up here.
 *
 * What is the correct response if an EOF is detected?
 */

static void preobit_reply(

  int sock)  /* I */

  {
  char id[] = "preobit_reply";

  pid_t cpid;
  job  *pjob;
  int   irtn;

  struct batch_request *preq;

  struct brp_status *pstatus;
  svrattrl *sattrl;

  int runepilogue  = 0;  /* set when this host ran the job and must run epilogs */
  int deletejob    = 0;  /* set when the server no longer knows the job */
  int jobiscorrupt = 0;  /* set when the stat reply lacks an exec_host attr */

  char *path_epiloguserjob;
  resource *presc;

  /* struct batch_status *bsp = NULL; */

  log_record(
    PBSEVENT_DEBUG,
    PBS_EVENTCLASS_SERVER,
    id,
    "top of preobit_reply");

  /* read and decode the reply */

  /* NOTE(review): alloc_br() result is used without a NULL check here,
   * unlike post_epilogue() - confirm alloc_br cannot fail or add a check */

  preq = alloc_br(PBS_BATCH_StatusJob);

  CLEAR_HEAD(preq->rq_ind.rq_status.rq_attr);

  /* retry the read as long as it is interrupted by a signal */

  while ((irtn = DIS_reply_read(sock, &preq->rq_reply)) &&
         (errno == EINTR));

  if (irtn != 0)
    {
    sprintf(log_buffer, "DIS_reply_read/decode_DIS_replySvr failed, rc=%d sock=%d",
            irtn,
            sock);

    /* NOTE:  irtn=11 indicates EOF */

    /* NOTE:  errno not set, thus log_err say success in spite of failure */

    log_err(errno, id, log_buffer);

    preq->rq_reply.brp_code = -1;
    }
  else
    {
    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_SERVER,
      id,
      "DIS_reply_read/decode_DIS_replySvr worked, top of while loop");
    }

  /* find the job that triggered this req */

  pjob = (job *)GET_NEXT(svr_alljobs);

  while (pjob != NULL)
    {
    if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_PREOBIT) &&
        (pjob->ji_momhandle == sock))
      {
      /* located job that triggered req from server */

      break;
      }

    pjob = (job *)GET_NEXT(pjob->ji_alljobs);
    }  /* END while (pjob != NULL) */

  if (pjob == NULL)
    {
    /* FAILURE - cannot locate job that triggered req */

    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_SERVER,
      id,
      "cannot locate job that triggered req");

    free_br(preq);

    shutdown(sock, SHUT_RDWR);

    close_conn(sock);

    return;
    }  /* END if (pjob != NULL) */

  /* we've got a job in PREOBIT and matches the socket, now
     inspect the results of the job stat */

  switch (preq->rq_reply.brp_code)
    {

    case PBSE_CLEANEDOUT:

    case PBSE_UNKJOBID:

      /* this is the simple case of the job being purged from the server */

      sprintf(log_buffer, "preobit_reply, unknown on server, deleting locally");

      deletejob = 1;

      break;  /* not reached */

    case PBSE_NONE:

      log_record(
        PBSEVENT_DEBUG,
        PBS_EVENTCLASS_SERVER,
        id,
        "in while loop, no error from job stat");

      if (preq->rq_reply.brp_choice == BATCH_REPLY_CHOICE_Status)
        {
        pstatus = (struct brp_status *)GET_NEXT(preq->rq_reply.brp_un.brp_status);
        }
      else
        {
        sprintf(log_buffer, "BUG: preq->rq_reply.brp_choice==%d",
                preq->rq_reply.brp_choice);

        break;
        }

      if (pstatus == NULL)
        {
        sprintf(log_buffer, "BUG: pstatus==NULL");

        break;
        }

      if (strcmp(pstatus->brp_objname, pjob->ji_qs.ji_jobid))
        {
        sprintf(log_buffer, "BUG: mismatched jobid in preobit_reply (%s != %s)",
                pstatus->brp_objname,
                pjob->ji_qs.ji_jobid);

        break;
        }

      /* determine if job has exechost set - if set, and task 0 host is X ... */

      sattrl = (svrattrl *)GET_NEXT(pstatus->brp_attr);

      jobiscorrupt = 1;

      while (sattrl != NULL)
        {
        if (!strcmp(sattrl->al_name, ATTR_exechost))
          {
          jobiscorrupt = 0;

          /* compare the first host in exec_host with my host entry */

          if (strncmp(
                sattrl->al_value,
                pjob->ji_hosts[0].hn_host,
                strlen(pjob->ji_hosts[0].hn_host)))
            {
            /* the job was re-run elsewhere */

            sprintf(log_buffer, "first host DOES NOT match me: %s != %s",
                    sattrl->al_value,
                    pjob->ji_hosts[0].hn_host);

            deletejob = 1;
            }
          else
            {
            /* job was run locally */

            runepilogue = 1;
            }

          break;
          }

        sattrl = (svrattrl *)GET_NEXT(sattrl->al_link);
        }  /* END while (sattrl != NULL) */

      if (jobiscorrupt == 1)
        {
        /* runepilogue = 1; */
        }

      break;

    case - 1:

      sprintf(log_buffer, "EOF? received attempting to process obit reply");

      break;

    default:

      /* not sure what happened */

      sprintf(log_buffer, "something bad happened: %d",
              preq->rq_reply.brp_code);

      break;
    }  /* END switch (preq->rq_reply.brp_code) */

  /* we've inspected the server's response and can now act */

  free_br(preq);

  shutdown(sock, SHUT_RDWR);

  close_conn(sock);

  if (deletejob == 1)
    {
    log_record(
      PBSEVENT_ERROR,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    if (!(pjob->ji_wattr[(int)JOB_ATR_interactive].at_flags & ATR_VFLAG_SET) ||
        (pjob->ji_wattr[(int)JOB_ATR_interactive].at_val.at_long == 0))
      {
      int x; /* dummy */

      /* do this if not interactive */

      job_unlink_file(pjob, std_file_name(pjob, StdOut, &x));
      job_unlink_file(pjob, std_file_name(pjob, StdErr, &x));
      job_unlink_file(pjob, std_file_name(pjob, Checkpoint, &x));
      }

    mom_deljob(pjob);

    return;
    }

  if (!runepilogue)
    {
    /* could not confirm this host ran the job - back to EXITING so the
       whole obit sequence is retried on a later pass */

    log_record(
      PBSEVENT_ERROR,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    pjob->ji_qs.ji_substate = JOB_SUBSTATE_EXITING;

    pjob->ji_momhandle = -1;

    exiting_tasks = 1;  /* job exit will be picked up again */

    return;
    }

  /* at this point, server gave us a valid response so we can run epilogue */

  if (LOGLEVEL >= 2)
    {
    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      "performing job clean-up in preobit_reply()");
    }

  cpid = fork_me(-1);

  if (cpid < 0)
    {
    /* FAILURE */

    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      "fork failed in preobit_reply");

    return;
    }

  if (cpid > 0)
    {
    /* parent - mark that job epilog subtask has been launched */

    /* NOTE:  pjob->ji_mompost will be executed in scan_for_terminated() */

    pjob->ji_qs.ji_substate = JOB_SUBSTATE_OBIT;

    pjob->ji_momsubt = cpid;
    pjob->ji_mompost = post_epilogue;

    pjob->ji_momhandle = -1;

    if (LOGLEVEL >= 2)
      {
      snprintf(log_buffer, 1024,
        "epilog subtask created with pid %d - substate set to JOB_SUBSTATE_OBIT - registered post_epilogue",
        cpid);

      log_record(
        PBSEVENT_DEBUG,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        log_buffer);
      }

    return;
    }

  /* child */

  /* check epilog script */

  if ((pjob->ji_wattr[(int)JOB_ATR_interactive].at_flags & ATR_VFLAG_SET) &&
      pjob->ji_wattr[(int)JOB_ATR_interactive].at_val.at_long)
    {
    /* job is interactive - run epilogs with I/O suppressed */

    presc = find_resc_entry(
              &pjob->ji_wattr[(int)JOB_ATR_resource],
              find_resc_def(svr_resc_def, "epilogue", svr_resc_size));

    if ((presc != NULL))
      if ((presc->rs_value.at_flags & ATR_VFLAG_SET) && (presc->rs_value.at_val.at_str != NULL))
        {
        path_epiloguserjob = get_local_script_path(pjob, presc->rs_value.at_val.at_str);

        if (path_epiloguserjob)
          {
          if (run_pelog(PE_EPILOGUSERJOB, path_epiloguserjob, pjob, PE_IO_TYPE_NULL) != 0)
            {
            log_err(-1, id, "user local epilog failed");
            }

          free(path_epiloguserjob);
          }
        }

    if (run_pelog(PE_EPILOGUSER, path_epiloguser, pjob, PE_IO_TYPE_NULL) != 0)
      {
      log_err(-1, id, "user epilog failed - interactive job");
      }

    if (run_pelog(PE_EPILOG, path_epilog, pjob, PE_IO_TYPE_NULL) != 0)
      {
      log_err(-1, id, "system epilog failed - interactive job");
      }
    }
  else
    {
    /* job is not interactive - epilogs use the job's std streams */

    int rc;

    presc = find_resc_entry(
              &pjob->ji_wattr[(int)JOB_ATR_resource],
              find_resc_def(svr_resc_def, "epilogue", svr_resc_size));

    if ((presc != NULL))
      if ((presc->rs_value.at_flags & ATR_VFLAG_SET) && (presc->rs_value.at_val.at_str != NULL))
        {
        path_epiloguserjob = get_local_script_path(pjob, presc->rs_value.at_val.at_str);

        if (path_epiloguserjob)
          {
          if (run_pelog(PE_EPILOGUSERJOB, path_epiloguserjob, pjob, PE_IO_TYPE_STD) != 0)
            {
            log_err(-1, id, "user local epilog failed");
            }

          free(path_epiloguserjob);
          }
        }

    if (run_pelog(PE_EPILOGUSER, path_epiloguser, pjob, PE_IO_TYPE_STD) != 0)
      {
      log_err(-1, id, "user epilog failed");
      }

    if ((rc = run_pelog(PE_EPILOG, path_epilog, pjob, PE_IO_TYPE_STD)) != 0)
      {
      sprintf(log_buffer, "system epilog failed w/rc=%d",
              rc);

      log_err(-1, id, log_buffer);
      }
    }  /* END else (jobisinteractive) */

  exit(0);
  }  /* END preobit_reply() */


/*
 * obit_reply
 *
 * This function is a message handler that is hooked to a server connection.
 * The connection is established in post_epilogue().
 *
 * A socket connection to the server is opened, a job obituary notice
 * message is sent to the server, and then at some later time, the server
 * sends back a reply and we end up here.
 *
 * On success, this routine sets the job's substate to EXITED
 *
 * @see post_epilogue() - registers obit_reply via add_conn()
 */

static void obit_reply(

  int sock)  /* I */

  {
  int irtn;
  job *nxjob;
  job *pjob;
  attribute *pattr;

  struct batch_request *preq;

  int x; /* dummy */

  /* read and decode the reply */

  /* NOTE(review): alloc_br() result is used without a NULL check here,
   * unlike post_epilogue() - confirm alloc_br cannot fail or add a check */

  preq = alloc_br(PBS_BATCH_JobObit);

  CLEAR_HEAD(preq->rq_ind.rq_jobobit.rq_attr);

  /* retry the read as long as it is interrupted by a signal */

  while ((irtn = DIS_reply_read(sock, &preq->rq_reply)) &&
         (errno == EINTR));

  if (irtn != 0)
    {
    /* NOTE:  irtn is of type DIS_* in include/dis.h, see dis_emsg[] */

    sprintf(log_buffer, "DIS_reply_read failed, rc=%d sock=%d",
            irtn,
            sock);

    log_err(errno, "obit_reply", log_buffer);

    preq->rq_reply.brp_code = -1;
    }

  /* find the job associated with the reply by the socket number */
  /* saved in the job structure, ji_momhandle */

  pjob = (job *)GET_NEXT(svr_alljobs);

  while (pjob != NULL)
    {
    nxjob = (job *)GET_NEXT(pjob->ji_alljobs);

    if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_OBIT) &&
        (pjob->ji_momhandle == sock))
      {
      /* Clear out destination so we know job is not on mom any more */

      pjob->ji_qs.ji_destin[0] = '\0';

      switch (preq->rq_reply.brp_code)
        {

        case PBSE_NONE:

          /* normal ack, mark job as exited */

          pjob->ji_qs.ji_substate = JOB_SUBSTATE_EXITED;

          job_save(pjob, SAVEJOB_QUICK);

          if (LOGLEVEL >= 4)
            {
            LOG_EVENT(
              PBSEVENT_ERROR,
              PBS_EVENTCLASS_JOB,
              pjob->ji_qs.ji_jobid,
              "job obit acknowledge received - substate set to JOB_SUBSTATE_EXITED");
            }

          break;

        case PBSE_ALRDYEXIT:

          /* have already told the server before recovery */
          /* the server will contact us to continue */

          if (LOGLEVEL >= 7)
            {
            log_record(
              PBSEVENT_ERROR,
              PBS_EVENTCLASS_JOB,
              pjob->ji_qs.ji_jobid,
              "setting already exited job substate to EXITED");
            }

          pjob->ji_qs.ji_substate = JOB_SUBSTATE_EXITED;

          job_save(pjob, SAVEJOB_QUICK);

          break;

        case PBSE_CLEANEDOUT:

          /* all jobs discarded by server, discard job */

          pattr = &pjob->ji_wattr[(int)JOB_ATR_interactive];

          if (((pattr->at_flags & ATR_VFLAG_SET) == 0) ||
              (pattr->at_val.at_long == 0))
            {
            /* do this if not interactive */

            job_unlink_file(pjob, std_file_name(pjob, StdOut, &x));
            job_unlink_file(pjob, std_file_name(pjob, StdErr, &x));
            job_unlink_file(pjob, std_file_name(pjob, Checkpoint, &x));
            }

          mom_deljob(pjob);

          break;

        case - 1:

          /* FIXME - causes epilogue to be run twice! */

          pjob->ji_qs.ji_substate = JOB_SUBSTATE_EXITING;

          exiting_tasks = 1;

          break;

        default:

          {
          char tmpLine[1024];

          switch (preq->rq_reply.brp_code)
            {

            case PBSE_BADSTATE:

              sprintf(tmpLine, "server rejected job obit - unexpected job state");

              break;

            case PBSE_SYSTEM:

              sprintf(tmpLine, "server rejected job obit - server not ready for job completion");

              break;

            default:

              sprintf(tmpLine, "server rejected job obit - %d",
                      preq->rq_reply.brp_code);

              break;
            }  /* END switch (preq->rq_reply.brp_code) */

          log_ext(-1,"obit_reply",tmpLine,LOG_ALERT);

          LOG_EVENT(
            PBSEVENT_ERROR,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            tmpLine);
          }  /* END BLOCK */

          mom_deljob(pjob);

          break;
        }  /* END switch (preq->rq_reply.brp_code) */

      /* matching job handled - stop searching (pjob stays non-NULL) */

      break;
      }  /* END if (...) */

    pjob = nxjob;
    }  /* END while (pjob != NULL) */

  if (pjob == NULL)
    {
    LOG_EVENT(
      PBSEVENT_ERROR,
      PBS_EVENTCLASS_REQUEST,
      "obit reply",
      "Job not found for obit reply");
    }

  free_br(preq);

  shutdown(sock, 2);

  close_conn(sock);

  if (PBSNodeCheckEpilog)
    {
    check_state(1);

    mom_server_all_update_stat();
    }

  return;
  }  /* END obit_reply() */


/*
 * init_abort_jobs - on mom initialization, recover all running jobs.
 *
 * Called on initialization
 * If the -p option was given (default) (recover = JOB_RECOV_RUNNING), Mom will allow the jobs
 * to continue to run.  She depends on detecting when they terminate
 * via the slow poll method rather than SIGCHLD.
 * * If the -r option was given (recover = JOB_RECOV_TERM_REQUE), MOM is
 * recovering on a running system and the session id of the jobs should be valid;
 * the job processes are killed and the job is re-queued
 *
 * If -q was given (recover = JOB_RECOV_RQUE), it is assumed that the whole
 * system, not just MOM, is coming up, the session ids are not valid;
 * so no attempt is made to kill the job processes.  But the jobs are
 * terminated and requeued.
 *
 * If the -P option was given (recover == JOB_RECOV_DELETE), no attempt is
 * made to recover the jobs.  The jobs are deleted from the queue.
 *
 * @param recover (I) one of the JOB_RECOV_* modes described above
 */

void init_abort_jobs(

  int recover)  /* I (boolean) */

  {
  char          *id = "init_abort_jobs";

  DIR           *dir;
  int            i;
  int            j;
  int            sisters, rc;

  struct dirent *pdirent;

  job           *pj;

  char          *job_suffix = JOB_FILE_SUFFIX;
  int            job_suf_len = strlen(job_suffix);
  char          *psuffix;

  if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer, "%s: recover=%d",
            id,
            recover);

    log_record(
      PBSEVENT_ERROR,
      PBS_EVENTCLASS_SERVER,
      msg_daemonname,
      log_buffer);
    }

  /* scan the saved-job spool directory for *.JB files to recover */

  dir = opendir(path_jobs);

  if (dir == NULL)
    {
    /* cannot recover without the job directory - fatal at startup */

    sprintf(log_buffer, "cannot open job directory '%s'",
            path_jobs);

    log_record(
      PBSEVENT_ERROR,
      PBS_EVENTCLASS_SERVER,
      msg_daemonname,
      log_buffer);

    exit(1);
    }

  while ((pdirent = readdir(dir)) != NULL)
    {
    /* only consider entries whose name ends in JOB_FILE_SUFFIX */

    if ((i = strlen(pdirent->d_name)) <= job_suf_len)
      continue;

    psuffix = pdirent->d_name + i - job_suf_len;

    if (strcmp(psuffix, job_suffix))
      continue;

    /* rebuild the in-memory job structure from the saved file */

    pj = job_recov(pdirent->d_name);

    if (pj == NULL)
      {
      sprintf(log_buffer, "%s: NULL job pointer",
              id);

      log_record(
        PBSEVENT_ERROR,
        PBS_EVENTCLASS_SERVER,
        msg_daemonname,
        log_buffer);

      continue;
      }

    /* code moved to here because even when we're canceling jobs, if there is a
     * user epilogue we'll attempt to become the user, so if ji_grpcache is
     * NULL then we'll get a crash */

    if (pj->ji_grpcache == NULL)
      {
      DBPRT(("init_abort_jobs: setting grpcache for job %s\n",
             pj->ji_qs.ji_jobid));

      if (check_pwd(pj) == NULL)
        {
        /* somehow a job that was legally executing (had a password entry)
         * no longer has a password entry?? */

        snprintf(log_buffer, sizeof(log_buffer), "job %s no longer has valid password entry - deleting",
                 pj->ji_qs.ji_jobid);

        log_err(-1, id, log_buffer);

        mom_deljob(pj);

        continue;
        }
      }

    /* PW: mpiexec patch - set the globid so mom does not coredump in response to tm_spawn */

    set_globid(pj, NULL);

    append_link(&svr_alljobs, &pj->ji_alljobs, pj);

    /* NOTE(review): job_nodes() must precede task_recov() and the
     * addclient() loop below - it populates ji_numnodes/ji_hosts */

    job_nodes(pj);

    rc = task_recov(pj);

    if (LOGLEVEL >= 2)
      {
      sprintf(log_buffer, "task recovery %s for job %s, rc=%d",
              (rc == 0) ? "succeeded" : "failed",
              pj->ji_qs.ji_jobid,
              rc);

      log_record(
        PBSEVENT_DEBUG,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);
      }

    mom_checkpoint_recover(pj);

    /*
     * make sure we trust connections from sisters in case we get an
     * IM request before we get the real addr list from server.
     * Note: this only works after the job_nodes() call above.
     */

    for (j = 0;j < pj->ji_numnodes;j++)
      {
      if (LOGLEVEL >= 6)
        {
        sprintf(log_buffer, "%s: adding client %s",
                id,
                pj->ji_hosts[j].hn_host);

        log_record(
          PBSEVENT_ERROR,
          PBS_EVENTCLASS_SERVER,
          msg_daemonname,
          log_buffer);
        }

      addclient(pj->ji_hosts[j].hn_host);
      }  /* END for (j) */

    if (LOGLEVEL >= 4)
      {
      sprintf(log_buffer, "successfully recovered job %s",
              pj->ji_qs.ji_jobid);

      log_record(
        PBSEVENT_DEBUG,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);
      }

    /* kill/requeue path: taken for -r and -q (but never -p or -P) when the
     * job was recovered in an active substate */

    if ((recover != JOB_RECOV_RUNNING) &&
        (recover != JOB_RECOV_DELETE) &&
        ((pj->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING) ||
         (pj->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) ||
         (pj->ji_qs.ji_substate == JOB_SUBSTATE_SUSPEND) ||
         (pj->ji_qs.ji_substate == JOB_SUBSTATE_EXITED) ||
         (pj->ji_qs.ji_substate == JOB_SUBSTATE_NOTERM_REQUE) ||
         (pj->ji_qs.ji_substate == JOB_SUBSTATE_EXITING)))
      {
      if (LOGLEVEL >= 2)
        {
        sprintf(log_buffer, "job %s recovered in active state %s (full recover not enabled)",
                pj->ji_qs.ji_jobid,
                PJobSubState[pj->ji_qs.ji_substate]);

        log_record(
          PBSEVENT_DEBUG,
          PBS_EVENTCLASS_JOB,
          id,
          log_buffer);
        }

      if (recover == JOB_RECOV_TERM_REQUE) /* -r option was used to start mom */
        {
        kill_job(pj, SIGKILL, id, "recover is non-zero");
        }

      /*
      ** Check to see if I am Mother Superior.  The
      ** JOB_SVFLG_HERE flag is overloaded for MOM
      ** for this purpose.
      ** If I'm an ordinary sister, just throw the job
      ** away.  If I am MS, send a KILL_JOB request to
      ** any sisters that happen to still be alive.
      */

      if ((pj->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0)
        {
        if (LOGLEVEL >= 2)
          {
          sprintf(log_buffer, "local host is not mother-superior, deleting job %s",
                  pj->ji_qs.ji_jobid);

          log_record(
            PBSEVENT_DEBUG,
            PBS_EVENTCLASS_JOB,
            id,
            log_buffer);
          }

        mom_deljob(pj);

        continue;
        }

      if (LOGLEVEL >= 2)
        {
        sprintf(log_buffer, "setting job state to exiting for job %s in state %s",
                pj->ji_qs.ji_jobid,
                PJobSubState[pj->ji_qs.ji_substate]);

        log_record(
          PBSEVENT_DEBUG,
          PBS_EVENTCLASS_JOB,
          id,
          log_buffer);
        }

      /* set exit status to:
       * JOB_EXEC_INITABT - init abort and no checkpoint
       * JOB_EXEC_INITRST - init and checkpoint, no mig
       * JOB_EXEC_INITRMG - init and checkpoint, migrate
       * to indicate recovery abort
       */

      if (pj->ji_qs.ji_svrflags & (JOB_SVFLG_CHECKPOINT_FILE | JOB_SVFLG_CHECKPOINT_MIGRATEABLE))
        {
#if PBS_CHKPT_MIGRATE
        pj->ji_qs.ji_un.ji_momt.ji_exitstat = JOB_EXEC_INITRMG;
#else
        pj->ji_qs.ji_un.ji_momt.ji_exitstat = JOB_EXEC_INITRST;
#endif
        }
      else
        {
        pj->ji_qs.ji_un.ji_momt.ji_exitstat = JOB_EXEC_INITABT;
        }

      sisters = pj->ji_numnodes - 1;

      /*
      ** A sisterhood exists... send a KILL request.
      */

      if (sisters > 0)
        {
        DBPRT(("init_abort_jobs: Sending to sisters\n"))

        pj->ji_resources = (noderes *)calloc(sisters, sizeof(noderes));

        send_sisters(pj, IM_KILL_JOB);

        continue;
        }

      /* If mom was initialized with a -r any running processes have already
         been killed.  We set substate to JOB_SUBSTATE_NOTERM_REQUE so
         scan_for_exiting will not try to kill the running processes for
         this job */

      pj->ji_qs.ji_substate = JOB_SUBSTATE_NOTERM_REQUE;

      job_save(pj, SAVEJOB_QUICK);

      exiting_tasks = 1;
      }  /* END if ((recover != 2) && ...) */
    else if (recover == JOB_RECOV_RUNNING || recover == JOB_RECOV_DELETE)
      {
      /*
       * add: 8/11/03  David.Singleton@anu.edu.au
       *
       * Lots of job structure components need to be
       * initialized if we are leaving this job
       * running,  this is just a few.
       * Modified to accommodate JOB_RECOV_DELETE option
       * 01/13/2009 Ken Nielson knielson@adaptivecomputing.com
       */

      if (LOGLEVEL >= 2 && recover == JOB_RECOV_RUNNING)
        {
        sprintf(log_buffer, "attempting to recover job %s in state %s",
                pj->ji_qs.ji_jobid,
                PJobSubState[pj->ji_qs.ji_substate]);

        log_record(
          PBSEVENT_DEBUG,
          PBS_EVENTCLASS_JOB,
          id,
          log_buffer);
        }

      sisters = pj->ji_numnodes - 1;

      if (sisters > 0)
        pj->ji_resources = (noderes *)calloc(sisters, sizeof(noderes));

      /* re-add the job to the poll list so mom notices when it exits */

      if ((sisters > 0) && (recover == JOB_RECOV_RUNNING))
        append_link(&mom_polljobs, &pj->ji_jobque, pj);
      }
    }    /* while ((pdirent = readdir(dir)) != NULL) */

  closedir(dir);

  return;
  }  /* END init_abort_jobs() */




/*
 * mom_deljob - delete the job entry, MOM no longer knows about the job
 *
 * @param pjob (I) job to purge [modified/freed - do not use after this call]
 */

void mom_deljob(

  job *pjob)  /* I (modified) */

  {

#ifdef _CRAY
  /* remove any temporary directories */

  rmtmpdir(pjob->ji_qs.ji_jobid);

#endif /* _CRAY */

  if (LOGLEVEL >= 3)
    {
    sprintf(log_buffer, "deleting job %s in state %s",
            pjob->ji_qs.ji_jobid,
            PJobSubState[pjob->ji_qs.ji_substate]);

    log_record(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);
    }

  job_purge(pjob);

  return;
  }  /* END mom_deljob() */

/* END catch_child() */