/*
*         OpenPBS (Portable Batch System) v2.3 Software License
*
* Copyright (c) 1999-2000 Veridian Information Solutions, Inc.
* All rights reserved.
*
* ---------------------------------------------------------------------------
* For a license to use or redistribute the OpenPBS software under conditions
* other than those described below, or to purchase support for this software,
* please contact Veridian Systems, PBS Products Department ("Licensor") at:
*
*    www.OpenPBS.org  +1 650 967-4675                  sales@OpenPBS.org
*                        877 902-4PBS (US toll-free)
* ---------------------------------------------------------------------------
*
* This license covers use of the OpenPBS v2.3 software (the "Software") at
* your site or location, and, for certain users, redistribution of the
* Software to other sites and locations.  Use and redistribution of
* OpenPBS v2.3 in source and binary forms, with or without modification,
* are permitted provided that all of the following conditions are met.
* After December 31, 2001, only conditions 3-6 must be met:
*
* 1. Commercial and/or non-commercial use of the Software is permitted
*    provided a current software registration is on file at www.OpenPBS.org.
*    If use of this software contributes to a publication, product, or
*    service, proper attribution must be given; see www.OpenPBS.org/credit.html
*
* 2. Redistribution in any form is only permitted for non-commercial,
*    non-profit purposes.  There can be no charge for the Software or any
*    software incorporating the Software.  Further, there can be no
*    expectation of revenue generated as a consequence of redistributing
*    the Software.
*
* 3. Any Redistribution of source code must retain the above copyright notice
*    and the acknowledgment contained in paragraph 6, this list of conditions
*    and the disclaimer contained in paragraph 7.
*
* 4. Any Redistribution in binary form must reproduce the above copyright
*    notice and the acknowledgment contained in paragraph 6, this list of
*    conditions and the disclaimer contained in paragraph 7 in the
*    documentation and/or other materials provided with the distribution.
*
* 5. Redistributions in any form must be accompanied by information on how to
*    obtain complete source code for the OpenPBS software and any
*    modifications and/or additions to the OpenPBS software.  The source code
*    must either be included in the distribution or be available for no more
*    than the cost of distribution plus a nominal fee, and all modifications
*    and additions to the Software must be freely redistributable by any party
*    (including Licensor) without restriction.
*
* 6. All advertising materials mentioning features or use of the Software must
*    display the following acknowledgment:
*
*     "This product includes software developed by NASA Ames Research Center,
*     Lawrence Livermore National Laboratory, and Veridian Information
*     Solutions, Inc.
*     Visit www.OpenPBS.org for OpenPBS software support,
*     products, and information."
*
* 7. DISCLAIMER OF WARRANTY
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT
* ARE EXPRESSLY DISCLAIMED.
*
* IN NO EVENT SHALL VERIDIAN CORPORATION, ITS AFFILIATED COMPANIES, OR THE
* U.S. GOVERNMENT OR ANY OF ITS AGENCIES BE LIABLE FOR ANY DIRECT OR INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This license will be governed by the laws of the Commonwealth of Virginia,
* without reference to its choice of law rules.
*/
/*
 * The entry point function for MOM.
 */

#include <pbs_config.h>   /* the master config generated by configure */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>

#ifdef _CRAY
#include <termios.h>
#endif /* _CRAY */

#include <pwd.h>
#include <signal.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <limits.h>
#include <netdb.h>
#include <grp.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/times.h>
#include <sys/stat.h>
#if (PLOCK_DAEMONS & 4)
#include <sys/lock.h>
#endif /* PLOCK_DAEMONS */
#include <netinet/in.h>
#include <sys/socket.h>
#ifdef _CRAY
#include <sys/category.h>
#include <sys/usrv.h>
#include <sys/sysv.h>
#endif /* _CRAY */
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/utsname.h>
#if defined(NTOHL_NEEDS_ARPA_INET_H) && defined(HAVE_ARPA_INET_H)
#include <arpa/inet.h>
#endif


#include "libpbs.h"
#include "pbs_ifl.h"
#include "server_limits.h"
#include "list_link.h"
#include "attribute.h"
#include "resource.h"
#include "pbs_job.h"
#include "mom_mach.h"
#include "mom_func.h"
#include "svrfunc.h"
#include "pbs_error.h"
#include "log.h"
#include "net_connect.h"
#include "rpp.h"
#include "dis.h"
#include "dis_init.h"
#include "resmon.h"
#include "pbs_nodes.h"
#include "dis.h"
#include "csv.h"
#include "utils.h"

#include "mcom.h"

#ifdef NOPOSIXMEMLOCK
#undef _POSIX_MEMLOCK
#endif /* NOPOSIXMEMLOCK */

#ifdef _POSIX_MEMLOCK
#include <sys/mman.h>
#endif /* _POSIX_MEMLOCK */

/* Compile-time defaults and limits used by the globals defined below. */

#define CHECK_POLL_TIME     45  /* initial value of CheckPollTime (presumably seconds - confirm) */
#define DEFAULT_SERVER_STAT_UPDATES 45  /* initial value of ServerStatUpdateInterval (presumably seconds - confirm) */

#define PMAX_PORT           32000
#define MAX_RESEND_JOBS     512  /* number of slots in the JobsToResend[] array */
#define DUMMY_JOB_PTR       1

/* Global Data Items */

char  *program_name;
int    MOMIsLocked = 0;
int    MOMIsPLocked = 0;
int    ServerStatUpdateInterval = DEFAULT_SERVER_STAT_UPDATES;
int    CheckPollTime            = CHECK_POLL_TIME;
int    ForceServerUpdate = 0;

int    verbositylevel = 0;
double cputfactor = 1.00;
unsigned int default_server_port = 0;
int    exiting_tasks = 0;
float  ideal_load_val = -1.0;
int    internal_state = 0;
/* by default, enforce these policies */
int    ignwalltime = 0; 
int    ignmem = 0;
int    igncput = 0;
int    ignvmem = 0; 
int    spoolasfinalname = 0;
/* end policies */
int    lockfds = -1;
time_t loopcnt;  /* used for MD5 calc */
float  max_load_val = -1.0;
int    hostname_specified = 0;
char   mom_host[PBS_MAXHOSTNAME + 1];
char   TMOMRejectConn[1024];   /* most recent rejected connection */
char   mom_short_name[PBS_MAXHOSTNAME + 1];
int    num_var_env;
char        *path_epilog;
char        *path_epilogp;
char        *path_epiloguser;
char        *path_epiloguserp;
char        *path_epilogpdel;
char        *path_jobs;
char        *path_prolog;
char        *path_prologp;
char        *path_prologuser;
char        *path_prologuserp;
char        *path_spool;
char        *path_undeliv;
char        *path_aux;
char        *path_server_name;
char        *path_home = PBS_SERVER_HOME;
char        *mom_home;
extern char *msg_daemonname;          /* for logs     */
extern char *msg_info_mom; /* Mom information message   */
extern int pbs_errno;
gid_t  pbsgroup;
uid_t pbsuser;
unsigned int pbs_mom_port = 0;
unsigned int pbs_rm_port = 0;
tlist_head mom_polljobs; /* jobs that must have resource limits polled */
tlist_head svr_newjobs; /* jobs being sent to MOM */
tlist_head svr_alljobs; /* all jobs under MOM's control */
tlist_head mom_varattrs; /* variable attributes */
int  termin_child = 0;  /* boolean - one or more children need to be terminated this iteration */
time_t  time_now = 0;
time_t  last_poll_time = 0;
extern tlist_head svr_requests;

extern struct var_table vtable; /* see start_exec.c */
double  wallfactor = 1.00;
long  log_file_max_size = 0;
long  log_file_roll_depth = 1;

time_t  last_log_check;
char           *nodefile_suffix = NULL;    /* suffix to append to each host listed in job host file */
char           *submithost_suffix = NULL;  /* suffix to append to submithost for interactive jobs */
char           *TNoSpoolDirList[TMAX_NSDCOUNT];
char           *TRemChkptDirList[TMAX_RCDCOUNT];

job            *JobsToResend[MAX_RESEND_JOBS];

char           *AllocParCmd = NULL;  /* (alloc) */

int      src_login_batch = TRUE;
int      src_login_interactive = TRUE;

/* externs */

extern char *server_alias;
extern unsigned int pe_alarm_time;
extern time_t   pbs_tcp_timeout;
extern long     MaxConnectTimeout;

char            tmpdir_basename[MAXPATHLEN];  /* for $TMPDIR */

char            rcp_path[MAXPATHLEN];
char            rcp_args[MAXPATHLEN];
char            xauth_path[MAXPATHLEN];

time_t          LastServerUpdateTime = 0;  /* NOTE: all servers updated together */

time_t          MOMStartTime         = 0;
int             MOMPrologTimeoutCount;
int             MOMPrologFailureCount;

char            MOMConfigVersion[64];
char            MOMUNameMissing[64];

int             MOMConfigDownOnError      = 0;
int             MOMConfigRestart          = 0;
int             MOMConfigRReconfig        = 0;
long            system_ncpus = 0;
char           *auto_ideal_load = NULL;
char           *auto_max_load   = NULL;

#define TMAX_JE  64

pjobexec_t      TMOMStartInfo[TMAX_JE];


/* prototypes */

extern void     add_resc_def(char *, char *);
extern void     mom_server_all_diag(char **BPtr, int *BSpace);
extern void     mom_server_update_receive_time(int stream, const char *command_name);
extern void     mom_server_all_init(void);
extern void     mom_server_all_update_stat(void);
extern int      mark_for_resend(job *);
extern int      mom_server_all_check_connection(void);
extern int      mom_server_all_send_state(void);
extern int      mom_server_add(char *name);
extern int      mom_server_count;
extern int      post_epilogue(job *, int);
extern int      mom_checkpoint_init(void);
extern void     mom_checkpoint_check_periodic_timer(job *pjob);
extern void     mom_checkpoint_set_directory_path(char *str);

void prepare_child_tasks_for_delete();

#define PMOMTCPTIMEOUT 60  /* duration in seconds mom TCP requests will block */


/* Local Data Items */

static char *log_file = NULL;

/*
 * Values held by the file-local mom_run_state variable.
 *
 * NOTE(review): the per-value notes below are inferred from the enumerator
 * names only; confirm against the code that sets and tests mom_run_state.
 */

enum PMOMStateEnum
  {
  MOM_RUN_STATE_RUNNING,   /* presumably: normal operation */
  MOM_RUN_STATE_EXIT,      /* presumably: shut down */
  MOM_RUN_STATE_KILLALL,   /* presumably: shut down and kill jobs */
  MOM_RUN_STATE_RESTART,   /* presumably: restart the daemon */
  MOM_RUN_STATE_LAST       /* last enumerator; looks like an end marker / count */
  };

static enum PMOMStateEnum mom_run_state;

static int recover = JOB_RECOV_RUNNING;
static int recover_set = FALSE;

static int      call_hup = 0;
static int      nconfig;
static char    *path_log;

/*
 * Linked-list node that wraps one struct config entry.
 * c_link points at another node of the same type (presumably the next
 * entry in the list - confirm against the config file reader).
 */

struct config_list
  {
  struct config       c;       /* the wrapped configuration entry */

  struct config_list *c_link;  /* link to another list node */
  };

/* NOTE:  must adjust RM_NPARM in resmom.h to be larger than number of parameters
          specified below */

static unsigned long setxauthpath(char *);
static unsigned long setrcpcmd(char *);
static unsigned long setpbsclient(char *);
static unsigned long configversion(char *);
static unsigned long cputmult(char *);
static unsigned long setallocparcmd(char *);
static unsigned long setidealload(char *);
static unsigned long setignwalltime(char *);
static unsigned long setignmem(char *);
static unsigned long setigncput(char *);
static unsigned long setignvmem(char *);
static unsigned long setlogevent(char *);
static unsigned long setloglevel(char *);
static unsigned long setumask(char *);
static unsigned long setpreexec(char *);
static unsigned long setmaxload(char *);
static unsigned long setenablemomrestart(char *);
static unsigned long prologalarm(char *);
static unsigned long restricted(char *);
static unsigned long jobstartblocktime(char *);
static unsigned long usecp(char *);
static unsigned long wallmult(char *);
static unsigned long setpbsserver(char *);
static unsigned long setnodecheckscript(char *);
static unsigned long setnodecheckinterval(char *);
static unsigned long settimeout(char *);
extern unsigned long mom_checkpoint_set_checkpoint_interval(char *);
extern unsigned long mom_checkpoint_set_checkpoint_script(char *);
extern unsigned long mom_checkpoint_set_restart_script(char *);
extern unsigned long mom_checkpoint_set_checkpoint_run_exe_name(char *);
static unsigned long setdownonerror(char *);
static unsigned long setstatusupdatetime(char *);
static unsigned long setcheckpolltime(char *);
static unsigned long settmpdir(char *);
static unsigned long setlogfilemaxsize(char *);
static unsigned long setlogfilerolldepth(char *);
static unsigned long setlogfilesuffix(char *);
static unsigned long setlogdirectory(char *);
static unsigned long setlogkeepdays(char *);
static unsigned long setvarattr(char *);
static unsigned long setautoidealload(char *);
static unsigned long setautomaxload(char *);
static unsigned long setnodefilesuffix(char *);
static unsigned long setnospooldirlist(char *);
static unsigned long setmomhost(char *);
static unsigned long setrreconfig(char *);
static unsigned long setsourceloginbatch(char *);
static unsigned long setsourcelogininteractive(char *);
static unsigned long setspoolasfinalname(char *);
static unsigned long setremchkptdirlist(char *);
static unsigned long setmaxconnecttimeout(char *);
static unsigned long aliasservername(char *);


/*
 * Table of "special" configuration parameters.
 *
 * Each entry pairs a parameter name with the handler function declared
 * above for it; every handler takes a char * and returns unsigned long.
 * Several names may share one handler (e.g. "rcpcmd"/"rcp_cmd",
 * "clienthost"/"pbsserver").
 *
 * The table is terminated by a { NULL, NULL } entry.  reqgres() walks the
 * names in this table (bounded by RM_NPARM) to keep special parameters out
 * of the generic resource string, so RM_NPARM must stay larger than the
 * number of entries here.
 */

static struct specials
  {
  char            *name;     /* parameter name */
  u_long(*handler)();        /* function registered for that name */
  } special[] = {
  { "alloc_par_cmd",       setallocparcmd },
  { "auto_ideal_load",     setautoidealload },
  { "auto_max_load",       setautomaxload },
  { "xauthpath",           setxauthpath },
  { "rcpcmd",              setrcpcmd },
  { "rcp_cmd",             setrcpcmd },
  { "pbsclient",           setpbsclient },
  { "configversion",       configversion },
  { "cputmult",            cputmult },
  { "ideal_load",          setidealload },
  { "ignwalltime",         setignwalltime },
  { "ignmem",              setignmem },
  { "igncput",             setigncput },
  { "ignvmem",             setignvmem },
  { "logevent",            setlogevent },
  { "loglevel",            setloglevel },
  { "max_load",            setmaxload },
  { "enablemomrestart",    setenablemomrestart },
  { "prologalarm",         prologalarm },
  { "restricted",          restricted },
  { "jobstartblocktime",   jobstartblocktime },
  { "usecp",               usecp },
  { "wallmult",            wallmult },
  { "clienthost",          setpbsserver },  /* deprecated - use pbsserver */
  { "pbsserver",           setpbsserver },
  { "node_check_script",   setnodecheckscript },
  { "node_check_interval", setnodecheckinterval },
  { "timeout",             settimeout },
  { "checkpoint_interval", mom_checkpoint_set_checkpoint_interval },
  { "checkpoint_script",   mom_checkpoint_set_checkpoint_script },
  { "restart_script",      mom_checkpoint_set_restart_script },
  { "checkpoint_run_exe",  mom_checkpoint_set_checkpoint_run_exe_name },
  { "down_on_error",       setdownonerror },
  { "status_update_time",  setstatusupdatetime },
  { "check_poll_time",     setcheckpolltime },
  { "tmpdir",              settmpdir },
  { "log_directory",       setlogdirectory },
  { "log_file_max_size",   setlogfilemaxsize },
  { "log_file_roll_depth", setlogfilerolldepth },
  { "log_file_suffix",     setlogfilesuffix },
  { "log_keep_days",       setlogkeepdays },
  { "varattr",             setvarattr },
  { "nodefile_suffix",     setnodefilesuffix },
  { "nospool_dir_list",    setnospooldirlist },
  { "mom_host",            setmomhost },
  { "remote_reconfig",     setrreconfig },
  { "job_output_file_umask", setumask },
  { "preexec",             setpreexec },
  { "source_login_batch",  setsourceloginbatch },
  { "source_login_interactive", setsourcelogininteractive },
  { "spool_as_final_name", setspoolasfinalname },
  { "remote_checkpoint_dirs", setremchkptdirlist },
  { "max_conn_timeout_micro_sec",   setmaxconnecttimeout },
  { "alias_server_name", aliasservername },
  { NULL,                  NULL }   /* end-of-table marker */
  };


static char *arch(struct rm_attribute *);
static char *opsys(struct rm_attribute *);
static char *requname(struct rm_attribute *);
static char *validuser(struct rm_attribute *);
static char *reqmsg(struct rm_attribute *);
char *reqgres(struct rm_attribute *);
static char *reqstate(struct rm_attribute *);
static char *getjoblist(struct rm_attribute *);
static char *reqvarattr(struct rm_attribute *);
/* static char *nullproc(struct rm_attribute *); */


/*
 * Resource names answered by handler functions defined in this file.
 *
 * dependent() searches this table first (via rm_search()) and calls the
 * matching handler with the request's attribute.  The table ends with a
 * NULL name.  reqgres() also consults it so that these names, other than
 * "gres" itself, are left out of the generic resource string.
 */

struct config common_config[] =
  {
  { "arch",      {arch} },             /* machine architecture           */
  { "opsys",     {opsys} },            /* operating system               */
  { "uname",     {requname} },         /* uname(2) fields as one string  */
  { "validuser", {validuser} },        /* "yes"/"no": is the user known  */
  { "message",   {reqmsg} },           /* contents of PBSNodeMsgBuf      */
  { "gres",      {reqgres} },          /* generic resource (licenses...) */
  { "state",     {reqstate} },         /* state of pbs_mom               */
  { "jobs",      {getjoblist} },       /* job list this pbs_mom          */
  { "varattr",   {reqvarattr} },       /* joined varattr command values  */
  { NULL,        {NULL} }
  };

int                     LOGLEVEL = 0;  /* valid values (0 - 10) */
int                     LOGKEEPDAYS = 0; /* days each log file should be kept before deleting */
int                     DEBUGMODE = 0;
int                     DOBACKGROUND = 1;
char                    DEFAULT_UMASK[1024];
char                    PRE_EXEC[1024];
long                    TJobStartBlockTime = 5; /* seconds to wait for job to launch before backgrounding */
long                    TJobStartTimeout = 300; /* seconds to wait for job to launch before purging */


char                   *ret_string;
int   ret_size;

struct config         *config_array = NULL;

struct config_list    *config_list = NULL;
sigset_t  allsigs;
int   rm_errno;
unsigned int            reqnum = 0;  /* the packet number */

int   port_care = TRUE; /* secure connecting ports */
uid_t   uid = 0;  /* uid we are running with */
unsigned int   alarm_time = 10; /* time before alarm */

extern tree            *okclients;  /* accept connections from */
char                  **maskclient = NULL; /* wildcard connections */
int   mask_num = 0;
int   mask_max = 0;
u_long   localaddr = 0;

char   extra_parm[] = "extra parameter(s)";
char   no_parm[]    = "required parameter not found";
char   varattr_delimiter[] = ";";

int   cphosts_num = 0;

struct cphosts         *pcphosts = NULL;

static int  config_file_specified = 0;
static char  config_file[_POSIX_PATH_MAX] = "config";

char                    PBSNodeMsgBuf[1024];
char                    PBSNodeCheckPath[1024];
int                     PBSNodeCheckInterval;
int                     PBSNodeCheckProlog = 0;
int                     PBSNodeCheckEpilog = 0;
static char            *MOMExePath = NULL;
static time_t           MOMExeTime = 0;


/* sync w/#define JOB_SUBSTATE_XXX (in include/pbs_job.h)*/

/*
 * Printable names for job substates, terminated by NULL.
 *
 * The array index is the substate number: the "SUBSTATEnn" placeholders
 * occupy the numbers that have no name of their own, so each placeholder's
 * nn equals its position in the array.  Keep this in step with the
 * JOB_SUBSTATE_XXX defines.
 */

const char *PJobSubState[] =
  {
  "TRANSIN",                /* Transit in, wait for commit */
  "TRANSICM",               /* Transit in, wait for commit */
  "TRNOUT",                 /* transiting job outbound */
  "TRNOUTCM",               /* transiting outbound, rdy to commit */
  "SUBSTATE04",
  "SUBSTATE05",
  "SUBSTATE06",
  "SUBSTATE07",
  "SUBSTATE08",
  "SUBSTATE09",
  "QUEUED",                 /* job queued and ready for selection */
  "PRESTAGEIN",             /* job queued, has files to stage in */
  "SUBSTATE12",
  "SYNCRES",                /* job waiting on sync start ready */
  "STAGEIN",                /* job staging in files then wait */
  "STAGEGO",                /* job staging in files and then run */
  "STAGECMP",               /* job stage in complete */
  "SUBSTATE17",
  "SUBSTATE18",
  "SUBSTATE19",
  "HELD",      /* job held - user or operator */
  "SYNCHOLD",  /* job held - waiting on sync regist */
  "DEPNHOLD",  /* job held - waiting on dependency */
  "SUBSTATE23",
  "SUBSTATE24",
  "SUBSTATE25",
  "SUBSTATE26",
  "SUBSTATE27",
  "SUBSTATE28",
  "SUBSTATE29",
  "WAITING",   /* job waiting on execution time */
  "SUBSTATE31",
  "SUBSTATE32",
  "SUBSTATE33",
  "SUBSTATE34",
  "SUBSTATE35",
  "SUBSTATE36",
  "STAGEFAIL", /* job held - file stage in failed */
  "SUBSTATE38",
  "SUBSTATE39",
  "PRERUN",    /* job sent to MOM to run */
  "STARTING",  /* final job start initiated */
  "RUNNING",   /* job running */
  "SUSPEND",   /* job suspended, CRAY only */
  "SUBSTATE44",
  "SUBSTATE45",
  "SUBSTATE46",
  "SUBSTATE47",
  "SUBSTATE48",
  "SUBSTATE49",
  "EXITING",   /* Start of job exiting processing */
  "STAGEOUT",  /* job staging out (other) files   */
  "STAGEDEL",  /* job deleting staged out files   */
  "EXITED",    /* job exit processing completed   */
  "ABORT",     /* job is being aborted by server  */
  "SUBSTATE55",
  "SUBSTATE56",
  "PREOBIT",   /* preobit job status */
  "OBIT",      /* (MOM) job obit notice sent */
  "COMPLETED",
  "RERUN",     /* job is rerun, recover output stage */
  "RERUN1",    /* job is rerun, stageout phase */
  "RERUN2",    /* job is rerun, delete files stage */
  "RERUN3",    /* job is rerun, mom delete job */
  "RETSTD",    /* job has checkpoint file, return stdout / stderr files to server
                * spool dir so that job can be restarted
                */
  NULL         /* end-of-table marker */
  };


/* sync w/#define IS_XXX */

/*
 * Printable names for the server command codes, terminated by NULL.
 * Entries are positional, so their order has to follow the numeric order
 * of the IS_XXX defines (NOTE(review): those defines are not visible here;
 * confirm the mapping starts at 0 with "NULL").
 */

const char *PBSServerCmds[] =
  {
  "NULL",
  "HELLO",
  "CLUSTER_ADDRS",
  "UPDATE",
  "STATUS",
  NULL
  };


/*
** These routines are in the "dependent" code.
*/

extern void dep_initialize(void);
extern void dep_cleanup(void);

/* External Functions */

extern void catch_child(int);
extern void init_abort_jobs(int);
extern void scan_for_exiting();
extern void scan_for_terminated();
extern int TMomCheckJobChild(pjobexec_t *, int, int *, int *);
extern int TMomFinalizeJob3(pjobexec_t *, int, int, int *);
extern void exec_bail(job *, int);
extern void check_state(int);
extern void DIS_tcp_funcs();


/* Local public functions */

static void stop_me(int);
static void PBSAdjustLogLevel(int);
int         TMOMScanForStarting(void);


/* Local private functions */

void check_log(void);





/*
 * nullproc - placeholder resource handler.
 *
 * It is not expected to be reached: every call is reported through
 * log_err() and produces no value.
 *
 * Returns NULL always.
 */

char *nullproc(

  struct rm_attribute *attrib)  /* I (not used) */

  {
  log_err(-1, "nullproc", "should not be called");

  return(NULL);
  }  /* END nullproc() */




/*
 * arch - report the machine architecture string.
 *
 * The request takes no parameter: a non-NULL attrib is logged, rm_errno is
 * set to RM_ERR_BADPARAM and NULL is returned.
 *
 * Otherwise the value of the first config_array entry named "arch" that
 * has a non-NULL value is returned; when there is no such entry (or no
 * config_array at all) the compiled-in PBS_MACH is returned.
 */

static char *arch(

  struct rm_attribute *attrib)  /* I - must be NULL */

  {
  struct config *entry;

  if (attrib != NULL)
    {
    log_err(-1, "arch", extra_parm);

    rm_errno = RM_ERR_BADPARAM;

    return(NULL);
    }

  if (config_array != NULL)
    {
    /* look for a configured override */

    for (entry = config_array;entry->c_name != NULL;entry++)
      {
      if ((entry->c_u.c_value != NULL) &&
          (strcmp(entry->c_name, "arch") == 0))
        {
        return(entry->c_u.c_value);
        }
      }  /* END for (entry) */
    }

  /* nothing configured - use the build-time default */

  return(PBS_MACH);
  }  /* END arch() */




/*
 * opsys - report the operating system string.
 *
 * The request takes no parameter: a non-NULL attrib is logged, rm_errno is
 * set to RM_ERR_BADPARAM and NULL is returned.
 *
 * Otherwise the value of the first config_array entry named "opsys" that
 * has a non-NULL value is returned.  If none is configured the fallback
 * is PBS_MACH, the same default that arch() uses.
 */

static char *opsys(

  struct rm_attribute *attrib)  /* I - must be NULL */

  {
  struct config *entry = config_array;

  if (attrib != NULL)
    {
    log_err(-1, "opsys", extra_parm);

    rm_errno = RM_ERR_BADPARAM;

    return(NULL);
    }

  if (entry == NULL)
    {
    return(PBS_MACH);
    }

  /* scan the configured entries for a usable "opsys" value */

  while (entry->c_name != NULL)
    {
    if ((entry->c_u.c_value != NULL) && !strcmp(entry->c_name, "opsys"))
      {
      return(entry->c_u.c_value);
      }

    entry++;
    }  /* END while (entry->c_name != NULL) */

  return(PBS_MACH);
  }  /* END opsys() */





/*
 * getuname - return a cached one-line description of this host.
 *
 * On the first successful call the string
 *   "<sysname> <nodename> <release> <version> <machine>"
 * is built from uname(2) and kept in a heap copy for the rest of the
 * process lifetime; later calls return that same pointer.  The caller must
 * not free or modify it.
 *
 * The text is formatted into a local buffer whose size is derived from
 * struct utsname itself, so the write is always bounded and does not
 * depend on any shared scratch buffer having been allocated.
 *
 * Returns NULL if uname() fails, formatting fails, or memory for the copy
 * cannot be obtained.  Nothing is cached in that case, so a later call
 * tries again.
 */

char *
getuname(void)

  {
  static char    *name = NULL;

  if (name == NULL)
    {
    struct utsname  n;

    /* room for all five fields, the four separating blanks and the NUL */

    char            buf[sizeof(n.sysname) + sizeof(n.nodename) +
                        sizeof(n.release) + sizeof(n.version) +
                        sizeof(n.machine) + 5];

    int             len;
    char           *copy;

    if (uname(&n) == -1)
      {
      return(NULL);
      }

    len = snprintf(buf, sizeof(buf), "%s %s %s %s %s",
                   n.sysname,
                   n.nodename,
                   n.release,
                   n.version,
                   n.machine);

    if ((len < 0) || ((size_t)len >= sizeof(buf)))
      {
      /* formatting error or (unexpected) truncation */

      return(NULL);
      }

    copy = malloc((size_t)len + 1);

    if (copy == NULL)
      {
      return(NULL);
      }

    memcpy(copy, buf, (size_t)len + 1);

    name = copy;
    }  /* END if (name == NULL) */

  return(name);
  }  /* END getuname() */




/*
 * reqmsg - report the node message buffer.
 *
 * Returns PBSNodeMsgBuf when called without a parameter.  A non-NULL
 * attrib is an error: it is logged, rm_errno is set to RM_ERR_BADPARAM
 * and NULL is returned.
 */

static char *reqmsg(

  struct rm_attribute *attrib)  /* I - must be NULL */

  {
  if (attrib == NULL)
    {
    return(PBSNodeMsgBuf);
    }

  log_err(-1, "reqmsg", extra_parm);

  rm_errno = RM_ERR_BADPARAM;

  return(NULL);
  }  /* END reqmsg() */




/*
 * getjoblist - build a blank-separated list of the ids of all jobs on
 * the svr_alljobs list.
 *
 * The result lives in a buffer owned by this function; it is reused (and
 * possibly moved) by the next call, so the caller must neither free it
 * nor keep the pointer across calls.
 *
 * Space for each id is secured BEFORE it is copied in, so the buffer can
 * never be overrun no matter how long an id is or how many jobs exist.
 *
 * Returns the list, or a string holding a single blank when there are no
 * jobs or when memory cannot be obtained.
 */

static char *getjoblist(

  struct rm_attribute *attrib) /* I (not used) */

  {
  static char   *list = NULL;
  static size_t  listcap = 0;   /* bytes currently allocated for list */

  job           *pjob;
  size_t         used = 0;      /* characters stored in list so far */
  int            firstjob = 1;

  if (list == NULL)
    {
    if ((list = calloc(BUFSIZ + 50, sizeof(char))) == NULL)
      {
      /* FAILURE - cannot alloc memory */

      fprintf(stderr,"ERROR: could not calloc!\n");

      /* since memory cannot be allocated, report no jobs */

      return(" ");
      }

    listcap = BUFSIZ + 50;
    }

  *list = '\0'; /* reset the list */

  if ((pjob = (job *)GET_NEXT(svr_alljobs)) == NULL)
    {
    /* no jobs - return space character */

    return(" ");
    }

  for (;pjob != NULL;pjob = (job *)GET_NEXT(pjob->ji_alljobs))
    {
    const char *jobid = pjob->ji_qs.ji_jobid;
    size_t      idlen = strlen(jobid);
    size_t      need  = used + 1 + idlen + 1;  /* blank + id + NUL */

    if (need > listcap)
      {
      size_t  newcap = listcap;
      char   *tmpList;

      while (newcap < need)
        newcap += BUFSIZ;

      tmpList = realloc(list, newcap);

      if (tmpList == NULL)
        {
        /* FAILURE - cannot alloc memory (list itself is still valid) */

        fprintf(stderr,"ERROR: could not realloc!\n");

        /* since memory cannot be allocated, report no jobs */

        return(" ");
        }

      list    = tmpList;
      listcap = newcap;
      }

    if (!firstjob)
      list[used++] = ' ';

    memcpy(list + used, jobid, idlen + 1);  /* copies the NUL as well */

    used += idlen;

    firstjob = 0;
    }  /* END for (pjob) */

  if (list[0] == '\0')
    {
    /* no jobs - return space character */

    list[0] = ' ';
    list[1] = '\0';
    }

  return(list);
  }  /* END getjoblist() */




#define TMAX_VARBUF   65536

static char *reqvarattr(

  struct rm_attribute *attrib)  /* I */

  {
  static char id[] = "reqvarattr";

  static char    *list = NULL, *child_spot;
  static int      listlen = 0;

  struct varattr *pva;
  int             fd, len, child_len;
  int             first_line;
  FILE           *child;

  char           *ptr;
  char           *ptr2;

  char            tmpBuf[TMAX_VARBUF + 1];

  if (list == NULL)
    {
    list = calloc(BUFSIZ + 1024, sizeof(char));

    if (list == NULL)
      {
      /* FAILURE - cannot alloc memory */

      log_err(errno,id,"cannot alloc memory");

      return(" ");
      }

    listlen = BUFSIZ;
    }

  *list = '\0'; /* reset the list */

  if ((pva = (struct varattr *)GET_NEXT(mom_varattrs)) == NULL)
    {
    return(" ");
    }

  for (;pva != NULL;pva = (struct varattr *)GET_NEXT(pva->va_link))
    {
    /* loop for each $varattr parameter */

    if ((pva->va_lasttime == 0) || (time_now >= (pva->va_ttl + pva->va_lasttime)))
      {
      if ((pva->va_ttl == -1) && (pva->va_lasttime != 0))
        {
        if (pva->va_value[0] != '\0')
          {
          if (*list != '\0')
            strcat(list, varattr_delimiter);

          strcat(list, pva->va_value);
          }

        if ((int)strlen(list) >= listlen)
          {
          listlen += BUFSIZ;

          list = realloc(list, listlen);

          if (list == NULL)
            {
            log_err(errno,id,"cannot alloc memory");

            return(" ");
            }
          }

        continue;  /* ttl of -1 is only run once */
        }

      /* TTL is satisfied, reload value */

      pva->va_lasttime = time_now;

      if (pva->va_value == NULL)
        pva->va_value = calloc(TMAX_VARBUF, sizeof(char));

      /* execute script and get a new value */

      if ((child = popen(pva->va_cmd, "r")) == NULL)
        {
        sprintf(pva->va_value, "error: %d %s",
          errno,
          strerror(errno));
        }
      else
        {
        fd = fileno(child);

        child_spot = tmpBuf;
        child_len  = 0;
        child_spot[0] = '\0';

retryread:

        while ((len = read(fd, child_spot, TMAX_VARBUF - child_len)) > 0)
          {
          child_len  += len;
          child_spot += len;

          if (child_len >= TMAX_VARBUF - 1)
            break;
          }  /* END while ((len = read() > 0) */

        if (len == -1)
          {
          /* FAILURE - cannot read var script output */

          if (errno == EINTR)
            goto retryread;

          log_err(errno, id, "pipe read");

          sprintf(pva->va_value, "? %d",
            RM_ERR_SYSTEM);

          pclose(child);

          continue;
          }

        /* SUCCESS */

        pclose(child);

        tmpBuf[child_len] = '\0';

        /* Transfer returned data into var value field */
        
        first_line = TRUE;

        ptr = strtok(tmpBuf,"\n;");

        ptr2 = pva->va_value;

        ptr2[0] = '\0';

        /* 
         * OUTPUT FORMAT:  Take what script gives us.
         * Script should output 1 or more lines of Name=value1+value2+...
         */

        while (ptr != NULL)
          {
          if (!first_line)
            strcat(ptr2,varattr_delimiter);

          strcat(ptr2,ptr);

          first_line = FALSE;

          ptr = strtok(NULL,"\n;");
          }  /* END while (ptr != NULL) */
        }    /* END else ((child = popen(pva->va_cmd,"r")) == NULL) */
      }      /* END if ((pva->va_lasttime == 0) || ...) */

    if (pva->va_value[0] != '\0')
      {
      if (*list != '\0')
        strcat(list, varattr_delimiter);

      strcat(list, pva->va_value);
      }

    if ((int)strlen(list) >= listlen)
      {
      listlen += BUFSIZ;
      list = realloc(list, listlen);

      if (list == NULL)
        {
        log_err(errno,id,"cannot alloc memory");

        return(" ");
        }
      }
    }    /* END for (pva) */

  if (list[0] == '\0')
    strcat(list, " ");

  return(list);
  }  /* END reqvarattr() */





/*
 * reqgres - build the generic resource string from config_array.
 *
 * FORMAT:  <GRES>:<VALUE>[+<GRES>:<VALUE>]...
 *
 * An entry of config_array is reported when it has a non-NULL value and
 * its name
 *   - is not one of the names in special[],
 *   - is not one of the names in common_config[] (other than "gres"), and
 *   - does not begin with "size".
 *
 * The request takes no parameter: a non-NULL attrib is logged, rm_errno
 * is set to RM_ERR_BADPARAM and NULL is returned.
 *
 * The result is kept in a fixed 1024-byte buffer owned by this function
 * and overwritten by the next call; output that does not fit is
 * truncated.  Every write, including the '+' separator, is limited to the
 * space that is left in the buffer.
 */

char *reqgres(

  struct rm_attribute *attrib)  /* I - must be NULL */

  {
  char  *id = "reqgres";

  struct config *cp;

  static char   GResBuf[1024];

  int           sindex;
  size_t        used;

  if (attrib != NULL)
    {
    log_err(-1, id, extra_parm);

    rm_errno = RM_ERR_BADPARAM;

    return(NULL);
    }

  /* build gres string */

  GResBuf[0] = '\0';

  if (config_array == NULL)
    {
    return(GResBuf);
    }

  for (cp = config_array;cp->c_name != NULL;cp++)
    {
    if (cp->c_u.c_value == NULL)
      continue;

    /* verify parameter is not special */

    for (sindex = 0;sindex < RM_NPARM;sindex++)
      {
      if (special[sindex].name == NULL)
        break;

      if (!strcmp(special[sindex].name, cp->c_name))
        break;
      }  /* END for (sindex) */

    if ((sindex < RM_NPARM) && (special[sindex].name != NULL))
      {
      /* loop stopped on a name match - parameter is special */

      continue;
      }

    /* verify parameter is not common */

    for (sindex = 0;sindex < RM_NPARM;sindex++)
      {
      if (common_config[sindex].c_name == NULL)
        break;

      if (!strcmp(common_config[sindex].c_name, cp->c_name))
        break;
      }  /* END for (sindex) */

    if ((sindex < RM_NPARM) &&
        (common_config[sindex].c_name != NULL) &&
        strcmp(common_config[sindex].c_name, "gres"))
      {
      /* loop stopped on a name match other than "gres" - parameter is common */

      continue;
      }

    if (!strncmp(cp->c_name, "size", strlen("size")))
      continue;

    used = strlen(GResBuf);

    if (used >= sizeof(GResBuf) - 1)
      {
      /* buffer is full - nothing more can be added */

      break;
      }

    /* snprintf() is given exactly the space that is left, so neither
     * the separator nor the entry can be written past the buffer */

    snprintf(GResBuf + used, sizeof(GResBuf) - used, "%s%s:%s",
             (used > 0) ? "+" : "",
             cp->c_name,
             cp->c_u.c_value);
    }  /* END for (cp) */

  return(GResBuf);
  }  /* END reqgres() */




/*
 * reqstate - report the state of this pbs_mom as a word.
 *
 *   "down"  INUSE_DOWN is set in internal_state and MOMConfigDownOnError
 *           is non-zero
 *   "busy"  otherwise, if INUSE_BUSY is set in internal_state
 *   "free"  in every other case
 *
 * The word is returned in a static buffer that is overwritten by the
 * next call.
 */

static char *reqstate(

  struct rm_attribute *attrib)  /* I (ignored) */

  {
  static char  state[1024];

  const char  *word = "free";

  if ((internal_state & INUSE_DOWN) && (MOMConfigDownOnError != 0))
    {
    word = "down";
    }
  else if (internal_state & INUSE_BUSY)
    {
    word = "busy";
    }

  strcpy(state, word);

  return(state);
  }  /* END reqstate() */




/*
** Return the string produced by getuname().
**
** The request takes no attribute: when attrib is non-NULL the extra_parm
** message is logged, rm_errno is set to RM_ERR_BADPARAM and NULL is
** returned.
*/

static char *requname(

  struct rm_attribute *attrib)

  {
  if (attrib == NULL)
    {
    return(getuname());
    }

  log_err(-1, "uname", extra_parm);

  rm_errno = RM_ERR_BADPARAM;

  return(NULL);
  }  /* END requname() */





/*
** Tell whether attrib->a_value names a user known to getpwnam().
**
** Returns "yes" or "no" (string literals).  When attrib or its a_value
** is NULL the no_parm message is logged, rm_errno is set to
** RM_ERR_NOPARAM and NULL is returned.
*/

static char *validuser(

  struct rm_attribute *attrib)

  {
  if ((attrib != NULL) && (attrib->a_value != NULL))
    {
    return((getpwnam(attrib->a_value) != NULL) ? "yes" : "no");
    }

  log_err(-1, "valid_user", no_parm);

  rm_errno = RM_ERR_NOPARAM;

  return(NULL);
  }    /* END validuser() */





/*
** Report the load average obtained from get_la() as a string with two
** decimals.
**
** No attribute is accepted: a non-NULL attrib logs extra_parm and fails
** with rm_errno = RM_ERR_BADPARAM.  A non-zero return from get_la()
** fails with rm_errno = RM_ERR_SYSTEM.
**
** Returns a pointer to a static buffer (rewritten on every call), or NULL
** on failure.
*/

char *loadave(

  struct rm_attribute *attrib)

  {
  char        *id = "loadave";
  static char  ret_string[20];
  double       la;

  if (attrib)
    {
    log_err(-1, id, extra_parm);

    rm_errno = RM_ERR_BADPARAM;

    return(NULL);
    }

  if (get_la(&la) != 0)
    {
    rm_errno = RM_ERR_SYSTEM;

    return(NULL);
    }

  /* bounded write: "%.2f" of a huge or bogus double needs far more than 20 bytes */

  snprintf(ret_string, sizeof(ret_string), "%.2f",
           la);

  return(ret_string);
  }  /* END loadave() */





/*
** Look up a resource by name in an array of config entries.
**
** 'where' is scanned from its first element up to (not including) the
** entry whose c_name is NULL; names are compared with strcmp().
**
** Returns the first matching entry, or NULL when there is no match or
** when either argument is NULL.
*/

struct config *rm_search(

        struct config *where,  /* I */
        char          *what)   /* I */

  {
  struct config *entry = where;

  if ((where == NULL) || (what == NULL))
    {
    return(NULL);
    }

  while (entry->c_name != NULL)
    {
    if (!strcmp(entry->c_name, what))
      {
      return(entry);
      }

    entry++;
    }    /* END while (entry) */

  return(NULL);
  }  /* END rm_search() */





/*
** Resolve resource 'res' against the resource tables and run its handler.
**
** The tables are consulted in this order: common_config, standard_config,
** dependent_config.  The first table containing 'res' wins and the result
** of its c_func(attr) is returned.  When no table has the name, rm_errno
** is set to RM_ERR_UNKNOWN and NULL is returned.
*/

char *dependent(

  char               *res,  /* I */
  struct rm_attribute *attr) /* I */

  {
  extern struct config  standard_config[];

  extern struct config  dependent_config[];

  struct config *tables[3];

  struct config *hit;

  int            t;

  tables[0] = common_config;
  tables[1] = standard_config;
  tables[2] = dependent_config;

  for (t = 0;t < 3;t++)
    {
    hit = rm_search(tables[t], res);

    if (hit != NULL)
      {
      return(hit->c_u.c_func(attr));
      }
    }  /* END for (t) */

  rm_errno = RM_ERR_UNKNOWN;

  return(NULL);
  }  /* END dependent() */






/*
** Point the DIS I/O function pointers (getc, puts, gets, skip and the
** read/write commit hooks) at their rpp counterparts.  Nothing is touched
** when dis_getc already equals rpp_getc.
*/

void
DIS_rpp_reset(void)

  {
  if (dis_getc == rpp_getc)
    {
    /* already routed through rpp */

    return;
    }

  dis_getc    = rpp_getc;
  dis_puts    = (int (*)(int, const char *, size_t))rpp_write;
  dis_gets    = (int (*)(int, char *, size_t))rpp_read;
  disr_skip   = (int (*)(int, size_t))rpp_skip;
  disr_commit = rpp_rcommit;
  disw_commit = rpp_wcommit;

  return;
  }  /* END DIS_rpp_reset() */





/*
** Initialize standard resource array.
**
** Writes an "independent" system log record, then hands off to
** dep_initialize().
*/

void
initialize(void)

  {
  log_record(PBSEVENT_SYSTEM, 0, "initialize", "independent");

  dep_initialize();
  }  /* END initialize() */




/*
** Thin wrapper around dep_cleanup(); die() calls it before closing the log.
*/

void
cleanup(void)

  {
  dep_cleanup();
  }  /* END cleanup() */




/*
** Clean up after a signal and terminate the process.
**
** A positive 'sig' is logged as "caught signal N"; any other value is
** logged as "abnormal termination".  Afterwards cleanup() runs, the log is
** closed and the process exits with status 1.  This function does not
** return.
*/

void die(

  int sig)

  {
  char *id = "die";

  if (sig <= 0)
    {
    log_record(PBSEVENT_SYSTEM, 0, id, "abnormal termination");
    }
  else
    {
    sprintf(log_buffer, "caught signal %d",
            sig);

    log_record(PBSEVENT_SYSTEM, 0, id, log_buffer);
    }

  cleanup();

  log_close(1);

  exit(1);
  }  /* END die() */




/*
** Check for fatal memory allocation error.
**
** A non-NULL 'buf' is accepted silently.  A NULL 'buf' is logged as
** "memory allocation failed" and then die(0) is invoked.
*/

void memcheck(

  char *buf)

  {
  if (buf == NULL)
    {
    log_err(-1, "memcheck", "memory allocation failed");

    die(0);
    }

  return;
  }  /* END memcheck() */





/*
** Check the ret_string buffer to make sure that there is
** enough room starting at *spot to hold len characters more.
** If not, realloc the buffer and make *spot point to
** the corresponding place that it used to point to in
** the old buffer.
**
** The buffer grows by 2 * len bytes and the new size is logged.
** An allocation failure is fatal (memcheck() -> die()).
*/

void checkret(

  char **spot,
  int    len)

  {
  char *id = "checkret";
  char *hold;
  long  offset;

  /* remember the position as an offset while ret_string is still valid;
   * once realloc() moves the block the old pointer value must not be used */

  offset = (long)(*spot - ret_string);

  if (offset < (ret_size - len))
    {
    return;
    }

  ret_size += len * 2;  /* new buf size */

  sprintf(log_buffer, "size increased to %d",
          ret_size);

  log_record(PBSEVENT_SYSTEM, 0, id, log_buffer);

  hold = realloc(ret_string, ret_size); /* new buf */

  memcheck(hold);

  *spot = hold + offset;  /* same position in the new buf */

  ret_string = hold;

  return;
  }  /* END checkret() */





/*
** Return a pointer to the first character of 'str' that is not
** whitespace (per isspace()), or to the terminating NUL when the string
** is empty or all whitespace.  The string is not modified.
*/

char *skipwhite(

  char *str)

  {
  /* <ctype.h> functions need an unsigned char value; a plain (signed)
   * char holding a byte >= 0x80 would otherwise be undefined behaviour */

  while ((*str != '\0') && isspace((unsigned char)*str))
    {
    str++;
    }

  return(str);
  }  /* END skipwhite() */





/*
** Copy the leading token of 'str' into 'tok' and NUL-terminate it.
**
** A token is a run of alphanumeric characters, ':' and '_'.  The copy
** stops at the first character outside that set (or at end of string).
**
** Returns a pointer to the first character of 'str' that was not copied.
**
** NOTE: no bound is applied to the copy; 'tok' must have room for
**       strlen(str) + 1 bytes in the worst case.
*/

char *tokcpy(

  char *str,
  char *tok)

  {
  /* cast: <ctype.h> functions require an unsigned char value */

  while (isalnum((unsigned char)*str) || (*str == ':') || (*str == '_'))
    {
    *tok++ = *str++;
    }  /* END while (token char) */

  *tok = '\0';

  return(str);
  }  /* END tokcpy() */




/*
** Strip trailing whitespace (including the newline) from 'str' in place.
**
** An empty string is left untouched.  A string made up only of
** whitespace becomes the empty string.
*/

void rmnl(

  char *str)

  {
  size_t len = strlen(str);

  /* test len before indexing: the previous "while (--i)" form stepped to
   * str[-1] when the string was empty */

  while ((len > 0) && isspace((unsigned char)str[len - 1]))
    {
    str[--len] = '\0';
    }

  return;
  }  /* END rmnl() */





/*
** Resolve host 'name' and insert its address into the okclients tree.
**
** Returns the address in host byte order, or 0 when the lookup fails or
** the resolved address does not fit in a struct in_addr (in both cases an
** error is logged and nothing is inserted).
*/

u_long addclient(

  char *name)  /* I */

  {
  static char   id[] = "addclient";

  struct hostent *host;

  struct in_addr  saddr;
  u_long   ipaddr;

  /* FIXME: must be able to retry failed lookups later */

  host = gethostbyname(name);

  if (host == NULL)
    {
    snprintf(log_buffer, sizeof(log_buffer), "host %s not found",
             name);

    log_err(-1, id, log_buffer);

    return(0);
    }

  /* never copy more than saddr can hold */

  if ((host->h_length <= 0) || ((size_t)host->h_length > sizeof(saddr)))
    {
    snprintf(log_buffer, sizeof(log_buffer),
             "host %s has unsupported address length %d",
             name,
             host->h_length);

    log_err(-1, id, log_buffer);

    return(0);
    }

  memset(&saddr, 0, sizeof(saddr));

  memcpy(&saddr, host->h_addr, host->h_length);

  ipaddr = ntohl(saddr.s_addr);

  tinsert(ipaddr, NULL, &okclients);

  return(ipaddr);
  }  /* END addclient() */





/*
** Config handler: register 'value' as a permitted client host through
** addclient().
**
** Returns 1 when 'value' is NULL or empty, and also when addclient()
** returns a non-zero address.  Returns 0 when addclient() returns 0,
** i.e. the host could not be resolved.
*/

static u_long setpbsclient(

  char *value)  /* I */

  {
  if ((value == NULL) || (value[0] == '\0'))
    {
    /* nothing to add */

    return(1);
    }

  return((addclient(value) != 0) ? 1 : 0);
  }  /* END setpbsclient() */




/* FIXME: we need to handle a non-default port number */

/*
** Config handler: log the server name and register it through
** mom_server_add(), whose result is returned unchanged.  A NULL or empty
** 'value' returns 1 without logging or registering anything.
*/

static u_long setpbsserver(

  char *value)  /* I */

  {
  static char   id[] = "setpbsserver";

  if ((value != NULL) && (*value != '\0'))
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, value);

    return(mom_server_add(value));
    }

  /* nothing specified */

  return(1);
  }  /* END setpbsserver() */




/*
** Config handler: store 'Value' in tmpdir_basename.
**
** The value must start with '/'.  A value longer than the buffer is
** truncated; the buffer is always NUL-terminated.
**
** Returns 1 on success, 0 when Value is NULL or not an absolute path.
*/

static u_long settmpdir(

  char *Value)

  {
  static  char    id[] = "settmpdir";

  if (Value == NULL)
    {
    return(0);
    }

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, Value);

  if (*Value != '/')
    {
    log_err(-1, id, "tmpdir must be a full path");

    return(0);
    }

  /* strncpy() adds no terminator when the source fills the buffer */

  strncpy(tmpdir_basename, Value, sizeof(tmpdir_basename) - 1);

  tmpdir_basename[sizeof(tmpdir_basename) - 1] = '\0';

  return(1);
  }  /* END settmpdir() */

/*
** Config handler: store 'Value' in xauth_path.
**
** The value must start with '/'.  A value longer than the buffer is
** truncated; the buffer is always NUL-terminated.
**
** Returns 1 on success, 0 when Value is NULL or not an absolute path.
*/

static u_long setxauthpath(

  char *Value)

  {
  static  char    id[] = "setxauthpath";

  if (Value == NULL)
    {
    return(0);
    }

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, Value);

  if (*Value != '/')
    {
    log_err(-1, id, "xauthpath must be a full path");

    return(0);
    }

  /* strncpy() adds no terminator when the source fills the buffer */

  strncpy(xauth_path, Value, sizeof(xauth_path) - 1);

  xauth_path[sizeof(xauth_path) - 1] = '\0';

  return(1);
  }  /* END setxauthpath() */





/*
** Config handler: set the remote-copy command.
**
** FORMAT:  <ABSOLUTE PATH>[ <ARGS>]
**
** The text up to the first blank is stored in rcp_path; whatever follows
** that blank is stored in rcp_args (empty when there is nothing).  Both
** buffers are always NUL-terminated; over-long input is truncated.
**
** Returns 1 on success, 0 when Value is NULL or not an absolute path.
*/

static u_long setrcpcmd(

  char *Value)  /* I */

  {
  static char  id[] = "rcpcmd";

  char        *sep;

  if (Value == NULL)
    {
    /* FAILURE */

    return(0);
    }

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, Value);

  if (*Value != '/')
    {
    log_err(-1, id, "rcpcmd must be a full path");

    /* FAILURE */

    return(0);
    }

  /* terminate explicitly: strchr() below must not run off the buffer */

  strncpy(rcp_path, Value, sizeof(rcp_path) - 1);

  rcp_path[sizeof(rcp_path) - 1] = '\0';

  rcp_args[0] = '\0';

  sep = strchr(rcp_path, ' ');

  if (sep != NULL)
    {
    /* split command from its arguments */

    *sep = '\0';

    if (*(sep + 1) != '\0')
      {
      strncpy(rcp_args, sep + 1, sizeof(rcp_args) - 1);

      rcp_args[sizeof(rcp_args) - 1] = '\0';
      }
    }

  /* SUCCESS */

  return(1);
  }  /* END setrcpcmd() */





/*
** Config handler: parse 'value' with strtol() (base auto-detected) and
** store the result in *log_event_mask.
**
** Returns 1 when parsing stopped at end of string or at whitespace,
** 0 otherwise.  The mask is written in both cases.
*/

static u_long setlogevent(

  char *value)

  {
  char *end;

  *log_event_mask = strtol(value, &end, 0);

  if ((*end != '\0') && !isspace((int)*end))
    {
    /* trailing garbage after the number */

    return(0);
    }

  return(1);
  }  /* END setlogevent() */





/* NOTE:  maskclient is global */

/*
** Config handler: append a copy of 'name' to the maskclient array.
**
** The array starts with 4 slots and doubles whenever the new entry would
** fill the last free slot, so one unused slot remains after every
** successful call.
**
** Returns 1 on success.  On allocation failure an error is logged, the
** array is left exactly as it was before the call, and -1 is returned.
**
** NOTE(review): the return type is unsigned, so -1 reaches the caller as
** a large non-zero value - confirm the caller treats that as a failure.
*/

static u_long restricted(

  char *name)

  {
  static char id[] = "restricted";

  char **tmpMaskClient;

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, name);

  if (mask_max == 0)
    {
    if ((maskclient = (char **)calloc(4, sizeof(char *))) == NULL)
      {
      /* FAILURE - cannot alloc memory */

      log_err(errno,id,"cannot alloc memory");

      return(-1);
      }

    mask_max = 4;
    }

  maskclient[mask_num] = strdup(name);

  if (maskclient[mask_num] == NULL)
    {
    /* FAILURE - cannot alloc memory */

    log_err(errno,id,"cannot alloc memory");

    return(-1);
    }

  if (mask_num + 1 == mask_max)
    {
    /* grow first, commit the new capacity only once realloc has succeeded */

    tmpMaskClient = (char **)realloc(
      maskclient,
      (mask_max * 2) * sizeof(char *));

    if (tmpMaskClient == NULL)
      {
      /* FAILURE - cannot alloc memory, undo the insert */

      log_err(errno,id,"cannot alloc memory");

      free(maskclient[mask_num]);

      maskclient[mask_num] = NULL;

      return(-1);
      }

    maskclient = tmpMaskClient;

    mask_max *= 2;
    }

  mask_num++;

  /* SUCCESS */

  return(1);
  }  /* END restricted() */





/*
** Config handler: store 'Value' in MOMConfigVersion.
**
** Over-long input is truncated; the buffer is always NUL-terminated.
**
** Returns 1 on success, 0 when Value is NULL.
*/

static u_long configversion(

  char *Value)  /* I */

  {
  static char   id[] = "configversion";

  if (Value == NULL)
    {
    /* FAILURE */

    return(0);
    }

  /* log only after the NULL check so a NULL string is never handed to the logger */

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, Value);

  strncpy(MOMConfigVersion, Value, sizeof(MOMConfigVersion) - 1);

  MOMConfigVersion[sizeof(MOMConfigVersion) - 1] = '\0';

  /* SUCCESS */

  return(1);
  }  /* END configversion() */





/*
** Config handler: boolean switch for MOMConfigDownOnError.
**
** Only the first character of 'Value' is examined: t/T/y/Y/1 turn the
** option on, f/F/n/N/0 turn it off, anything else leaves it unchanged.
**
** Returns 0 when Value is NULL, 1 in every other case.
*/

static u_long setdownonerror(

  char *Value)  /* I */

  {
  static char   id[] = "setdownonerror";
  char          first;

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, Value);

  if (Value == NULL)
    {
    /* FAILURE */

    return(0);
    }

  first = Value[0];

  if ((first == 't') || (first == 'T') ||
      (first == 'y') || (first == 'Y') ||
      (first == '1'))
    {
    MOMConfigDownOnError = 1;
    }
  else if ((first == 'f') || (first == 'F') ||
           (first == 'n') || (first == 'N') ||
           (first == '0'))
    {
    MOMConfigDownOnError = 0;
    }

  return(1);
  }  /* END setdownonerror() */


/*
** Config handler: boolean switch for MOMConfigRestart.
**
** Only the first character of 'Value' is examined: t/T/y/Y/1 turn the
** option on, f/F/n/N/0 turn it off, anything else leaves it unchanged.
**
** Returns 0 when Value is NULL, 1 in every other case.
*/

static u_long setenablemomrestart(

  char *Value)  /* I */

  {
  static char   id[] = "setenablemomrestart";
  char          first;

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, Value);

  if (Value == NULL)
    {
    /* FAILURE */

    return(0);
    }

  first = Value[0];

  if ((first == 't') || (first == 'T') ||
      (first == 'y') || (first == 'Y') ||
      (first == '1'))
    {
    MOMConfigRestart = 1;
    }
  else if ((first == 'f') || (first == 'F') ||
           (first == 'n') || (first == 'N') ||
           (first == '0'))
    {
    MOMConfigRestart = 0;
    }

  return(1);
  }  /* END setenablemomrestart() */




/*
** Config handler: set cputfactor from atof(value).
**
** Returns 1 when the stored factor is non-zero, 0 when it is zero
** (a value atof() cannot parse also yields zero).  cputfactor is
** overwritten in both cases.
*/

static u_long cputmult(

  char *value)  /* I */

  {
  static char id[] = "cputmult";

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, value);

  cputfactor = atof(value);

  if (cputfactor == 0.0)
    {
    /* error */

    return(0);
    }

  return(1);
  }  /* END cputmult() */





static u_long wallmult(

  char *value)

  {
  static char id[] = "wallmult";

  double tmpD;

  if (value == NULL)
    {
    /* FAILURE */

    return(0);
    }

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, value);

  tmpD = atof(value);

  if ((tmpD == 0.0) && (value[0] != '\0'))
    {
    /* FAILURE */

    return(0);
    }

  /* SUCCESS */

  wallfactor = tmpD;

  return(1);
  }  /* END wallmult() */




/*
** Config handler: add one entry to the pcphosts table.
**
** FORMAT:  <HOST>:<FROM> <TO>
**
** 'value' is split in place: the ':' and the first whitespace after
** <FROM> are overwritten with NUL.  The three parts are stored as
** strdup() copies in pcphosts[cphosts_num], and cphosts_num is advanced
** only when all three copies succeed.  The table starts with 2 slots and
** grows 2 slots at a time.
**
** Returns 1 on success; 0 on a malformed line or on allocation failure
** (an error is logged and copies already made for this entry are freed).
*/

static u_long usecp(

  char *value)  /* I (modified) */

  {
  char        *pnxt;
  static int   cphosts_max = 0;

  struct cphosts   *newp = NULL;

  static char *id = "usecp";

  /*
   * HvB and Willem added this for logging purpose
   */

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    id,
    value);

  /* make sure there is a free slot before parsing */

  if (cphosts_max == 0)
    {
    pcphosts = malloc(2 * sizeof(struct cphosts));

    if (pcphosts == NULL)
      {
      snprintf(log_buffer, sizeof(log_buffer),
        "%s: out of memory while allocating pcphosts",
        id);

      log_err(-1, id, log_buffer);

      return(0);
      }

    cphosts_max = 2;
    }
  else if (cphosts_max == cphosts_num)
    {
    newp = realloc(
      pcphosts,
      (cphosts_max + 2) * sizeof(struct cphosts));

    if (newp == NULL)
      {
      /* FAILURE */

      snprintf(log_buffer, sizeof(log_buffer),
        "%s: out of memory while reallocating pcphosts",
        id);

      log_err(-1,id,log_buffer);

      return(0);
      }

    pcphosts = newp;

    cphosts_max += 2;
    }

  pnxt = strchr(value, (int)':');

  if (pnxt == NULL)
    {
    /* request failed */

    snprintf(log_buffer, sizeof(log_buffer),
      "invalid host specification: %s",
      value);

    log_err(-1, id, log_buffer);

    return(0);
    }

  *pnxt++ = '\0';

  pcphosts[cphosts_num].cph_hosts = strdup(value);

  if (pcphosts[cphosts_num].cph_hosts == NULL)
    {
    /* FAILURE */

    snprintf(log_buffer, sizeof(log_buffer),
      "%s: out of memory in strdup(cph_hosts)",
      id);

    log_err(-1, id, log_buffer);

    return(0);
    }

  value = pnxt; /* now ptr to path */

  /* find the whitespace that ends <FROM>; cast because <ctype.h>
   * functions require an unsigned char value */

  while (!isspace((unsigned char)*pnxt))
    {
    if (*pnxt == '\0')
      {
      snprintf(log_buffer, sizeof(log_buffer),
        "invalid '%s' specification %s: "
        "missing destination path",
        id,
        value);

      log_err(-1, id, log_buffer);

      free(pcphosts[cphosts_num].cph_hosts);

      return(0);
      }

    pnxt++;
    }

  *pnxt++ = '\0';

  pcphosts[cphosts_num].cph_from = strdup(value);

  if (pcphosts[cphosts_num].cph_from == NULL)
    {
    snprintf(log_buffer, sizeof(log_buffer),
      "%s: out of memory in strdup(cph_from)",
      id);

    log_err(-1, id, log_buffer);

    free(pcphosts[cphosts_num].cph_hosts);

    return(0);
    }

  pcphosts[cphosts_num].cph_to = strdup(skipwhite(pnxt));

  if (pcphosts[cphosts_num].cph_to == NULL)
    {
    snprintf(log_buffer, sizeof(log_buffer),
      "%s: out of memory in strdup(cph_to)",
      id);

    log_err(-1, id, log_buffer);

    free(pcphosts[cphosts_num].cph_hosts);
    free(pcphosts[cphosts_num].cph_from);

    return(0);
    }

  cphosts_num++;

  return(1);
  }  /* END usecp() */




/*
** Config handler: set pe_alarm_time from atoi(value).
**
** Returns 1 when the parsed number is positive; otherwise returns 0 and
** leaves pe_alarm_time unchanged.
*/

static unsigned long prologalarm(

  char *value)  /* I */

  {
  int secs;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "prologalarm",
    value);

  secs = (int)atoi(value);

  if (secs > 0)
    {
    pe_alarm_time = (unsigned int)secs;

    return(1);
    }

  /* error */

  return(0);
  }  /* END prologalarm() */





/*
** Config handler: set LOGLEVEL from atoi(value).
**
** Returns 1 when the parsed number is not negative; otherwise returns 0
** and leaves LOGLEVEL unchanged.
*/

static unsigned long setloglevel(

  char *value)  /* I */

  {
  int level;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "setloglevel",
    value);

  level = (int)atoi(value);

  if (level >= 0)
    {
    LOGLEVEL = (unsigned int)level;

    return(1);
    }

  /* error */

  return(0);
  }  /* END setloglevel() */





/*
** Config handler: store 'value' in DEFAULT_UMASK.
**
** Over-long input is truncated; the buffer is always NUL-terminated.
** Always returns 1.
*/

static unsigned long setumask(

  char *value)  /* I */

  {
  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "setumask",
    value);

  /* strncpy() adds no terminator when the source fills the buffer */

  strncpy(DEFAULT_UMASK, value, sizeof(DEFAULT_UMASK) - 1);

  DEFAULT_UMASK[sizeof(DEFAULT_UMASK) - 1] = '\0';

  return(1);
  }  /* END setumask() */




/*
** Config handler: store 'value' in PRE_EXEC.
**
** Over-long input is truncated; the buffer is always NUL-terminated.
** When SHELL_USE_ARGV is 0 an error is logged to point out that the
** build lacks the enable-shell-user-argv option; the value is stored
** regardless.  Always returns 1.
*/

static unsigned long setpreexec(

  char *value)  /* I */

  {
#if SHELL_USE_ARGV == 0
  static char *id = "setpreexec";
#endif

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "setpreexec",
    value);

  /* strncpy() adds no terminator when the source fills the buffer */

  strncpy(PRE_EXEC, value, sizeof(PRE_EXEC) - 1);

  PRE_EXEC[sizeof(PRE_EXEC) - 1] = '\0';

#if SHELL_USE_ARGV == 0
  log_err(0, id, "pbs_mom not configured with enable-shell-user-argv option");
#endif

  return(1);
  }  /* END setpreexec() */


/*
** Config handler: boolean switch for src_login_batch.
**
** Only the first character of 'value' is examined: t/T/y/Y/1 select
** TRUE, f/F/n/N/0 select FALSE.  An empty value changes nothing; any
** other first character is logged as unknown and changes nothing.
** Always returns 1.
*/

static unsigned long setsourceloginbatch(

  char *value)  /* I */

  {
  char first;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "setsourceloginbatch",
    value);

  first = value[0];

  if ((first == 't') || (first == 'T') ||
      (first == 'y') || (first == 'Y') ||
      (first == '1'))
    {
    src_login_batch = TRUE;
    }
  else if ((first == 'f') || (first == 'F') ||
           (first == 'n') || (first == 'N') ||
           (first == '0'))
    {
    src_login_batch = FALSE;
    }
  else if (first != '\0')
    {
    sprintf(log_buffer, "Unknown value of %s", value);

    log_record(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_SERVER,
      "setsourceloginbatch",
      log_buffer);
    }

  return(1);
  }  /* END setsourceloginbatch() */


/*
** Config handler: boolean switch for src_login_interactive.
**
** Only the first character of 'value' is examined: t/T/y/Y/1 select
** TRUE, f/F/n/N/0 select FALSE.  An empty value changes nothing; any
** other first character is logged as unknown and changes nothing.
** Always returns 1.
*/

static unsigned long setsourcelogininteractive(

  char *value)  /* I */

  {
  char first;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "setsourcelogininteractive",
    value);

  first = value[0];

  if ((first == 't') || (first == 'T') ||
      (first == 'y') || (first == 'Y') ||
      (first == '1'))
    {
    src_login_interactive = TRUE;
    }
  else if ((first == 'f') || (first == 'F') ||
           (first == 'n') || (first == 'N') ||
           (first == '0'))
    {
    src_login_interactive = FALSE;
    }
  else if (first != '\0')
    {
    sprintf(log_buffer, "Unknown value of %s", value);

    log_record(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_SERVER,
      "setsourcelogininteractive",
      log_buffer);
    }

  return(1);
  }  /* END setsourcelogininteractive() */


/*
** Config handler: set TJobStartBlockTime from a base-10 integer.
**
** A positive number is accepted.  Zero is accepted only when the text
** starts with the character '0', which tells a literal zero apart from
** an unparsable value.  Negative numbers are rejected.
**
** Returns 1 on success, 0 on error (the setting is then left unchanged).
*/

static unsigned long jobstartblocktime(

  char *value)  /* I */

  {
  int secs;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "startblocktime",
    value);

  secs = (int)strtol(value, NULL, 10);

  if ((secs > 0) || ((secs == 0) && (value[0] == '0')))
    {
    TJobStartBlockTime = secs;

    return(1);
    }

  /* error */

  return(0);
  }  /* END jobstartblocktime() */





/*
** Config handler: set ServerStatUpdateInterval from a base-10 integer.
**
** Returns 1 when the parsed number is at least 1; otherwise returns 0
** and leaves the interval unchanged.
*/

static unsigned long setstatusupdatetime(

  char *value)  /* I */

  {
  int i;

  /* log identifier previously read "setstateuspdatetime" (typo) */

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "setstatusupdatetime",
    value);

  i = (int)strtol(value, NULL, 10);

  if (i < 1)
    {
    return(0);  /* error */
    }

  ServerStatUpdateInterval = (unsigned int)i;

  return(1);
  }  /* END setstatusupdatetime() */





/*
** Config handler: set CheckPollTime from a base-10 integer.
**
** Returns 1 when the parsed number is at least 1; otherwise returns 0
** and leaves CheckPollTime unchanged.
*/

static unsigned long setcheckpolltime(

  char *value)  /* I */

  {
  int secs;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "setcheckpolltime",
    value);

  secs = (int)strtol(value, NULL, 10);

  if (secs >= 1)
    {
    CheckPollTime = (unsigned int)secs;

    return(1);
    }

  /* error */

  return(0);
  }  /* END setcheckpolltime() */




/*
** Add static resource or shell escape line from config file.
** This is a support routine for read_config().
**
** FORMAT:  <ATTR> [!]<VALUE>
**
** The leading token of 'str' becomes the resource name and the rest,
** after skipping whitespace, becomes its value.  Trailing whitespace is
** removed from the value in place.  A new config_list node holding heap
** copies of both strings is pushed onto the front of config_list, and a
** debug record naming 'file' and 'linenum' is logged.
**
** Allocation failures are fatal (memcheck() -> die()).
*/

static void add_static(

  char *str,     /* I (modified) */
  char *file,    /* I */
  int   linenum) /* I */

  {
  size_t  len;
  char   *name;

  struct config_list *cp;

  /* a token can never be longer than its source, so this buffer cannot
   * overflow whatever the length of the config line */

  name = (char *)malloc(strlen(str) + 1);

  memcheck(name);

  str = tokcpy(str, name); /* resource name */
  str = skipwhite(str);   /* resource value */

  if (*str == '!') /* shell escape command */
    {
    /* remove trailing newline */

    rmnl(str);
    }
  else
    {
    /* strip trailing blanks; len is tested before indexing so an empty
     * value never touches str[-1] */

    len = strlen(str);

    while ((len > 0) && isspace((unsigned char)str[len - 1]))
      {
      str[--len] = '\0';
      }
    }

  cp = (struct config_list *)malloc(sizeof(struct config_list));

  memcheck((char *)cp);

  cp->c_link = config_list;
  cp->c.c_name = name;  /* node takes ownership of the heap copy */

  cp->c.c_u.c_value = strdup(str);

  memcheck(cp->c.c_u.c_value);

  snprintf(log_buffer, sizeof(log_buffer), "%s[%d] add name %s value %s",
    file,
    linenum,
    name,
    str);

  log_record(
    PBSEVENT_DEBUG,
    0,
    "add_static",
    log_buffer);

  config_list = cp;

  return;
  }  /* END add_static() */





/*
** Config handler: set ideal_load_val from atof(value).
**
** When max_load_val is still negative it is given the same value as a
** default.
**
** Returns 1 on success; 0 when the parsed value is negative (nothing is
** changed in that case).
*/

static unsigned long setidealload(

  char *value)

  {
  float  val;

  /* the former 50-byte scratch buffer was filled with strcat(value) and
   * never read; it is gone because a long value overflowed the stack */

  val = atof(value);

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "ideal_load",
    value);

  if (val < 0.0)
    {
    return(0); /* error */
    }

  ideal_load_val = val;

  if (max_load_val < 0.0)
    max_load_val = val; /* set a default */

  return(1);
  }  /* END setidealload() */




/*
** Config handler: boolean switch for ignwalltime.
**
** The option is turned on when 'value' starts with 't'/'T', starts with
** '1', or equals "on" (case-insensitive); it is turned off otherwise.
** Always returns 1.
*/

static unsigned long setignwalltime(

  char *value)  /* I */

  {
  /* the former 50-byte scratch buffer was filled with strcat(value) and
   * never read; it is gone because a long value overflowed the stack */

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "ignwalltime",
    value);

  if (!strncasecmp(value, "t", 1) || (value[0] == '1') || !strcasecmp(value, "on"))
    {
    ignwalltime = 1;
    }
  else
    {
    ignwalltime = 0;
    }

  /* SUCCESS */

  return(1);
  }  /* END setignwalltime() */



/*
** Config handler: boolean switch for ignmem.
**
** The option is turned on when 'value' starts with 't'/'T', starts with
** '1', or equals "on" (case-insensitive); it is turned off otherwise.
** Always returns 1.
*/

static unsigned long setignmem(

  char *value)  /* I */

  {
  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "ignmem",
    value);

  ignmem = (!strncasecmp(value,"t",1) ||
            (value[0] == '1') ||
            !strcasecmp(value,"on")) ? 1 : 0;

  return(1);
  } /* END setignmem() */



/*
** Config handler: boolean switch for igncput.
**
** The option is turned on when 'value' starts with 't'/'T', starts with
** '1', or equals "on" (case-insensitive); it is turned off otherwise.
** Always returns 1.
*/

static unsigned long setigncput(

  char *value) /* I */

  {
  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "igncput",
    value);

  igncput = (!strncasecmp(value,"t",1) ||
             (value[0] == '1') ||
             !strcasecmp(value,"on")) ? 1 : 0;

  return(1);
  } /* END setigncput() */


/*
** Config handler: boolean switch for ignvmem.
**
** The option is turned on when 'value' starts with 't'/'T', starts with
** '1', or equals "on" (case-insensitive); it is turned off otherwise.
** Always returns 1.
*/

static unsigned long setignvmem(

  char *value)  /* I */

  {
  /* the former 50-byte scratch buffer was filled with strcat(value) and
   * never read; it is gone because a long value overflowed the stack */

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "setignvmem",
    value);

  if (!strncasecmp(value, "t", 1) || (value[0] == '1') || !strcasecmp(value, "on"))
    {
    ignvmem = 1;
    }
  else
    {
    ignvmem = 0;
    }

  /* SUCCESS */

  return(1);
  }  /* END setignvmem() */


/*
** Config handler: keep a heap copy of 'value' in auto_ideal_load.
**
** The strdup() result is stored unchecked and any previous pointer is
** overwritten without being freed.  Always returns 1.
*/

static unsigned long setautoidealload(

  char *value)

  {
  char *copy;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "auto_ideal_load",
    value);

  copy = strdup(value);

  auto_ideal_load = copy;

  /*
    add_static(auto_ideal_load,"config",0);

    nconfig++;
  */

  return(1);
  }  /* END setautoidealload() */





/*
** Config handler: keep a heap copy of 'value' in AllocParCmd.
**
** The strdup() result is stored unchecked and any previous pointer is
** overwritten without being freed.  Always returns 1.
*/

static unsigned long setallocparcmd(

  char *value)  /* I */

  {
  char *copy;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "allocparcmd",
    value);

  copy = strdup(value);

  AllocParCmd = copy;

  return(1);
  }  /* END setallocparcmd() */





/*
** Config handler: keep a heap copy of 'value' in auto_max_load.
**
** The strdup() result is stored unchecked and any previous pointer is
** overwritten without being freed.  Always returns 1.
*/

static unsigned long setautomaxload(

  char *value)

  {
  char *copy;

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "auto_max_load",
    value);

  copy = strdup(value);

  auto_max_load = copy;

  /*
    add_static(auto_ideal_load,"config",0);

    nconfig++;
  */

  return(1);
  }  /* END setautomaxload() */





/*
** Config handler: set MaxConnectTimeout from a base-10 integer.
**
** A negative result is replaced by 10000 and reported as an error.
**
** Returns 1 when the stored value is not negative, 0 otherwise.
*/

static unsigned long setmaxconnecttimeout(

  char *value)  /* I */

  {
  MaxConnectTimeout = strtol(value, NULL, 10);

  if (MaxConnectTimeout >= 0)
    {
    return(1);
    }

  /* negative timeout: fall back to 10000 */

  MaxConnectTimeout = 10000;

  return(0);
  }  /* END setmaxconnecttimeout() */




/*
** Config handler: set the node health-check script path.
**
** 'value' must name an existing file that has the owner-execute bit set.
** It is then stored in PBSNodeCheckPath (truncated if too long, always
** NUL-terminated).
**
** Returns 1 on success, 0 when stat() fails or the file is not
** executable by its owner.
*/

static unsigned long setnodecheckscript(

  char *value)

  {
  struct stat sbuf;

  /* the former scratch buffer was filled with an unbounded strcat(value)
   * and never read; it has been removed */

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "node_check_script",
    value);

  if ((stat(value, &sbuf) == -1) || !(sbuf.st_mode & S_IXUSR))
    {
    /* FAILURE */

    /* file does not exist or is not executable */

    return(0);
    }

  /* strncpy() adds no terminator when the source fills the buffer */

  strncpy(PBSNodeCheckPath, value, sizeof(PBSNodeCheckPath) - 1);

  PBSNodeCheckPath[sizeof(PBSNodeCheckPath) - 1] = '\0';

  /* SUCCESS */

  return(1);
  }  /* END setnodecheckscript() */





/*
** Config handler: configure when the node check runs.
**
** The leading base-10 integer of 'value' is stored in
** PBSNodeCheckInterval.  If 'value' contains "jobstart",
** PBSNodeCheckProlog is set to 1; if it contains "jobend",
** PBSNodeCheckEpilog is set to 1.  Neither flag is ever cleared here.
** Always returns 1.
*/

static unsigned long setnodecheckinterval(

  char *value)

  {
  /* the former scratch buffer was filled with an unbounded strcat(value)
   * and never read; it has been removed */

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "node_check_interval",
    value);

  PBSNodeCheckInterval = (int)strtol(value, NULL, 10);

  if (strstr(value, "jobstart"))
    PBSNodeCheckProlog = 1;

  if (strstr(value, "jobend"))
    PBSNodeCheckEpilog = 1;

  return(1);
  }  /* END setnodecheckinterval() */





/*
** Config handler: pass the base-10 integer parsed from 'value' to
** DIS_tcp_settimeout().  Always returns 1.
*/

static unsigned long settimeout(

  char *value)

  {
  /* a local buffer that was formatted here but never read has been removed */

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "timeout",
    value);

  DIS_tcp_settimeout(strtol(value, NULL, 10));

  return(1);
  }  /* END settimeout() */





/*
** Config handler: set max_load_val from atof(value).
**
** When ideal_load_val is still negative it is given the same value as a
** default.
**
** Returns 1 on success; 0 when the parsed value is negative (nothing is
** changed in that case).
*/

static unsigned long setmaxload(

  char *value)  /* I */

  {
  float  val;

  /* the former 50-byte scratch buffer was filled with strcat(value) and
   * never read; it is gone because a long value overflowed the stack */

  val = atof(value);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, "max_load", value);

  if (val < 0.0)
    {
    return(0); /* error */
    }

  max_load_val = val;

  if (ideal_load_val < 0.0)
    ideal_load_val = val;

  return(1);
  }  /* END setmaxload() */





/*
** Config handler: set log_file_max_size from a base-10 integer.
**
** A negative result is replaced by 0 and reported as an error.
**
** Returns 1 when the stored value is not negative, 0 otherwise.
*/

static unsigned long setlogfilemaxsize(

  char *value)  /* I */

  {
  log_file_max_size = strtol(value, NULL, 10);

  if (log_file_max_size >= 0)
    {
    return(1);
    }

  /* negative size: fall back to 0 */

  log_file_max_size = 0;

  return(0);
  }  /* END setlogfilemaxsize() */




/*
** Config handler: set log_file_roll_depth from a base-10 integer.
**
** A result below 1 is replaced by 1 and reported as an error.
**
** Returns 1 when the stored value is at least 1, 0 otherwise.
*/

static unsigned long setlogfilerolldepth(

  char *value)  /* I */

  {
  log_file_roll_depth = strtol(value, NULL, 10);

  if (log_file_roll_depth >= 1)
    {
    return(1);
    }

  /* depth too small: fall back to 1 */

  log_file_roll_depth = 1;

  return(0);
  }  /* END setlogfilerolldepth() */



/*
** Config handler: point path_log at a heap copy of 'value'.
**
** The strdup() result is stored unchecked and the previous pointer is
** not freed.  Always returns 1.
*/

static unsigned long setlogdirectory(

  char *value)  /* I */

  {
  char *copy = strdup(value);

  path_log = copy;

  return(1);
  }  /* END setlogdirectory() */




/*
** Config handler: call log_init() with 'value' as its first argument and
** NULL as its second.  Always returns 1.
*/

static unsigned long setlogfilesuffix(

  char *value)  /* I */

  {
  char *suffix = value;

  log_init(suffix, NULL);

  return(1);
  }  /* END setlogfilesuffix() */



/*
** Config handler: set LOGKEEPDAYS from atoi(value).
**
** Returns 1 when the parsed number is not negative; otherwise returns 0
** and leaves LOGKEEPDAYS unchanged.
*/

static unsigned long setlogkeepdays(

  char *value)  /* I */

  {
  int days = (int)atoi(value);

  if (days >= 0)
    {
    LOGKEEPDAYS = days;

    return(1);
    }

  /* error */

  return(0);
  }  /* END setlogkeepdays() */



/*
** Config handler: register a variable attribute.
**
** FORMAT:  <TTL> <PATH>
**
** The leading base-10 integer becomes va_ttl and everything after the
** following whitespace is copied into va_cmd.  The new record is appended
** to mom_varattrs.
**
** Returns 1 on success; 0 when the command part is missing or memory
** cannot be allocated (nothing is appended in that case).
*/

static u_long setvarattr(

  char *value)  /* I */

  {
  static char *id = "setvarattr";

  struct varattr *pva;
  char           *ptr;

  pva = calloc(1, sizeof(struct varattr));

  if (pva == NULL)
    {
    /* FAILURE */

    log_err(errno, id, "no memory");

    return(0);
    }

  CLEAR_LINK(pva->va_link);

  /* extract TTL */

  ptr = value;

  pva->va_ttl = strtol(ptr, NULL, 10);

  /* step forward to end of TTL; stop at the terminator too, otherwise a
   * value without any whitespace sends the scan past the end of the string */

  while ((*ptr != '\0') && !isspace((unsigned char)*ptr))
    ptr++;

  /* skip white space */

  while (isspace((unsigned char)*ptr))
    ptr++;

  if (*ptr == '\0')
    {
    /* FAILURE - no command after the TTL */

    free(pva);

    return(0);
    }

  /* preserve command and args */

  pva->va_cmd = strdup(ptr);

  if (pva->va_cmd == NULL)
    {
    /* FAILURE */

    log_err(errno, id, "no memory");

    free(pva);

    return(0);
    }

  append_link(&mom_varattrs, &pva->va_link, pva);

  /* SUCCESS */

  return(1);
  }  /* END setvarattr() */





/*
** Config handler: set the node file suffix and, optionally, the submit
** host suffix.
**
** FORMAT:  <NODEFILE_SUFFIX>[,<SUBMITHOST_SUFFIX>]
**
** 'value' is tokenised in place on ','.  The first token is copied into
** nodefile_suffix; a second token, when present, is copied into
** submithost_suffix.
**
** Returns 1 on success, 0 when 'value' holds no token at all (empty or
** only commas); nothing is changed in that case.
*/

static unsigned long setnodefilesuffix(

  char *value)  /* I (modified) */

  {
  char *save = NULL;
  char *ptr;

  ptr = strtok_r(value, ",", &save);

  if (ptr == NULL)
    {
    /* FAILURE - strdup(NULL) would crash */

    return(0);
    }

  nodefile_suffix = strdup(ptr);

  ptr = strtok_r(NULL, ",", &save);

  if (ptr != NULL)
    submithost_suffix = strdup(ptr);

  /* SUCCESS */

  return(1);
  }  /* END setnodefilesuffix() */




/*
** Config handler: copy at most PBS_MAXHOSTNAME bytes of 'value' into
** mom_host with strncpy() and set hostname_specified to 1.
** Always returns 1.
*/

static unsigned long setmomhost(

  char *value)  /* I */

  {
  /* remember name */

  strncpy(mom_host, value, PBS_MAXHOSTNAME);

  hostname_specified = 1;

  /* SUCCESS */

  return(1);
  }  /* END setmomhost() */


/*
** Config handler: boolean switch for MOMConfigRReconfig.
**
** Only the first character of 'Value' is examined: t/T/y/Y/1 turn the
** option on, f/F/n/N/0 turn it off, anything else leaves it unchanged.
**
** Returns 0 when Value is NULL, 1 in every other case.
*/

static u_long setrreconfig(

  char *Value)  /* I */

  {
  static char   id[] = "setrreconfig";
  char          first;

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, Value);

  if (Value == NULL)
    {
    /* FAILURE */

    return(0);
    }

  first = Value[0];

  if ((first == 't') || (first == 'T') ||
      (first == 'y') || (first == 'Y') ||
      (first == '1'))
    {
    MOMConfigRReconfig = 1;
    }
  else if ((first == 'f') || (first == 'F') ||
           (first == 'n') || (first == 'N') ||
           (first == '0'))
    {
    MOMConfigRReconfig = 0;
    }

  return(1);
  }  /* END setrreconfig() */


/*
** Config handler: fill TNoSpoolDirList from a delimited list.
**
** 'value' is tokenised in place on blank, tab, newline, ':' and ','.
** Tokens are stored as strdup() copies starting at slot 0, each one is
** logged, and storing stops once TMAX_NSDCOUNT slots have been written.
** Always returns 1.
*/

static unsigned long setnospooldirlist(

  char *value)  /* I */

  {
  static const char *delims = " \t\n:,";

  char *save;
  char *tok;

  int   slot = 0;

  char  msg[1024];

  for (tok = strtok_r(value, delims, &save);
       tok != NULL;
       tok = strtok_r(NULL, delims, &save))
    {
    TNoSpoolDirList[slot] = strdup(tok);

    snprintf(msg, sizeof(msg), "added NoSpoolDir[%d] '%s'",
             slot,
             tok);

    log_record(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_SERVER,
      "setnospooldirlist",
      msg);

    slot++;

    if (slot >= TMAX_NSDCOUNT)
      break;
    }  /* END for (tok) */

  /* SUCCESS */

  return(1);
  }  /* END setnospooldirlist() */


/*
** Config handler: keep a heap copy of 'value' in server_alias.
**
** A NULL 'value' leaves server_alias untouched.  If the copy cannot be
** allocated, server_alias ends up NULL.  Always returns 1.
*/

static unsigned long aliasservername( char *value)
  {
    log_record(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_SERVER,
      "aliasservername",
      value);

    if (value != NULL)
      {
      /* same effect as malloc(strlen + 1) followed by strcpy */

      server_alias = strdup(value);
      }

    return(1);
  }  /* END aliasservername() */




/*
** Config handler: boolean switch for spoolasfinalname.
**
** The option is turned on when 'value' starts with 't'/'T', starts with
** '1', or equals "on" (case-insensitive); it is turned off otherwise.
** Always returns 1.
*/

static unsigned long setspoolasfinalname(

  char *value)  /* I */

  {
  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    "spoolasfinalname",
    value);

  spoolasfinalname = (!strncasecmp(value,"t",1) ||
                      (value[0] == '1') ||
                      !strcasecmp(value,"on")) ? 1 : 0;

  return(1);
  }  /* END setspoolasfinalname() */




/*
** Config handler: append directories to TRemChkptDirList.
**
** 'value' is tokenised in place on blank, tab, newline, ':' and ','.
** Tokens are stored as strdup() copies starting at the first NULL slot,
** each one is logged, and storing stops once the list holds
** TMAX_RCDCOUNT entries.  Always returns 1, even when the list was
** already full and nothing was added.
*/

static unsigned long setremchkptdirlist(

  char *value)  /* I */

  {
  char *TokPtr;
  char *ptr;

  int   index = 0;
  char  tmpLine[1024];

  /* find the first free slot; the bound is tested before the element is
   * read so a full list is never indexed at TMAX_RCDCOUNT */

  while ((index < TMAX_RCDCOUNT) && (TRemChkptDirList[index] != NULL))
    {
    index++;
    }

  if (index >= TMAX_RCDCOUNT)
    return (1);

  ptr = strtok_r(value, " \t\n:,", &TokPtr);

  while (ptr != NULL)
    {
    TRemChkptDirList[index] = strdup(ptr);

    snprintf(tmpLine, sizeof(tmpLine), "added RemChkptDir[%d] '%s'",
             index,
             ptr);

    log_record(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_SERVER,
      "setremchkptdirlist",
      tmpLine);

    index++;

    if (index >= TMAX_RCDCOUNT)
      break;

    ptr = strtok_r(NULL, " \t\n:,", &TokPtr);
    }  /* END while (ptr != NULL) */

  /* SUCCESS */

  return (1);
  }  /* END setremchkptdirlist() */






/*
** Periodic log housekeeping.
**
** Stamps last_log_check with time_now and logs the version/loglevel
** banner.  When LOGKEEPDAYS is positive, log_remove_old() is asked to
** remove logs older than that many days from path_log.  When
** log_file_max_size is positive and log_size() has reached it, the log
** is rolled with depth log_file_roll_depth.
*/

void
check_log(void)

  {
  last_log_check = time_now;

  /* periodically record the version and loglevel */

  sprintf(log_buffer, msg_info_mom, PACKAGE_VERSION, LOGLEVEL);

  log_event(
    PBSEVENT_SYSTEM | PBSEVENT_FORCE,
    PBS_EVENTCLASS_SERVER,
    msg_daemonname,
    log_buffer);

  if (LOGKEEPDAYS > 0)
    {
    /* remove logs older than log_keep_days */

    snprintf(log_buffer, sizeof(log_buffer),
      "checking for old pbs_mom logs in dir '%s' (older than %d days)",
      path_log,
      LOGKEEPDAYS);

    log_event(
      PBSEVENT_SYSTEM | PBSEVENT_FORCE,
      PBS_EVENTCLASS_SERVER,
      msg_daemonname,
      log_buffer);

    if (log_remove_old(path_log, (LOGKEEPDAYS * SECS_PER_DAY)) != 0)
      {
      log_err(-1, "check_log", "failure occurred when checking for old pbs_mom logs");
      }
    }

  /* size-based rolling applies only when a positive limit is configured */

  if ((log_file_max_size > 0) && (log_size() >= log_file_max_size))
    {
    log_event(
      PBSEVENT_SYSTEM | PBSEVENT_FORCE,
      PBS_EVENTCLASS_SERVER,
      msg_daemonname,
      "Rolling log file");

    log_roll(log_file_roll_depth);
    }

  return;
  }  /* END check_log() */





/*
** Open and read the config file.  Save information in a linked
** list.  After reading the file, create an array, copy the list
** elements to the array and free the list.
*/

/* NOTE:  add new mom config parameters to 'special[]' */

/*
** read_config() - load (or reload) the MOM configuration file.
**
** file (I): path of the file to read; when NULL the global config_file
**           is used instead.
**
** Steps, in order:
**   - every entry of maskclient[] is freed and mask_num reset to 0;
**   - the file is located with stat(), checked with chk_file_sec()
**     (unless DEBUG or NO_SECURITY_CHECK is defined) and opened;
**   - lines starting with '$' are dispatched to the matching handler in
**     special[]; any other non-blank, non-comment line goes to
**     add_static() and is counted in nconfig;
**   - config_array is rebuilt from the entries queued on config_list;
**   - if mom_server_count is still 0, server names are taken from
**     pbs_get_server_list() and registered with setpbsserver().
**
** Returns 0 on success (including "no file given" and "default file not
** present"), 1 when an explicitly specified file is missing, fails the
** security check, or cannot be opened.
*/

int read_config(

  char *file)  /* I */

  {
  static char id[] = "read_config";

  FILE                 *conf = NULL;

  struct stat            sb;

  struct config_list *cp;

  struct config  *ap;
  char                   line[120];

  /* tokcpy() takes no size argument, so the destination must be able to
   * hold the longest token a single line can contain */

  char                   name[sizeof(line)];
  char                  *str;
  char                  *ptr;

  int                    linenum;
  int                    i;

  int                    IgnConfig = 0;

  int                    rc;

  int n, list_len;
  char *server_list_ptr;
  char *tp;


  if (LOGLEVEL >= 3)
    {
    sprintf(log_buffer, "updating configuration using file '%s'",
            (file != NULL) ? file : "NULL");

    log_record(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_SERVER,
      id,
      log_buffer);
    }

  /* drop the mask entries left over from the previous configuration */

  for (i = 0;i < mask_num;i++)
    {
    free(maskclient[i]);
    }

  mask_num = 0;

  if (file == NULL)
    file = config_file;

  rc = 0;

  if (file[0] == '\0')
    {
    log_record(
      PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_SERVER,
      id,
      "ALERT:  no config file specified");

    IgnConfig = 1;  /* no config file */
    }

  if ((IgnConfig == 0) && (stat(file, &sb) == -1))
    {
    IgnConfig = 1;

    /* the call above is stat(), so label the error accordingly */

    sprintf(log_buffer, "stat: %s",
            file);

    log_err(errno, id, log_buffer);

    if (config_file_specified != 0)
      {
      /* file specified and not there, return failure */

      log_record(
        PBSEVENT_SYSTEM,
        PBS_EVENTCLASS_SERVER,
        id,
        "ALERT:  cannot open config file - no file");

      rc = 1;
      }
    else
      {
      /* "config" file not located, return success */

      if (LOGLEVEL >= 3)
        {
        sprintf(log_buffer, "cannot open file '%s'",
                file);

        log_record(
          PBSEVENT_SYSTEM,
          PBS_EVENTCLASS_SERVER,
          id,
          log_buffer);
        }

      rc = 0;
      }
    }  /* END if ((IgnConfig == 0) && (stat(file,&sb) == -1)) */

  if (IgnConfig == 0)
    {
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)

    /* NOTE(review): S_IWGRP | S_IWOTH is presumably the set of permission
     * bits the file must not have - confirm against chk_file_sec() */

    if (chk_file_sec(file, 0, 0, S_IWGRP | S_IWOTH, 1, NULL))
      {
      /* not authorized to access specified file, return failure */

      log_record(
        PBSEVENT_SYSTEM,
        PBS_EVENTCLASS_SERVER,
        id,
        "ALERT:  cannot open config file - permissions");

      IgnConfig = 1;

      rc = 1;
      }

#endif  /* NO_SECURITY_CHECK */
    }    /* END if (ignConfig == 0) */

  if (IgnConfig == 0)
    {
    if ((conf = fopen(file, "r")) == NULL)
      {
      sprintf(log_buffer, "fopen: %s",
              file);

      log_err(errno, id, log_buffer);

      IgnConfig = 1;

      rc = 1;
      }
    }    /* END if (IgnConfig == 0) */

  if (IgnConfig == 0)
    {
    nconfig = 0;
    linenum = 0;

    while (fgets(line, sizeof(line), conf))
      {
      linenum++;

      if (line[0] == '#') /* comment */
        continue;

      if ((ptr = strchr(line, '#')) != NULL)
        {
        /* allow inline comments */

        *ptr = '\0';
        }

      str = skipwhite(line); /* pass over initial whitespace */

      if (*str == '\0')
        continue;

      if (LOGLEVEL >= 6)
        {
        sprintf(log_buffer, "processing config line '%.64s'",
                str);

        log_record(
          PBSEVENT_SYSTEM,
          PBS_EVENTCLASS_SERVER,
          id,
          log_buffer);
        }

      if (*str == '$')
        {
        /* special command */

        str = tokcpy(++str, name); /* resource name */

        for (i = 0;special[i].name;i++)
          {
          if (strcasecmp(name, special[i].name) == 0)
            break;
          }  /* END for (i) */

        if (special[i].name == NULL)
          {
          /* didn't find it */

          sprintf(log_buffer, "special command name %s not found (ignoring line)",
                  name);

          log_err(-1, id, log_buffer);

          continue;
          }

        str = skipwhite(str);  /* command param */

        rmnl(str);

        /* a handler returning 0 is reported as a failure; the line is
         * still consumed and processing continues */

        if (special[i].handler(str) == 0)
          {
          sprintf(log_buffer, "%s[%d] special command %s failed with %s",
                  file,
                  linenum,
                  name,
                  str);

          log_err(-1, id, log_buffer);
          }

        continue;
        }

      add_static(str, file, linenum);

      nconfig++;
      }  /* END while (fgets()) */

    /*
    ** Create a new array.
    */

    if (config_array != NULL)
      {
      for (ap = config_array;ap->c_name != NULL;ap++)
        {
        free(ap->c_name);
        free(ap->c_u.c_value);
        }

      free(config_array);
      }

    config_array = (struct config *)calloc(nconfig + 1, sizeof(struct config));

    memcheck((char *)config_array);

    /*
    ** Copy in the new information saved from the file.
    ** Stop early if the list holds fewer entries than were counted
    ** (NOTE(review): would happen if add_static() can decline to queue
    ** a line) rather than dereferencing a NULL list head.
    */

    for (i = 0, ap = config_array;(i < nconfig) && (config_list != NULL);i++, ap++)
      {
      *ap = config_list->c;
      cp = config_list->c_link;

      free(config_list); /* don't free name and value strings */
      config_list = cp; /* they carry over from the list */
      }

    ap->c_name = NULL;  /* one extra */

    fclose(conf);
    }  /* END if (IgnConfig == 0) */

  if (mom_server_count == 0)
    {
    /* No server names in torque/mom_priv/config.  Get names from torque/server_name. */

    server_list_ptr = pbs_get_server_list();
    list_len = csv_length(server_list_ptr);

    for (n = 0; n < list_len; n++)
      {
      tp = csv_nth(server_list_ptr, n);

      if (tp)
        {
        setpbsserver(tp);
        }
      }
    }

  return(rc);
  }  /* END read_config() */






/*
** Get an rm_attribute structure from a string.  If a NULL is passed
** for the string, use the previously remembered string.
*/

/*
** momgetattr() - parse the next "[qualifier=value]" group from a string.
**
** str (I): text to parse, or NULL to continue from where the previous
**          successful call stopped.
**
** Groups whose qualifier starts with "tag:" are skipped.  The value may
** itself contain balanced '[' ... ']' pairs.
**
** Returns a pointer to a single static rm_attribute (overwritten by the
** next call), or NULL when no further well-formed group is found, when
** there is nothing to continue from, or when the value does not fit the
** internal buffer.
**
** NOTE(review): the qualifier is copied with tokcpy(), which takes no
** size argument; qual[] can still overflow on a very long qualifier.
*/

struct rm_attribute *momgetattr(

        char *str) /* I */

  {
  char *id = "momgetattr";

  static char cookie[] = "tag:"; /* rm_attribute to ignore */
  static char *hold = NULL;
  static char qual[80] = "";
  static char valu[4096] = "";

  static struct rm_attribute attr =
    {
    qual, valu
    };

  int         level, i;

  if (str == NULL) /* if NULL is passed, use prev value */
    str = hold;

  if (str == NULL)
    {
    /* no previous string remembered either - nothing to parse */

    return(NULL);
    }

  /* FORMAT: ??? */

  do
    {
    str = skipwhite(str);

    if (*str++ != '[')
      {
      return(NULL);
      }

    str = skipwhite(str);  /* copy qualifier */

    str = tokcpy(str, qual);
    str = skipwhite(str);

    if (*str++ != '=')
      {
      return(NULL);
      }

    /* copy the value up to the ']' that closes this group, tracking
     * nested brackets so an inner ']' does not end it early */

    level = 0;

    for (i = 0;*str;str++, i++)
      {
      if (*str == '[')
        {
        level++;
        }
      else if (*str == ']')
        {
        if (level == 0)
          break;

        level--;
        }

      if (i >= (int)sizeof(valu) - 1)
        {
        /* value would run past valu[] - treat as malformed */

        log_err(-1, id, "attribute value too long");

        return(NULL);
        }

      valu[i] = *str;
      }

    if (*str++ != ']')
      {
      return(NULL);
      }

    valu[i] = '\0';

    if (LOGLEVEL >= 7)
      {
      sprintf(log_buffer, "found %s = %s",
              qual,
              valu);

      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);
      }
    }
  while (strncmp(qual, cookie, sizeof(cookie) - 1) == 0);

  /* remember where to resume for a later momgetattr(NULL) */

  hold = str;

  if (LOGLEVEL >= 5)
    {
    sprintf(log_buffer, "passing back %s = %s",
            qual,
            valu);

    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      id,
      log_buffer);
    }

  return(&attr);
  }  /* END momgetattr() */





/*
** Check the request against the format of the line read from
** the config file.  If it is a static value, there should be
** no params.  If it is a shell escape, the parameters (if any)
** should match the command line for the system call.
*/

/*
** conf_res() - produce the answer for a resource defined in the config
** file.
**
** resline (I): the configured value.  A leading '!' marks a shell
**              escape; anything else is a static value.
** attr    (I): first request parameter, or NULL if there are none; the
**              remaining ones are fetched with momgetattr(NULL).
**
** Static value: returned as-is; supplying parameters is an error.
** Shell escape: each "%name" that matches a parameter qualifier is
** replaced by that parameter's value, the command is run with popen()
** and its first output line is returned in ret_string.
**
** Returns "" for a NULL resline, resline itself for a static value, or
** ret_string holding either the command output or "? <RM_ERR_*>".
**
** SECURITY NOTE(review): parameter values are pasted into the command
** unescaped and popen() runs it through a shell, so values containing
** shell metacharacters are interpreted by the shell.  Not changed here
** because callers may rely on it - needs a deliberate decision.
**
** NOTE(review): param[] is filled by tokcpy(), which takes no size
** argument; a "%token" of 80 or more characters would overflow it.
**
** NOTE(review): when the command output ends without a newline the
** final "hack off newline" step removes the last output character.
*/

char *conf_res(

  char               *resline, /* I */
  struct rm_attribute *attr)    /* I */

  {
  char *id = "conf_res";

  char *name[RM_NPARM + 1];  /* +1 so the NULL terminator always fits */
  char *value[RM_NPARM];
  int used[RM_NPARM];  /* (boolean) */
  char param[80], *d;
  char *limit;
  int cmd_overflow = 0;
  int i, fd, len;
  FILE *child;
  char *child_spot;
  int child_len;

  if (resline == NULL)
    {
    return("");
    }

  if (resline[0] != '!')
    {
    /* static value */

    if (attr != NULL)
      {
      sprintf(ret_string, "? %d",
              RM_ERR_BADPARAM);

      return(ret_string);
      }

    return(resline);
    }

  /*
  ** From here on we are going to put together a shell command
  ** to do the requestor's bidding.  Parameter substitution
  ** is the first step.
  */

  for (i = 0;i < RM_NPARM;i++)
    {
    /* remember params */

    if (attr == NULL)
      {
      /* no more params */

      break;
      }

    name[i] = strdup(attr->a_qualifier);

    memcheck(name[i]);

    value[i] = strdup(attr->a_value);

    memcheck(value[i]);

    used[i] = 0;

    attr = momgetattr(NULL);
    }  /* END for (i) */

  /* terminate the list before any 'goto done' so the cleanup loop
   * always stops on a valid entry */

  name[i] = NULL;

  if (attr != NULL)
    {
    /* too many params */
    log_err(-1, id, "too many params");

    sprintf(ret_string, "? %d",
            RM_ERR_BADPARAM);

    goto done;
    }

  /* last position that may receive a command character, leaving room
   * for the terminating NUL (ret_size is used as the capacity of
   * ret_string, as the read() call below already does) */

  limit = ret_string + ret_size - 1;

  for (d = ret_string, resline++;(*resline != '\0') && (cmd_overflow == 0);)
    {
    /* scan command */

    if (*resline == '%')
      {
      /* possible token */

      char *hold;

      hold = tokcpy(resline + 1, param);

      for (i = 0;name[i];i++)
        {
        if (strcmp(param, name[i]) == 0)
          break;
        }

      if (name[i])
        {
        /* found a match - splice in the parameter value */

        char *x = value[i];

        while (*x)
          {
          if (d >= limit)
            {
            cmd_overflow = 1;

            break;
            }

          *d++ = *x++;
          }

        resline = hold;

        used[i] = 1;

        continue;
        }
      }

    /* ordinary character, or a '%' that names no parameter */

    if (d >= limit)
      {
      cmd_overflow = 1;

      break;
      }

    *d++ = *resline++;
    }

  if (cmd_overflow != 0)
    {
    log_err(-1, id, "command too long");

    sprintf(ret_string, "? %d",
            RM_ERR_BADPARAM);

    goto done;
    }

  for (i = 0;name[i];i++)
    {
    if (!used[i])
      {
      /* parameter sent but not used */
      log_err(-1, id,
              "unused parameters");

      sprintf(ret_string, "? %d",
              RM_ERR_BADPARAM);

      goto done;
      }
    }    /* END for (i) */

  *d = '\0';

  DBPRT(("command: %s\n",
         ret_string))

  if ((child = popen(ret_string, "r")) == NULL)
    {
    log_err(errno, id, "popen");

    sprintf(ret_string, "? %d",
            RM_ERR_SYSTEM);

    goto done;
    }

  fd = fileno(child);

  /* the command text is no longer needed; reuse ret_string for output */

  child_spot = ret_string;
  child_len = 0;
  child_spot[0] = '\0';

retryread:

  while ((len = read(fd, child_spot, ret_size - child_len)) > 0)
    {
    for (i = 0;i < len;i++)
      {
      if (child_spot[i] == '\n')
        break;
      }

    if (i < len)
      {
      /* found newline */

      child_len += i + 1;

      break;
      }

    child_len += len;

    child_spot += len;

    checkret(&child_spot, len);
    }

  if (len == -1)
    {
    if (errno == EINTR)
      {
      goto retryread;
      }

    log_err(errno, id, "pipe read");

    sprintf(ret_string, "? %d",
            RM_ERR_SYSTEM);

    /* a stream from popen() must be closed with pclose() */

    pclose(child);

    goto done;
    }

  pclose(child);

  if (child_len > 0)
    ret_string[child_len - 1] = '\0'; /* hack off newline */

done:

  for (i = 0;name[i] != NULL;i++)
    {
    /* free up params */

    free(name[i]);
    free(value[i]);
    }  /* END for (i) */

  return(ret_string);
  }  /* END conf_res() */




static void catch_abort(

  int sig)

  {

  struct rlimit rlimit;

  /*
   * Reset ourselves to the default signal handler to try and
   * prevent recursive core dumps.
   */

  struct sigaction act;

  sigemptyset(&act.sa_mask);
  act.sa_flags   = 0;
  act.sa_handler = SIG_DFL;

  sigaction(SIGSEGV, &act, NULL);
  sigaction(SIGBUS, &act, NULL);
  sigaction(SIGFPE, &act, NULL);
  sigaction(SIGILL, &act, NULL);
  sigaction(SIGTRAP, &act, NULL);
  sigaction(SIGSYS, &act, NULL);

  log_err(sig, "mom_main", "Caught fatal core signal");
  rlimit.rlim_cur = RLIM_INFINITY;
  rlimit.rlim_max = RLIM_INFINITY;

  setrlimit(RLIMIT_CORE, &rlimit);
  abort();

  return;
  }  /* END catch_abort() */





/*
** catch_hup() - signal handler: request a reconfiguration.
**
** sig (I): the signal number (not used).
**
** Logs "reset", sets call_hup so the restart is carried out later
** outside signal context, and toggles rpp_dbprt.
**
** The handler used to format "caught signal N" into the shared global
** log_buffer without ever logging it; that write could clobber a message
** being assembled by the interrupted code, so it has been removed.
*/

static void catch_hup(

  int sig)

  {
  (void)sig;

  log_record(PBSEVENT_SYSTEM, 0, "catch_hup", "reset");

  call_hup = 1;

  rpp_dbprt = 1 - rpp_dbprt; /* toggle debug prints for RPP */

  return;
  }  /* END catch_hup() */




/*
 * Do a restart of resmom.
 * Read the last seen config file and
 * Clean up and reinit the dependent code.
 */

/*
** process_hup() - carry out the restart requested through call_hup.
**
** Clears call_hup, logs "reset", closes and reopens the log, resets the
** log size/roll-depth settings, re-reads the configuration with
** read_config(NULL), then runs check_log(), cleanup() and initialize().
** The value returned by read_config() is not examined.
*/

static void process_hup(void)

  {
  static char id[] = "process_hup";

  /* acknowledge the pending request first */

  call_hup = 0;

  log_record(PBSEVENT_SYSTEM, 0, id, "reset");

  /* cycle the log file */

  log_close(1);

  log_open(log_file, path_log);

  /* NOTE(review): presumably put back to defaults so the config file
   * read below can set them again - confirm */

  log_file_max_size   = 0;
  log_file_roll_depth = 1;

  read_config(NULL);

  check_log();

  /* tear down and rebuild the dependent code */

  cleanup();

  initialize();

  return;
  }  /* END process_hup() */




/*
** Got an alarm call.
** This handler only records that the alarm fired (and prints a debug
** message); it does not itself close general network connections,
** clean up or reinit the dependent code.
*/

void toolong(

  int sig)

  {
  char *id = "toolong";

  log_record(PBSEVENT_SYSTEM, 0, id, "alarm call");

  if (LOGLEVEL >= 1)
    DBPRT(("alarm call\n"))

    return;
  }  /* END toolong() */







#ifdef DEBUG

/*
** log_verbose() - DEBUG-only: log the start of a raw buffer.
**
** id  (I): identifier passed through to log_record()
** buf (I): bytes to dump
** len (I): number of bytes available in buf; at most 50 are logged
**
** Printable bytes are copied as-is, every other byte is written as its
** decimal value in parentheses, e.g. "(10)".  The result is built in
** the global log_buffer and logged as a PBSEVENT_DEBUG record.
*/

void log_verbose(

  char *id,
  char *buf,
  int  len)

  {
  int i;
  char *cp;

  len = MIN(len, 50);

  cp = log_buffer;

  for (i = 0;i < len;i++)
    {
    int c = buf[i];

    /* <ctype.h> functions require an unsigned char value (or EOF);
     * a negative plain char would be undefined behaviour */

    if (isprint((unsigned char)c))
      {
      *cp++ = c;
      }
    else
      {
      sprintf(cp, "(%d)",
              c);

      cp += strlen(cp);
      }
    }

  *cp = '\0';

  log_record(PBSEVENT_DEBUG, 0, id, log_buffer);

  return;
  }  /* END log_verbose() */


#else
#define log_verbose(a, b, c)
#endif

/*
** See if an IP address matches any names stored as "restricted"
** access hosts.  Return 0 if a name matches, 1 if not.
*/

/*
** bad_restrict() - check a client address against the maskclient[] list.
**
** ipadd (I): IPv4 address in host byte order (it is converted with
**            htonl() before the lookup).
**
** The address is reverse-resolved with gethostbyaddr() and the resulting
** name is compared, case-insensitively and from the end backwards, with
** each mask.  A mask matches when all of it equals the tail of the host
** name, or when everything after a leading '*' does.
**
** Returns 0 if a mask matches, 1 if none does or the address cannot be
** resolved.
**
** Empty mask entries are skipped; previously an empty entry matched
** every resolvable host.
**
** NOTE(review): a mask without '*' is still accepted as a plain suffix,
** so "host.dom" also matches "otherhost.dom".  Behaviour kept as-is;
** confirm whether an exact match was intended.
*/

int bad_restrict(

  u_long ipadd)

  {

  struct hostent *host;

  struct in_addr in;

  const char *hname;
  const char *mask;

  size_t hostlen, masklen;
  size_t h, m;

  int i;

  in.s_addr = htonl(ipadd);

  if ((host = gethostbyaddr(
                (void *) & in,
                sizeof(struct in_addr),
                AF_INET)) == NULL)
    {
    return(1);
    }

  hname   = host->h_name;
  hostlen = strlen(hname);

  for (i = 0;i < mask_num;i++)
    {
    mask    = maskclient[i];
    masklen = strlen(mask);

    if ((masklen == 0) || (hostlen < masklen))
      continue;

    /* walk both strings backwards; h and m count the characters not
     * yet compared, so no pointer ever moves before a string's start */

    h = hostlen;
    m = masklen;

    /* check case insensitive */

    while ((m > 0) &&
           (tolower((unsigned char)hname[h - 1]) == tolower((unsigned char)mask[m - 1])))
      {
      h--;
      m--;
      }  /* END while () */

    /* m == 0: whole mask matched; m == 1 with '*': wildcard prefix */

    if ((m == 0) || ((m == 1) && (mask[0] == '*')))
      {
      return(0);
      }
    }    /* END for (i) */

  return(1);
  }  /* END bad_restrict() */





/*
** Process a request for the resource monitor.  The i/o
** will take place using DIS over a tcp fd or an rpp stream.
*/

int rm_request(

  int iochan,
  int version,
  int tcp)     /* I */

  {
  static char id[] = "rm_request";
  char name[100];
  char output[BUFSIZ << 2];
  int len;
  int  command, ret;
  int  restrictrm = 0;
  char  *curr, *value, *cp, *body;

  struct config  *ap;

  struct rm_attribute *attr;

  struct sockaddr_in *addr;
  unsigned long ipadd;
  u_short port;
  void (*close_io)(int);
  int (*flush_io)(int);

  extern struct connection svr_conn[];

  int   NotTrusted = 0;

  char *BPtr;
  int   BSpace;

  errno = 0;
  log_buffer[0] = '\0';

  if (tcp)
    {
    ipadd = svr_conn[iochan].cn_addr;
    port = svr_conn[iochan].cn_port;

    close_io = close_conn;
    flush_io = DIS_tcp_wflush;
    }
  else
    {
    addr = rpp_getaddr(iochan);
    ipadd = ntohl(addr->sin_addr.s_addr);
    port = ntohs((unsigned short)addr->sin_port);

    close_io = (void(*)(int))rpp_close;
    flush_io = rpp_flush;
    }

  if (version != RM_PROTOCOL_VER)
    {
    sprintf(log_buffer, "protocol version %d unknown",
            version);

    goto bad;
    }

  if (((port_care != FALSE) && (port >= IPPORT_RESERVED)) ||
      (tfind(ipadd, &okclients) == NULL))
    {
    if (bad_restrict(ipadd))
      {
      sprintf(log_buffer, "bad attempt to connect - unauthorized (port: %d)",
              port);

      NotTrusted = 1;

      goto bad;
      }

    restrictrm = 1;
    }

  /* looks okay, find out what command it is */

  command = disrsi(iochan, &ret);

  if (ret != DIS_SUCCESS)
    {
    sprintf(log_buffer, "no command %s",
            dis_emsg[ret]);

    goto bad;
    }

  switch (command)
    {
    case RM_CMD_CLOSE:  /* no response to this */

      close_io(iochan);

      return(1);

      /*NOTREACHED*/

      break;

    case RM_CMD_REQUEST:

      /* query resource data */

      reqnum++;


      ret = diswsi(iochan, RM_RSP_OK);

      if (ret != DIS_SUCCESS)
        {
        sprintf(log_buffer, "write request response failed: %s",
                dis_emsg[ret]);

        goto bad;
        }

      for (;;)
        {
        cp = disrst(iochan, &ret);

        if (ret == DIS_EOD)
          {
          break;
          }

        if (ret != DIS_SUCCESS)
          {
          sprintf(log_buffer, "problem with request line: %s",
                  dis_emsg[ret]);

          goto bad;
          }

        curr = skipwhite(cp);

        curr = tokcpy(curr, name);

        if (name[0] == '\0')
          {
          /* no name */

          sprintf(output, "%s=? %d",
                  cp,
                  RM_ERR_UNKNOWN);
          }
        else
          {
          if (!strncasecmp(name, "clearjob", strlen("clearjob")))
            {
            char *ptr = NULL;

            job *pjob = NULL, *pjobnext = NULL;

            if ((*curr == '=') && ((*curr) + 1 != '\0'))
              {
              ptr = curr + 1;
              }

            /* purge job if local */

            if (ptr == NULL)
              {
              strcpy(output, "invalid clearjob request");
              }
            else
              {
              char tmpLine[1024];

              if (!strcasecmp(ptr, "all"))
                {
                if ((pjob = (job *)GET_NEXT(svr_alljobs)) != NULL)
                  {
                  while (pjob != NULL)
                    {
                    sprintf(tmpLine, "clearing job %s",
                            pjob->ji_qs.ji_jobid);

                    log_record(PBSEVENT_SYSTEM, 0, id, tmpLine);

                    pjobnext = (job *)GET_NEXT(pjob->ji_alljobs);

                    job_purge(pjob);

                    pjob = pjobnext;

                    strcat(output, tmpLine);
                    strcat(output, "\n");
                    }
                  }

                strcat(output, "clear completed");
                }
              else if ((pjob = find_job(ptr)) != NULL)
                {
                sprintf(tmpLine, "clearing job %s",
                        pjob->ji_qs.ji_jobid);

                log_record(PBSEVENT_SYSTEM, 0, id, tmpLine);

                job_purge(pjob);

                strcpy(output, tmpLine);
                }
              }
            }
          else if (!strncasecmp(name, "clearmsg", strlen("clearmsg")))
            {
            /*  clear rm messages */

            PBSNodeMsgBuf[0] = '\0';

            strcpy(output, "messages cleared");

            log_record(PBSEVENT_SYSTEM, 0, id, "messages cleared");
            }
          else if (!strncasecmp(name, "cycle", strlen("cycle")))
            {
            /*  force immediate cycle */

            LastServerUpdateTime = 0;

            strcpy(output, "cycle forced");

            log_record(PBSEVENT_SYSTEM, 0, id, "reporting cycle forced");
            }
          else if (!strncasecmp(name, "status_update_time", strlen("status_update_time")))
            {
            /* set or report status_update_time */

            if ((*curr == '=') && ((*curr) + 1 != '\0'))
              {
              setstatusupdatetime(curr + 1);
              }

            sprintf(output, "status_update_time=%d",

                    ServerStatUpdateInterval);
            }
          else if (!strncasecmp(name, "check_poll_time", strlen("check_poll_time")))
            {
            /* set or report check_poll_time */

            if ((*curr == '=') && ((*curr) + 1 != '\0'))
              {
              setcheckpolltime(curr + 1);
              }

            sprintf(output, "check_poll_time=%d",
                    CheckPollTime);
            }
          else if (!strncasecmp(name, "jobstartblocktime", strlen("jobstartblocktime")))
            {
            /* set or report jobstartblocktime */

            if ((*curr == '=') && ((*curr) + 1 != '\0'))
              {
              jobstartblocktime(curr + 1);
              }

            sprintf(output, "jobstartblocktime=%ld",

                    TJobStartBlockTime);
            }
          else if (!strncasecmp(name, "loglevel", strlen("loglevel")))
            {
            /* set or report loglevel */

            if ((*curr == '=') && ((*curr) + 1 != '\0'))
              {
              setloglevel(curr + 1);
              }

            sprintf(output, "loglevel=%d",
                    LOGLEVEL);
            }
          else if (!strncasecmp(name, "down_on_error", strlen("down_on_error")))
            {
            /* set or report down_on_error */

            if ((*curr == '=') && ((*curr) + 1 != '\0'))
              {
              setdownonerror(curr + 1);
              }

            sprintf(output, "down_on_error=%d",

                    MOMConfigDownOnError);
            }
          else if (!strncasecmp(name, "enablemomrestart", strlen("enablemomrestart")))
            {
            /* set or report enablemomrestart */

            if ((*curr == '=') && ((*curr) + 1 != '\0'))
              {
              setenablemomrestart(curr + 1);
              }

            sprintf(output, "enablemomrestart=%d",

                    MOMConfigRestart);
            }
          else if (!strncasecmp(name, "rcpcmd", strlen("rcpcmd")))
            {
            /* set or report rcp_path and rcp_args */

            if ((*curr == '=') && ((*curr) + 1 != '\0'))
              {
              setrcpcmd(curr + 1);
              }

            sprintf(output, "rcpcmd=%s %s",

                    rcp_path, rcp_args);
            }
          else if (!strncasecmp(name, "version", strlen("version")))
            {
            /* report version */

            sprintf(output, "version=%s",
                    PACKAGE_VERSION);
            }
          else if ((!strncasecmp(name, "configversion", strlen("configversion"))) && (MOMConfigVersion[0] != '\0'))
            {
            /* report configversion */

            sprintf(output, "configversion=%s",
                    MOMConfigVersion);
            }
          else if (!strncasecmp(name, "diag", strlen("diag")))
            {
            char tmpLine[1024];
            char *ptr;

            int rc;
            time_t Now;

            job *pjob;

            struct varattr *pva;

            time(&Now);

            ptr = name + strlen("diag");

            verbositylevel = (int)strtol(ptr, NULL, 10);

            output[0] = '\0';

            BPtr = output;
            BSpace = sizeof(output);

            sprintf(tmpLine, "\nHost: %s/%s   Version: %s   PID: %ld\n",
                    mom_short_name,
                    mom_host,
                    PACKAGE_VERSION,
                    (long)getpid());

            MUStrNCat(&BPtr, &BSpace, tmpLine);

            mom_server_all_diag(&BPtr, &BSpace);

            sprintf(tmpLine, "HomeDirectory:          %s\n",
                    (mom_home != NULL) ? mom_home : "N/A");

            MUStrNCat(&BPtr, &BSpace, tmpLine);

#ifdef HAVE_SYS_STATVFS_H
              {
#include <sys/statvfs.h>

              struct statvfs VFSStat;

              if (statvfs(path_spool, &VFSStat) < 0)
                {
                MUSNPrintF(&BPtr, &BSpace, "ALERT:  cannot stat stdout/stderr spool directory '%s' (errno=%d) %s\n",
                           path_spool,
                           errno,
                           strerror(errno));
                }
              else
                {
                if (VFSStat.f_bavail > 0)
                  {
                  if (verbositylevel >= 1)
                    MUSNPrintF(&BPtr, &BSpace, "stdout/stderr spool directory: '%s' (%d blocks available)\n",
                               path_spool,
                               VFSStat.f_bavail);
                  }
                else
                  {
                  MUSNPrintF(&BPtr, &BSpace, "ALERT:  stdout/stderr spool directory '%s' is full\n",
                             path_spool);
                  }
                }
              }    /* END BLOCK */
#endif /* HAVE_SYS_STATVFS_H */

            if (MOMConfigVersion[0] != '\0')
              {
              sprintf(tmpLine, "ConfigVersion:          %s\n",
                      MOMConfigVersion);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }

            if (verbositylevel >= 3)
              {
#if SYSLOG
              MUStrNCat(&BPtr, &BSpace, "NOTE:  syslog enabled\n");
#else /* SYSLOG */
              MUStrNCat(&BPtr, &BSpace, "NOTE:  syslog not enabled (use 'configure --enable-syslog' to enable)\n");
#endif /* SYSLOG */
              }

            if (verbositylevel >= 3)
              {
              if (PBSNodeCheckPath[0] != '\0')
                {
                sprintf(tmpLine, "Node Health Check Script: %s (%d second update interval)\n",
                        PBSNodeCheckPath,
                        PBSNodeCheckInterval * ServerStatUpdateInterval);

                MUStrNCat(&BPtr, &BSpace, tmpLine);
                }
              }

            sprintf(tmpLine, "MOM active:             %ld seconds\n",
                    (long)Now - MOMStartTime);

            MUStrNCat(&BPtr, &BSpace, tmpLine);

            if (verbositylevel >= 1)
              {
              sprintf(tmpLine, "Check Poll Time:        %d seconds\n",
                      CheckPollTime);

              MUStrNCat(&BPtr, &BSpace, tmpLine);

              sprintf(tmpLine, "Server Update Interval: %d seconds\n",
                      ServerStatUpdateInterval);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }

            if (PBSNodeMsgBuf[0] != '\0')
              {
              sprintf(tmpLine, "MOM Message:            %s (use 'momctl -q clearmsg' to clear)\n",
                      PBSNodeMsgBuf);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }

            if (MOMUNameMissing[0] != '\0')
              {
              sprintf(tmpLine, "WARNING:  passwd file is corrupt (job requests user '%s' - not found in local passwd file)\n",
                      MOMUNameMissing);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }

            if (MOMPrologTimeoutCount > 0)
              {
              sprintf(tmpLine, "WARNING:  %d prolog timeouts (%d seconds) detected since start up - increase $prologalarm or investigate prolog\n",
                      MOMPrologTimeoutCount,
                      pe_alarm_time);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }

            if (MOMPrologFailureCount > 0)
              {
              sprintf(tmpLine, "WARNING:  %d prolog failures detected since start up - investigate prolog\n",
                      MOMPrologFailureCount);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }

            sprintf(tmpLine, "LogLevel:               %d (use SIGUSR1/SIGUSR2 to adjust)\n",

                    LOGLEVEL);

            MUStrNCat(&BPtr, &BSpace, tmpLine);

            if (verbositylevel >= 1)
              {
#if RPP
              sprintf(tmpLine, "Communication Model:    %s\n", "RPP");
#else  /* RPP */
              sprintf(tmpLine, "Communication Model:    %s\n", "TCP");
#endif /* RPP */

              MUStrNCat(&BPtr, &BSpace, tmpLine);

              if ((MOMIsLocked == 1) || (MOMIsPLocked == 1) || (verbositylevel >= 4))
                {
                sprintf(tmpLine, "MemLocked:              %s",
                        (MOMIsLocked == 0) ? "FALSE" : "TRUE");

                if (MOMIsLocked == 1)
                  strcat(tmpLine, "  (mlock)");

                if (MOMIsPLocked == 1)
                  strcat(tmpLine, "  (plocked)");

                strcat(tmpLine, "\n");

                MUStrNCat(&BPtr, &BSpace, tmpLine);
                }
              }    /* END if (verbositylevel >= 1) */

            if ((verbositylevel >= 1) && (pbs_tcp_timeout > 0))
              {
              sprintf(tmpLine, "TCP Timeout:            %d seconds\n",
                      (int)pbs_tcp_timeout);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }

            if (verbositylevel >= 1)
              {

              struct stat s;

              int prologfound = 0;

              if (stat(path_prolog, &s) != -1)
                {
                MUSNPrintF(&BPtr, &BSpace, "Prolog:                 %s (enabled)\n",
                           path_prolog);

                prologfound = 1;
                }
              else if (verbositylevel >= 2)
                {
                MUSNPrintF(&BPtr, &BSpace, "Prolog:                 %s (disabled)\n",
                           path_prolog);
                }

              if (stat(path_prologp, &s) != -1)
                {
                MUSNPrintF(&BPtr, &BSpace, "Parallel Prolog:        %s (enabled)\n",
                           path_prologp);

                prologfound = 1;
                }

              if (prologfound == 1)
                {
                sprintf(tmpLine, "Prolog Alarm Time:      %d seconds\n",
                        pe_alarm_time);

                MUStrNCat(&BPtr, &BSpace, tmpLine);
                }
              }

            if (verbositylevel >= 2)
              {
              /* check alarm */

              rc = alarm(alarm_time);

              alarm(rc);

              sprintf(tmpLine, "Alarm Time:             %d of %d seconds\n",
                      rc,
                      alarm_time);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }

            if (verbositylevel >= 1)
              {
              /* display okclient list */

              tmpLine[0] = '\0';

              tlist(okclients, tmpLine, sizeof(tmpLine));

              MUSNPrintF(&BPtr, &BSpace, "Trusted Client List:    %s\n",
                         tmpLine);
              }

            if (verbositylevel >= 1)
              {
              tmpLine[0] = '\0';

              MUSNPrintF(&BPtr, &BSpace, "Copy Command:           %s %s\n",
                         rcp_path,
                         rcp_args);
              }

            /* joblist */

            if ((pjob = (job *)GET_NEXT(svr_alljobs)) == NULL)
              {
              sprintf(tmpLine, "NOTE:  no local jobs detected\n");

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }
            else
              {
              int    numvnodes = 0;
              task  *ptask;
              char   SIDList[1024];

              char  *VPtr;  /* job env variable value pointer */

              char *SPtr;
              int   SSpace;

              for (;pjob != NULL;pjob = (job *)GET_NEXT(pjob->ji_alljobs))
                {
                SPtr   = SIDList;
                SSpace = sizeof(SIDList);

                SIDList[0] = '\0';

                for (ptask = (task *)GET_NEXT(pjob->ji_tasks);
                     ptask != NULL;
                     ptask = (task *)GET_NEXT(ptask->ti_jobtask))
                  {
                  /* only check on tasks that we think should still be around */

                  if (ptask->ti_qs.ti_status != TI_STATE_RUNNING)
                    continue;

                  /* NOTE:  on linux systems, the session master should have
                     pid == sessionid */

                  MUSNPrintF(&SPtr, &SSpace, "%s%d",
                             (SIDList[0] != '\0') ? "," : "",
                             ptask->ti_qs.ti_sid);
                  }  /* END for (task) */

                numvnodes += pjob->ji_numvnod;

                sprintf(tmpLine, "job[%s]  state=%s  sidlist=%s",
                        pjob->ji_qs.ji_jobid,
                        PJobSubState[pjob->ji_qs.ji_substate],
                        SIDList);

                MUStrNCat(&BPtr, &BSpace, tmpLine);

                if (verbositylevel >= 4)
                  {
                  /* report job variables */

                  VPtr = get_job_envvar(pjob, "BATCH_PARTITION_ID");

                  if (VPtr != NULL)
                    {
                    sprintf(tmpLine, "  BATCH_PARTITION_ID=%s",
                            VPtr);

                    MUStrNCat(&BPtr, &BSpace, tmpLine);
                    }

                  VPtr = get_job_envvar(pjob, "BATCH_ALLOC_COOKIE");

                  if (VPtr != NULL)
                    {
                    sprintf(tmpLine, "  BATCH_ALLOC_COOKIE=%s",
                            VPtr);

                    MUStrNCat(&BPtr, &BSpace, tmpLine);
                    }
                  }    /* END if (verbositylevel >= 4) */

                MUStrNCat(&BPtr, &BSpace, "\n");
                }  /* END for (pjob) */

              sprintf(tmpLine, "Assigned CPU Count:     %d\n",
                      numvnodes);

              MUStrNCat(&BPtr, &BSpace, tmpLine);
              }  /* END else ((pjob = (job *)GET_NEXT(svr_alljobs)) == NULL) */

            if ((pjob = (job *)GET_NEXT(svr_newjobs)) != NULL)
              {
              while (pjob != NULL)
                {
                sprintf(tmpLine, "job[%s]  state=NEW\n",
                        pjob->ji_qs.ji_jobid);

                MUStrNCat(&BPtr, &BSpace, tmpLine);

                pjob = (job *)GET_NEXT(pjob->ji_alljobs);
                }
              }

            if ((pva = (struct varattr *)GET_NEXT(mom_varattrs)) != NULL)
              {
              MUStrNCat(&BPtr, &BSpace, "Varattrs:\n");

              while (pva != NULL)
                {
                sprintf(tmpLine, "  ttl=%d  last=%s  cmd=%s\n  value=%s\n\n",
                        pva->va_ttl,
                        ctime(&pva->va_lasttime),
                        pva->va_cmd,
                        (pva->va_value != NULL) ? pva->va_value : "NULL");

                MUStrNCat(&BPtr, &BSpace, tmpLine);

                pva = (struct varattr *)GET_NEXT(pva->va_link);
                }
              }

            MUStrNCat(&BPtr, &BSpace, "\ndiagnostics complete\n");

            log_record(PBSEVENT_SYSTEM, 0, id, "internal diagnostics complete");
            }
          else
            {
            ap = rm_search(config_array, name);

            attr = momgetattr(curr);

            if (LOGLEVEL >= 3)
              log_record(PBSEVENT_SYSTEM, 0, id, "setting alarm in rm_request");

            alarm(alarm_time);

            if ((ap != NULL) && !restrictrm)
              {
              /* static */

              sprintf(output, "%s=%s",
                      cp,
                      conf_res(ap->c_u.c_value, attr));
              }
            else
              {
              /* check dependent code */

              log_buffer[0] = '\0';

              value = dependent(name, attr);

              if (value != NULL)
                {
                sprintf(output, "%s=%s",
                        cp,
                        value);
                }
              else
                {
                /* not found anywhere */

                sprintf(output, "%s=? %d",
                        cp,
                        rm_errno);
                }
              }

            alarm(0);
            }
          }  /* END (name[0] == '\0') */

        free(cp);

        ret = diswst(iochan, output);


        if (ret != DIS_SUCCESS)
          {
          sprintf(log_buffer, "write string failed %s",
                  dis_emsg[ret]);

          goto bad;
          }
        }    /* END for () */

      break;

    case RM_CMD_CONFIG:

      {
      char *ptr;

      if (MOMConfigRReconfig == FALSE)
        {
        log_err(-1, id,
                "remote reconfiguration disabled, ignoring request");

        goto bad;
        }

      if (restrictrm)
        {
        log_err(-1, id, "restricted configure attempt");

        goto bad;
        }

      log_record(PBSEVENT_SYSTEM, 0, id, "configure");

      body = disrst(iochan, &ret);

      /* FORMAT:  FILE:<FILENAME> or <FILEDATA> (NYI) */

      if (ret == DIS_EOD)
        {
        /* no file specified, use default */

        body = NULL;
        }
      else if (ret != DIS_SUCCESS)
        {
        sprintf(log_buffer, "problem with config body %s",
                dis_emsg[ret]);

        goto bad;
        }
      else
        {
        FILE *fp;

        if ((ptr = strstr(body, "CONFIG:")) != NULL)
          {
          ptr += strlen("CONFIG:");

          /* overwrite config with data and clear body */

          if ((fp = fopen(config_file, "w+")) == NULL)
            {
            sprintf(log_buffer, "cannot open config file %s",
                    config_file);

            goto bad;
            }

          if (fwrite(ptr, sizeof(char), strlen(ptr) + 1, fp) < (strlen(ptr) + 1))
            {
            fclose(fp);

            sprintf(log_buffer, "cannot write config file %s",
                    config_file);

            goto bad;
            }

          fclose(fp);

          body = NULL;
          }
        }

      len = read_config(body);

      ret = diswsi(iochan, len ? RM_RSP_ERROR : RM_RSP_OK);

      if (ret != DIS_SUCCESS)
        {
        sprintf(log_buffer, "write config response failed %s",
                dis_emsg[ret]);

        goto bad;
        }
      }    /* END (case RM_CMD_CONFIG) */

    break;

    case RM_CMD_SHUTDOWN:

      if (restrictrm)
        {
        log_err(-1, id, "restricted shutdown attempt");

        goto bad;
        }

      log_record(PBSEVENT_SYSTEM, 0, id, "shutdown");

      ret = diswsi(iochan, RM_RSP_OK);

      if (ret != DIS_SUCCESS)
        {
        sprintf(log_buffer, "write shutdown response failed %s",
                dis_emsg[ret]);

        log_err(-1, id, log_buffer);
        }

      flush_io(iochan);

      close_io(iochan);

      cleanup();

      log_close(1);

      rpp_shutdown();

      exit(0);

      /*NOTREACHED*/

      break;

    default:

      sprintf(log_buffer, "unknown command %d",
              command);

      log_err(-1, id, log_buffer);

      ret = diswsi(iochan, RM_RSP_ERROR);

      if (ret != DIS_SUCCESS)
        {
        sprintf(log_buffer, "write default response failed %s",
                dis_emsg[ret]);

        goto bad;
        }

      ret = diswst(iochan, log_buffer);

      if (ret != DIS_SUCCESS)
        {
        sprintf(log_buffer, "write string failed %s",
                dis_emsg[ret]);

        goto bad;
        }

      break;
    }  /* END switch(command) */

  if (flush_io(iochan) == -1)
    {
    log_err(errno, id, "flush");

    goto bad;
    }

  return 0;

bad:

  sprintf(output, "\n\tmessage refused from port %d addr %s",
          port,
          netaddr_pbs_net_t(ipadd));

  sprintf(TMOMRejectConn, "%s:%d  %s",
          netaddr_pbs_net_t(ipadd),
          port,
          (NotTrusted == 1) ? "(server not authorized)" : "(request corrupt)");

  strcat(log_buffer, output);

  log_err(errno, id, log_buffer);

  close_io(iochan);

  return(-1);
  }  /* END rm_request() */





/*
 * Read a RPP message from a stream, figure out if it is a
 * Resource Monitor request or an InterMom message.
 */

void do_rpp(

  int stream)  /* I */

  {
  /*
   * Reads the protocol number and the protocol version from the stream,
   * then dispatches on the protocol:
   *   RM_PROTOCOL - rm_request(); rpp_eom() is called only when it returns 0
   *   IM_PROTOCOL - im_request()
   *   IS_PROTOCOL - is_request(), then the server receive time is updated
   *   otherwise   - the protocol number is logged and the stream is closed
   *
   * If either header field cannot be read, im_eof() is called with the
   * DIS error code and the routine returns.
   */

  static char  id[] = "do_rpp";

  int             ret, proto, version;
  void im_request(int, int);
  void is_request(int, int, int *);
  void im_eof(int, int);

  DIS_rpp_reset();
  proto = disrsi(stream, &ret);

  if (ret != DIS_SUCCESS)
    {
    DBPRT(("%s: cannot get protocol %s\n",
           id,
           dis_emsg[ret]))

    if (LOGLEVEL >= 6)
      {
      sprintf(log_buffer, "cannot get protocol %s",
              dis_emsg[ret]);

      log_err(errno, id, log_buffer);
      }

    im_eof(stream, ret);

    return;
    }

  version = disrsi(stream, &ret);

  if (ret != DIS_SUCCESS)
    {
    DBPRT(("%s: no protocol version number %s\n",
           id,
           dis_emsg[ret]))

    sprintf(log_buffer, "no protocol version number %s",
            dis_emsg[ret]);

    log_err(errno, id, log_buffer);

    im_eof(stream, ret);

    return;
    }

  switch (proto)
    {

    case RM_PROTOCOL:

      DBPRT(("%s: got a resource monitor request\n",
             id))

      if (rm_request(stream, version, 0) == 0)
        rpp_eom(stream);

      break;

    case IM_PROTOCOL:

      if (LOGLEVEL >= 6)
        {
        log_record(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          id,
          "got an internal task manager request in do_rpp");
        }

      im_request(stream, version);

      break;

    case IS_PROTOCOL:

      {
      /* NOTE(review): initialized so PBSServerCmds[] is never indexed with
         an indeterminate value if is_request() returns without storing a
         command - assumes index 0 is a valid entry, confirm */

      int tmpI = 0;

      if (LOGLEVEL >= 3)
        {
        log_record(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          id,
          "got an inter-server request");
        }

      is_request(stream, version, &tmpI);

      mom_server_update_receive_time(stream, PBSServerCmds[tmpI]);
      }

    break;

    default:

      if (LOGLEVEL >= 1)
        {
        sprintf(log_buffer, "unexpected request protocol type %d received",
                proto);

        /* log the message that was just formatted, not the IS_PROTOCOL text */

        log_record(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          id,
          log_buffer);
        }

      rpp_close(stream);

      break;
    }  /* END switch (proto) */

  return;
  }  /* END do_rpp() */





/*
 * rpp_request - service every RPP stream that rpp_poll() reports.
 *
 * Each stream returned by rpp_poll() is handed to do_rpp().  Polling stops
 * when rpp_poll() returns -1 (which is logged as an error) or -2.
 */

void rpp_request(

  int fd) /* not used */

  {
  static char id[] = "rpp_request";
  int         ready;

  while (1)
    {
    ready = rpp_poll();

    if (ready == -1)
      {
      log_err(errno, id, "rpp_poll");

      return;
      }

    if (ready == -2)
      {
      /* unknown stream identifier */

      return;
      }

    do_rpp(ready);
    }  /* END while (1) */
  }  /* END rpp_request() */





int do_tcp(

  int fd)

  {
#ifndef NDEBUG
  static char id[] = "do_tcp";
#endif

  int ret, proto, version;
  int tm_request(int stream, int version);

  time_t tmpT;

  tmpT = pbs_tcp_timeout;

  pbs_tcp_timeout = 0;

  proto = disrsi(fd, &ret);

  if (tmpT > 0)
    {
    /* restore */

    pbs_tcp_timeout = tmpT;
    }
  else
    {
    /* initialize */

    pbs_tcp_timeout = PMOMTCPTIMEOUT;
    }

  switch (ret)
    {
    case DIS_SUCCESS:  /* worked */

      break;

    case DIS_EOF:   /* closed */

      close_conn(fd);

      /* continue to next case */

    case DIS_EOD:   /* still open */

      return(1);

      /*NOTREACHED*/

      break;

    default:

      sprintf(log_buffer, "no protocol number: %s",
              dis_emsg[ret]);

      goto bad;

      /*NOTREACHED*/

      break;
    }  /* END switch (ret) */

  version = disrsi(fd, &ret);

  if (ret != DIS_SUCCESS)
    {
    DBPRT(("%s: no protocol version number %s\n",
      id,
      dis_emsg[ret]))

    goto bad;
    }

  switch (proto)
    {
    case RM_PROTOCOL:

      {
      time_t tmpT;

      DBPRT(("%s: got a resource monitor request\n",
        id))

      tmpT = pbs_tcp_timeout;

      pbs_tcp_timeout = 0;

      ret = rm_request(fd, version, 1);

      if (tmpT > 0)
        {
        /* restore */

        pbs_tcp_timeout = tmpT;
        }
      else
        {
        /* initialize */

        pbs_tcp_timeout = PMOMTCPTIMEOUT;
        }
      }    /* END BLOCK (case RM_PROTOCOL) */

    break;

    case TM_PROTOCOL:

      DBPRT(("%s: got an internal task manager request\n",
        id))

      ret = tm_request(fd, version);

      break;

    default:

      DBPRT(("%s: unknown request %d\n",
        id,
        proto))

      goto bad;

      /*NOTREACHED*/

      break;
    }  /* END switch (proto) */

  return(ret);

bad:

  close_conn(fd);

  return(-1);
  }  /* END do_tcp() */





/*
 * tcp_request - handle activity on an accepted TCP connection.
 *
 * The peer address is looked up in okclients; when it is not found the
 * attempt is logged and the connection is closed.  Otherwise do_tcp() is
 * called repeatedly until it returns a non-zero value.
 */

void tcp_request(

  int fd)

  {
  static char id[] = "tcp_request";

  extern struct connection svr_conn[];

  long  ipadd;
  int   handled;
  char  peer[80];

  ipadd = svr_conn[fd].cn_addr;

  sprintf(peer, "%s:%d",
          netaddr_pbs_net_t(ipadd),
          ntohs(svr_conn[fd].cn_port));

  if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer, "%s: fd %d addr %s",
            id,
            fd,
            peer);

    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      "tcp_request",
      log_buffer);
    }

  DIS_tcp_setup(fd);

  if (tfind(ipadd, &okclients) == NULL)
    {
    /* peer is not in the trusted client list */

    sprintf(log_buffer, "bad connect from %s",
            peer);

    log_err(errno, id, log_buffer);

    close_conn(fd);

    return;
    }

  log_buffer[0] = '\0';

  /* count only the requests after which the loop keeps going */

  handled = 0;

  while (1)
    {
    DIS_tcp_funcs();

    if (do_tcp(fd) != 0)
      break;

    handled++;
    }  /* END while (1) */

  DBPRT(("%s: processed %d\n",
         id,
         handled))

  return;
  }  /* END tcp_request() */



/*
 * find_signal_name - look up the name of a signal number in sig_tbl.
 *
 * The table is scanned until an entry with a NULL name is reached.
 * Returns the matching entry's name, or "unknown signal" if none matches.
 */

char *find_signal_name(

  int sig)

  {
  extern struct sig_tbl sig_tbl[];

  struct sig_tbl *entry = sig_tbl;

  while (entry->sig_name != NULL)
    {
    if (entry->sig_val == sig)
      return(entry->sig_name);

    entry++;
    }

  return("unknown signal");
  }




/*
 *  Kill a job.
 * Call with the job pointer and a signal number.
 *
 * NOTE:  sends a signal to a job, does not purge job record
 *
 * @see kill_task() - child
 * @see scan_for_exiting() - parent
 */

int kill_job(

  job  *pjob,  /* I */
  int   sig,   /* I */
  char *killer_id_name, /* I - process name of calling routine */
  char *why_killed_reason) /* I - reason for killing */

  {
  /*
   * Runs the precancel epilog, then calls kill_task() with the signal for
   * every task of the job whose status is TI_STATE_RUNNING.
   * Returns the sum of the kill_task() return values.
   */

  char *id = "kill_job";

  task *cur;
  int   killed = 0;

  /* the message is always formatted because DBPRT uses it even when the
     log level suppresses the log record */

  sprintf(log_buffer, "%s: sending signal %d, \"%s\" to job %s, reason: %s",
          killer_id_name,
          sig, find_signal_name(sig),
          pjob->ji_qs.ji_jobid,
          why_killed_reason);

  if (LOGLEVEL >= 2)
    {
    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, id, log_buffer);
    }

  DBPRT(("%s\n", log_buffer));

  /* NOTE:  should the precancel epilog only be executed if the job is
            active? (NYI) */

  /* NOTE:  the epilog blocks until complete, which may cause issues if a
            shutdown grace time is enabled.  The model could be changed so
            epilog.precancel runs in the background and kill_task() is
            executed once it completes (NYI).  That would let kill_job
            return immediately, but requires sigchild harvesting and the
            kill_task loop to run once that signal is received */

  /* NOTE:  if path_epilogpdel is not set, kill_task should be called
            immediately (NYI) */

  if (run_pelog(PE_EPILOGUSER, path_epilogpdel, pjob, PE_IO_TYPE_NULL) != 0)
    {
    log_err(-1, (char *)id, "precancel epilog failed");

    sprintf(PBSNodeMsgBuf, "ERROR:  precancel epilog failed");
    }

  for (cur = (task *)GET_NEXT(pjob->ji_tasks);
       cur != NULL;
       cur = (task *)GET_NEXT(cur->ti_jobtask))
    {
    /* only running tasks are signalled */

    if (cur->ti_qs.ti_status != TI_STATE_RUNNING)
      continue;

    if (LOGLEVEL >= 4)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "kill_job found a task to kill");
      }

    killed += kill_task(cur, sig, 0);
    }  /* END for (cur) */

  if (LOGLEVEL >= 6)
    {
    sprintf(log_buffer, "kill_job done (killed %d processes)",
            killed);

    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);
    }

  return(killed);
  }  /* END kill_job() */





/*
 * mom_lock - lock out other MOMs from this directory.
 */

static void mom_lock(

  int fds,
  int op)   /* F_WRLCK or F_UNLCK */

  {
  /*
   * Applies the requested lock operation to the whole file with a
   * non-waiting F_SETLK.  If the fcntl() call fails, the failure is
   * reported to the log and to stderr and the process exits with status 1.
   */

  struct flock request;
  char         cwd[256];

  request.l_type   = op;
  request.l_whence = SEEK_SET;
  request.l_start  = 0;
  request.l_len    = 0; /* whole file */

  if (fcntl(fds, F_SETLK, &request) >= 0)
    {
    return;
    }

  /* report the directory holding the lock file when it can be determined */

  cwd[0] = '\0';

  if (getcwd(cwd, sizeof(cwd)) == NULL)
    cwd[0] = '\0';

  sprintf(log_buffer, "cannot lock '%s/mom.lock' - another mom running",
          (cwd[0] != '\0') ? cwd : "$MOM_HOME");

  log_err(errno, msg_daemonname, log_buffer);

  fprintf(stderr, "%s\n",
          log_buffer);

  exit(1);
  }  /* END mom_lock() */





/*
 * size decoding routine.
 *
 * Accepts a resource pointer and returns the decoded size value in kb.
 * Returns 0 if the resource is not of type ATR_TYPE_SIZE, or if a value
 * given in words would overflow when converted to bytes.
 *
 *  sizeof(word) = sizeof(int)
 */

unsigned long getsize(

  resource *pres)  /* I */

  {
  unsigned long amount;
  unsigned long exponent;

  /* only size-typed resources can be decoded */

  if (pres->rs_value.at_type != ATR_TYPE_SIZE)
    return(0);

  amount   = pres->rs_value.at_val.at_size.atsv_num;
  exponent = pres->rs_value.at_val.at_size.atsv_shift;

  if (pres->rs_value.at_val.at_size.atsv_units == ATR_SV_WORDSZ)
    {
    /* value is in words: scale by the word size, giving up with 0 when
       the multiplication would overflow */

    if (amount > ULONG_MAX / sizeof(int))
      return(0);

    amount *= sizeof(int);
    }

  /* rebase the shift by 10 bits: larger shifts scale the value up,
     smaller (or equal) shifts scale it down */

  if (exponent > 10)
    {
    return(amount << (exponent - 10));
    }

  return(amount >> (10 - exponent));
  }





/*
 * time decoding routine.
 *
 * Accepts a resource pointer and returns the decoded value of time
 * in seconds.  Returns 0 if the resource is not of type ATR_TYPE_LONG
 * or if the stored value is negative.
 */

unsigned long gettime(

  resource *pres)

  {
  /* a usable time is a long-typed, non-negative value; anything else
     decodes to 0 */

  if ((pres->rs_value.at_type == ATR_TYPE_LONG) &&
      (pres->rs_value.at_val.at_long >= 0))
    {
    return((unsigned long)pres->rs_value.at_val.at_long);
    }

  return(0);
  }  /* END gettime() */




/* log_buffer reports detailed failure reason */

/* return 0:  no issues detected */
/* return 1:  over limit/child termination request detected */

/*
 * job_over_limit - decide whether a job must be terminated.
 *
 * Checks, in order: the local mom limits (mom_over_limit), a failure
 * reported by a sister node, and - on mother superior of a multi-node
 * job - the job-wide "cput" and "mem" limits against the summed usage.
 * When 1 is returned, log_buffer holds the reason.
 */

int job_over_limit(

  job *pjob)  /* I */

  {
  attribute *limits;
  attribute *consumed;
  resource  *limresc;
  resource  *useresc;

  struct resource_def *rd = NULL;

  unsigned long  total = 0;
  unsigned long  limit = 0;
  int            is_mem = 0;  /* 0 = checking cput, 1 = checking mem */
  int            i;
  char          *units;

  if (mom_over_limit(pjob))
    {
    /* mom limits violated, log_buffer populated */

    /* no more POLL's */

    pjob->ji_nodekill = pjob->ji_nodeid;

    return(1);
    }

  /* nothing further to check when there are no other nodes ... */

  if (pjob->ji_numnodes == 1)
    return(0);

  /* ... or when this mom is not mother superior */

  if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0)
    return(0);

  if (pjob->ji_nodekill != TM_ERROR_NODE)
    {
    /* one of the sister nodes reports a fatal error */

    hnodent *sister = &pjob->ji_hosts[pjob->ji_nodekill];

    if (sister->hn_sister != 0)
      {
      if (sister->hn_sister == SISTER_KILLDONE)
        {
        sprintf(log_buffer, "node %d (%s) requested job terminate, '%s' (%d)",
                pjob->ji_nodekill,
                sister->hn_host,
                "killdone",
                sister->hn_sister);
        }
      else if (sister->hn_sister == SISTER_BADPOLL)
        {
        sprintf(log_buffer, "node %d (%s) requested job terminate, '%s' (code %d)",
                pjob->ji_nodekill,
                sister->hn_host,
                "badpoll",
                sister->hn_sister);
        }
      else if (sister->hn_sister == SISTER_EOF)
        {
        sprintf(log_buffer, "node %d (%s) requested job terminate, '%s' (code %d) - received SISTER_EOF attempting to communicate with sister MOM's",
                pjob->ji_nodekill,
                sister->hn_host,
                "EOF",
                sister->hn_sister);
        }
      else
        {
        sprintf(log_buffer, "node %d (%s) requested job terminate, '%s' (code %d) - internal or network failure attempting to communicate with sister MOM's",
                pjob->ji_nodekill,
                sister->hn_host,
                "EOF",
                sister->hn_sister);
        }

      /* FAILURE */

      return(1);
      }  /* END if (sister->hn_sister != 0) */
    }    /* END if (pjob->ji_nodekill != TM_ERROR_NODE) */

  limits   = &pjob->ji_wattr[JOB_ATR_resource];
  consumed = &pjob->ji_wattr[JOB_ATR_resc_used];

  /* only enforce cpu time and memory usage; the loop is left early, with
     limresc still pointing at the offending limit, on the first violation */

  for (limresc = (resource *)GET_NEXT(limits->at_val.at_list);
       limresc != NULL;
       limresc = (resource *)GET_NEXT(limresc->rs_link))
    {
    if ((limresc->rs_value.at_flags & ATR_VFLAG_SET) == 0)
      continue;

    rd = limresc->rs_defin;

    if (strcmp(rd->rs_name, "cput") == 0)
      {
      if (igncput == TRUE)
        continue;

      is_mem = 0;
      }
    else if (strcmp(rd->rs_name, "mem") == 0)
      {
      if (ignmem == TRUE)
        continue;

      is_mem = 1;
      }
    else
      {
      continue;
      }

    useresc = find_resc_entry(consumed, rd);

    if ((useresc == NULL) ||
        ((useresc->rs_value.at_flags & ATR_VFLAG_SET) == 0))
      {
      continue;
      }

    /* local usage plus the usage recorded for the other nodes */

    total = is_mem ? getsize(useresc) : gettime(useresc);

    for (i = 0;i < pjob->ji_numnodes - 1;i++)
      {
      total += (is_mem ?
                pjob->ji_resources[i].nr_mem :
                pjob->ji_resources[i].nr_cput);
      }

    limit = is_mem ? getsize(limresc) : gettime(limresc);

    if (total >= limit)
      break;
    }  /* END for (limresc) */

  if (limresc == NULL)
    {
    /* no limit violation detected, job ok */

    return(0);
    }

  units = is_mem ? "kb" : "secs";

  sprintf(log_buffer, "%s job total %lu %s exceeded limit %lu %s",
          rd->rs_name,
          total,
          units,
          limit,
          units);

  pjob->ji_nodekill = pjob->ji_nodeid;

  return(1);
  }  /* END job_over_limit() */




/*
 * usage - print the command line summary to stderr.
 *
 * prog is printed on the "Usage:" line; one line follows per option.
 */

void usage(

  char *prog)  /* I */

  {
  fprintf(stderr, "Usage: %s\n",
          prog);

  /* each option is listed exactly once (-C used to be printed twice) */

  fprintf(stderr, "  -a <INT>  \\\\ Alarm Time\n");
  fprintf(stderr, "  -c <PATH> \\\\ Config File\n");
  fprintf(stderr, "  -C <PATH> \\\\ Checkpoint Dir\n");
  fprintf(stderr, "  -d <PATH> \\\\ Home Dir\n");
  fprintf(stderr, "  -D        \\\\ DEBUG - do not background\n");
  fprintf(stderr, "  -h        \\\\ Print Usage\n");
  fprintf(stderr, "  -H <HOST> \\\\ Hostname\n");
  fprintf(stderr, "  -l        \\\\ MOM Log Dir Path\n");
  fprintf(stderr, "  -L <PATH> \\\\ Logfile\n");
  fprintf(stderr, "  -M <INT>  \\\\ MOM Port\n");
  fprintf(stderr, "  -p        \\\\ Recover Jobs (Default)\n");
  fprintf(stderr, "  -P        \\\\ Purge Jobs\n");
  fprintf(stderr, "  -q        \\\\ Do Not Recover Jobs\n");
  fprintf(stderr, "  -r        \\\\ Recover Jobs (2)\n");
  fprintf(stderr, "  -R <INT>  \\\\ RM Port\n");
  fprintf(stderr, "  -s        \\\\ Logfile Suffix\n");
  fprintf(stderr, "  -S <INT>  \\\\ Server Port\n");
  fprintf(stderr, "  -v        \\\\ Version\n");
  fprintf(stderr, "  -x        \\\\ Do Not Use Privileged Ports\n");
  fprintf(stderr, "  --about   \\\\ Print Build Information\n");
  fprintf(stderr, "  --help    \\\\ Print Usage\n");
  fprintf(stderr, "  --version \\\\ Version\n");

  }  /* END usage() */





/*
 * MOMFindMyExe - attempt to find my running executable file.
 *                returns alloc'd memory that is never freed.
 */

static char *orig_path;

char *MOMFindMyExe(

  char *argv0)  /* I */
  {
  /*
   * Locates the running executable, trying in order:
   *   1. the /proc/self/exe symlink
   *   2. argv0 itself when it contains a '/', resolved with realpath()
   *   3. each element of $PATH combined with argv0
   *
   * Returns a calloc'd buffer holding the path on success, or NULL on
   * failure.  The buffer is released on every failure path, and every
   * path assembled from argv0, the cwd or $PATH is length checked so it
   * cannot overrun the buffer.
   */

  const size_t  linksize = MAXPATHLEN + 1;
  char         *link;
  char         *p;
  char         *p_next;
  char         *path;
  size_t        len;
  int           n;

  link = calloc(linksize, sizeof(char));

  if (link == NULL)
    {
    /* FAILURE */

    return(NULL);
    }

  /* Linux has a handy symlink, so try that first.  readlink() does not
     terminate its result, but the buffer is zero filled and one byte
     larger than the length passed in */

  if (readlink("/proc/self/exe", link, MAXPATHLEN) > 0)
    {
    if (link[0] != '\0' && link[0] != '[')
      {
      return(link);
      }
    }

  if (argv0 == NULL)
    {
    /* nothing left to search with */

    free(link);

    return(NULL);
    }

  /* if argv0 has a /, then it should exist relative to $PWD */

  if (strchr(argv0, '/') != NULL)
    {
    char resolvedpath[MAXPATHLEN+1];

    if (argv0[0] == '/')
      {
      n = snprintf(link, linksize, "%s", argv0);

      if ((n < 0) || ((size_t)n >= linksize))
        {
        /* argv0 does not fit */

        free(link);

        return(NULL);
        }
      }
    else
      {
      if (getcwd(link, MAXPATHLEN) == NULL)
        {
        free(link);

        return(NULL);
        }

      len = strlen(link);

      n = snprintf(link + len, linksize - len, "/%s", argv0);

      if ((n < 0) || ((size_t)n >= linksize - len))
        {
        /* cwd + argv0 does not fit */

        free(link);

        return(NULL);
        }
      }

    if (realpath(link, resolvedpath) == NULL)
      {
      free(link);

      return(NULL);
      }

    /* both buffers have the same size, so this copy cannot overflow */

    strcpy(link, resolvedpath);

    if (access(link, X_OK) == 0)
      {
      return(link);
      }

    free(link);

    return(NULL);
    }

  /* argv0 doesn't have a /, so search $PATH */

  path = getenv("PATH");

  if (path != NULL)
    {
    for (p = path; *p; p = p_next)
      {
      char   *q;
      size_t  p_len;

      /* the current element runs up to the next ':' or the end of $PATH */

      q = strchr(p, ':');

      if (q == NULL)
        q = p + strlen(p);

      p_len = q - p;

      p_next = (*q == '\0' ? q : q + 1);

      /* We have a path item at p, of length p_len.
         Now concatenate the path item and argv0.  */

      if (p_len == 0)
        {
        /* An empty PATH element designates the current directory.  */

        if (getcwd(link, MAXPATHLEN) == NULL)
          {
          free(link);

          return(NULL);
          }

        len = strlen(link);

        n = snprintf(link + len, linksize - len, "/%s", argv0);

        if ((n < 0) || ((size_t)n >= linksize - len))
          {
          /* candidate does not fit, try the next element */

          continue;
          }
        }
      else
        {
        if (p_len > MAXPATHLEN)
          {
          /* element alone is longer than the buffer, try the next one */

          continue;
          }

        n = snprintf(link, linksize, "%.*s/%s", (int)p_len, p, argv0);

        if ((n < 0) || ((size_t)n >= linksize))
          {
          /* candidate does not fit, try the next element */

          continue;
          }
        }

      if (access(link, X_OK) == 0)
        {
        return(link);
        }
      }  /* END for (p = path; *p; p = p_next) */
    }

  /* not found anywhere */

  free(link);

  return(NULL);
  }  /* END MOMFindMyExe() */





/*
 * MOMGetFileMtime - return the mtime of a file
 */

time_t MOMGetFileMtime(

  const char *fpath)

  {
  struct stat info;

  /* a missing or empty path has no modification time to report */

  if (fpath == NULL)
    return(0);

  if (fpath[0] == '\0')
    return(0);

  /* 0 is also reported when the file cannot be stat'd */

  if (stat(fpath, &info) != 0)
    return(0);

  return(info.st_mtime);
  }  /* END MOMGetFileMtime */





/*
 * MOMCheckRestart() - set mom_run_state to restart if appropriate.
 *                     this is called when no jobs are running (below
 *                     in the main loop, and in job_purge().)
 */

void MOMCheckRestart(void)
  {
  time_t current;

  /* nothing to do unless MOMConfigRestart and the recorded executable
     mtime are both positive */

  if (MOMConfigRestart <= 0)
    return;

  if (MOMExeTime <= 0)
    return;

  current = MOMGetFileMtime(MOMExePath);

  /* an unreadable mtime (0) or an unchanged one means no restart */

  if ((current <= 0) || (current == MOMExeTime))
    return;

  if (mom_run_state == MOM_RUN_STATE_RUNNING)
    mom_run_state = MOM_RUN_STATE_RESTART;

  sprintf(log_buffer, "%s has changed, initiating re-exec (now: %ld, was: %ld)",
          MOMExePath,
          (long int)current,
          (long int)MOMExeTime);

  if (LOGLEVEL > 6)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, msg_daemonname, log_buffer);
    }

  DBPRT(("%s\n", log_buffer));
  }  /* END MOMCheckRestart() */




/*
 * initialize_globals
 */

void initialize_globals(void)

  {
  /*
   * Sets the daemon's global state to its start-up defaults: identity,
   * start time, empty job/request lists, executable path and mtime,
   * helper command paths, the saved $PATH and the three service ports.
   */

  char *env;   /* value of the environment variable being examined */

  strcpy(pbs_current_user, "pbs_mom");
  msg_daemonname = pbs_current_user;

  time(&MOMStartTime);

  CLEAR_HEAD(svr_newjobs);
  CLEAR_HEAD(svr_alljobs);
  CLEAR_HEAD(mom_polljobs);
  CLEAR_HEAD(svr_requests);
  CLEAR_HEAD(mom_varattrs);

  /* PBSMOMHOME, when set, overrides the home directory */

  env = getenv("PBSMOMHOME");

  if (env != NULL)
    path_home = env;

  MOMConfigVersion[0] = '\0';

  mom_server_all_init();

  pbsgroup = getgid();
  pbsuser  = getuid();
  loopcnt  = time(NULL);

  /* remember which executable is running and when it was last modified */

  MOMExePath = MOMFindMyExe(program_name);
  MOMExeTime = MOMGetFileMtime(MOMExePath);

  strcpy(xauth_path, XAUTH_PATH);
  strcpy(rcp_path, RCP_PATH);
  strcpy(rcp_args, RCP_ARGS);
#ifdef DEFAULT_MOMLOGDIR
  path_log = strdup(DEFAULT_MOMLOGDIR);
#endif
#ifdef DEFAULT_MOMLOGSUFFIX
  log_init(DEFAULT_MOMLOGSUFFIX, NULL);
#endif

  /* PATH is restored before a restart */

  env = getenv("PATH");

  if (env != NULL)
    orig_path = strdup(env);

  /* each service port comes from its environment variable when present;
     a value that is still not positive afterwards is looked up with
     get_svrport() */

  env = getenv("PBS_MOM_SERVICE_PORT");

  if (env != NULL)
    pbs_mom_port = (int)strtol(env, NULL, 10);

  if (pbs_mom_port <= 0)
    {
    pbs_mom_port = get_svrport(PBS_MOM_SERVICE_NAME, "tcp", PBS_MOM_SERVICE_PORT);
    }

  env = getenv("PBS_BATCH_SERVICE_PORT");

  if (env != NULL)
    default_server_port = (int)strtol(env, NULL, 10);

  if (default_server_port <= 0)
    {
    default_server_port = get_svrport(PBS_BATCH_SERVICE_NAME, "tcp", PBS_BATCH_SERVICE_PORT_DIS);
    }

  env = getenv("PBS_MANAGER_SERVICE_PORT");

  if (env != NULL)
    pbs_rm_port = (int)strtol(env, NULL, 10);

  if (pbs_rm_port <= 0)
    {
    pbs_rm_port = get_svrport(PBS_MANAGER_SERVICE_NAME, "tcp", PBS_MANAGER_SERVICE_PORT);
    }

  /* set timeout values for MOM */

  MaxConnectTimeout = 10000;  /* in microseconds */

  memset(JobsToResend,0,sizeof(JobsToResend));

  }  /* END initialize_globals() */



/*
 * stop_me - signal handler that requests MOM shutdown
 *
 * Moves mom_run_state to MOM_RUN_STATE_EXIT and writes a forced log
 * record naming the signal.  Jobs are not touched by this handler.
 */

static void stop_me(

  int sig)  /* I - number of the signal that was caught */

  {
  static const char action[] = "leaving jobs running, just exiting";

  /* just exit, leaving jobs running */

  mom_run_state = MOM_RUN_STATE_EXIT;

  sprintf(log_buffer, "caught signal %d: %s", sig, action);

  log_record(
    PBSEVENT_SYSTEM | PBSEVENT_FORCE,
    PBS_EVENTCLASS_SERVER,
    msg_daemonname,
    log_buffer);
  }  /* END void stop_me() */




/*
 * PBSAdjustLogLevel - signal handler that changes LOGLEVEL at run time
 *
 * SIGUSR1 raises LOGLEVEL by one (capped at 10), SIGUSR2 lowers it by one
 * (never below 0).  Any other signal leaves LOGLEVEL unchanged.  In every
 * case a forced log record reports the signal and the resulting level.
 */

static void PBSAdjustLogLevel(

  int sig)  /* I - number of the signal that was caught */

  {
  switch (sig)
    {
    case SIGUSR1:

      /* increase log level */

      LOGLEVEL = MIN(LOGLEVEL + 1, 10);

      break;

    case SIGUSR2:

      /* decrease log level */

      LOGLEVEL = MAX(LOGLEVEL - 1, 0);

      break;

    default:

      /* other signals only get reported below */

      break;
    }

  sprintf(log_buffer, "received signal %d: adjusting loglevel to %d", sig, LOGLEVEL);

  log_record(
    PBSEVENT_SYSTEM | PBSEVENT_FORCE,
    PBS_EVENTCLASS_SERVER,
    msg_daemonname,
    log_buffer);
  }  /* END PBSAdjustLogLevel() */





/*
 * mk_dirs - make the directory names used by MOM
 *
 * Returns a newly malloc'ed string made of path_home, a '/' separator
 * (added only when path_home does not already end in one) and base.
 * The result is never freed here; it belongs to the caller.  When the
 * memory cannot be allocated the process exits with status 2.
 */

char *mk_dirs(

  char *base)  /* I - name to append to path_home */

  {
  char   *pn;
  size_t  ltop = strlen(path_home);

  /* +2: room for the optional '/' separator and the terminating NUL */

  pn = malloc(ltop + strlen(base) + 2);

  if (pn == NULL)
    {
    /* cannot allocate memory */

    perror("malloc");

    exit(2);
    }

  strcpy(pn, path_home);

  /* an empty path_home has no last character to inspect (looking at    */
  /* path_home[-1] would read out of bounds), so it gets the separator  */

  if ((ltop == 0) || (path_home[ltop - 1] != '/'))
    strcat(pn, "/");

  strcat(pn, base);

  return(pn);
  }  /* END mk_dirs() */

/*
 * mom_parse_port_arg - convert a command line port argument
 *
 * Returns the port number when text converts (strtol, base 10) to a value
 * in the range 1-65535 with nothing following the digits; returns 0 for
 * anything else.
 */

static unsigned int mom_parse_port_arg(

  const char *text)  /* I */

  {
  char *end = NULL;
  long  value;

  if ((text == NULL) || (*text == '\0'))
    return(0);

  value = strtol(text, &end, 10);

  /* an out-of-range conversion yields LONG_MIN/LONG_MAX, rejected here too */

  if ((*end != '\0') || (value <= 0) || (value > 65535))
    return(0);

  return((unsigned int)value);
  }  /* END mom_parse_port_arg() */




/*
 * parse_command_line - process the pbs_mom command line options
 *
 * Stores the option values in the corresponding globals.  The long
 * options --about, --version and --help, as well as -h and -v, print
 * their output and exit(0).  A bad port value (-M, -R, -S) prints a
 * message and exits with status 1.  Any other error (unknown option, bad
 * alarm time, over-long config file name, more than one of -p/-P/-q/-r,
 * left-over arguments) ends in usage() followed by exit(1).
 */

void parse_command_line(

  int   argc,    /* I */
  char *argv[])  /* I */

  {
  extern char *optarg;
  extern int   optind;
  int          errflg;
  int          c;
  char        *ptr;                   /* local tmp variable */

  errflg = 0;

  while ((c = getopt(argc, argv, "a:c:C:d:DhH:l:L:M:pPqrR:s:S:vx-:")) != -1)
    {
    switch (c)
      {

      case '-':

        /* long options arrive as the argument of option '-' */

        if (optarg == NULL)
          break;

        if (!strcmp(optarg, "about"))
          {
          printf("package:     %s\n", PACKAGE_STRING);
          printf("sourcedir:   %s\n", PBS_SOURCE_DIR);
          printf("configure:   %s\n", PBS_CONFIG_ARGS);
          printf("buildcflags: %s\n", PBS_CFLAGS);
          printf("buildhost:   %s\n", PBS_BUILD_HOST);
          printf("builddate:   %s\n", PBS_BUILD_DATE);
          printf("builddir:    %s\n", PBS_BUILD_DIR);
          printf("builduser:   %s\n", PBS_BUILD_USER);
          printf("installdir:  %s\n", PBS_INSTALL_DIR);
          printf("serverhome:  %s\n", PBS_SERVER_HOME);
          printf("version:     %s\n", PACKAGE_VERSION);

          exit(0);
          }
        else if (!strcmp(optarg, "version"))
          {
          printf("version: %s\n",
                 PACKAGE_VERSION);

          exit(0);
          }
        else if (!strcmp(optarg, "help"))
          {
          usage(argv[0]);

          exit(0);
          }
        else
          {
          errflg = 1;
          }

        break;

      case 'a':

        alarm_time = (int)strtol(optarg, &ptr, 10);

        if ((alarm_time <= 0) || (*ptr != '\0'))
          {
          fprintf(stderr, "%s: bad alarm time\n",
                  optarg);

          errflg = 1;
          }

        break;

      case 'c': /* config file */

        /* NOTE(review): the sizeof below relies on config_file being a */
        /* char array (not a pointer) in this file - confirm            */

        if (strlen(optarg) >= sizeof(config_file))
          {
          /* refuse the name instead of writing past the buffer */

          fprintf(stderr, "%s: config file name too long\n",
                  optarg);

          errflg = 1;

          break;
          }

        config_file_specified = 1;

        strcpy(config_file, optarg); /* remember name */

        break;

      case 'h':

        usage(argv[0]);  /* exits */

        exit(0);

        break;

      case 'H': /* multihomed host */

        hostname_specified = 1;

        strncpy(mom_host, optarg, PBS_MAXHOSTNAME); /* remember name */

        break;

      case 'C':
        mom_checkpoint_set_directory_path(optarg);
        break;

      case 'd': /* directory */

        path_home = optarg;

        break;

      case 'D':  /* debug */

        DOBACKGROUND = 0;

        break;

      case 'l':

        path_log = strdup(optarg);

        break;

      case 'L':

        log_file = optarg;

        break;

      case 'M':

        pbs_mom_port = mom_parse_port_arg(optarg);

        if (pbs_mom_port == 0)
          {
          fprintf(stderr, "Bad MOM port value %s\n",
                  optarg);

          exit(1);
          }

        break;

      case 'p':

        /* -p, -P, -q and -r are mutually exclusive recovery modes */

        if (!recover_set)
          {
          recover = JOB_RECOV_RUNNING;
          recover_set = TRUE;
          }
        else
          {
          errflg = 1;
          }

        break;

      case 'P':

        if ( !recover_set )
          {
          recover = JOB_RECOV_DELETE;
          recover_set = TRUE;
          }
        else
          {
          errflg = 1;
          }

        break;

      case 'r':

        if (!recover_set)
          {
          recover = JOB_RECOV_TERM_REQUE;
          recover_set = TRUE;
          }
        else
          {
          errflg = 1;
          }

        break;

      case 'q':

        if (!recover_set)
          {
          recover = JOB_RECOV_REQUE;
          recover_set = TRUE;
          }
        else
          {
          errflg = 1;
          }

        break;

      case 'R':

        pbs_rm_port = mom_parse_port_arg(optarg);

        if (pbs_rm_port == 0)
          {
          fprintf(stderr, "Bad RM port value %s\n",
                  optarg);

          exit(1);
          }

        break;

      case 's':

        log_init(optarg, NULL);

        break;

      case 'S':

        default_server_port = mom_parse_port_arg(optarg);

        if (default_server_port == 0)
          {
          fprintf(stderr, "Bad Server port value %s\n",
                  optarg);

          exit(1);
          }

        break;

      case 'v':

        fprintf(stderr, "version: %s\n",
                PACKAGE_VERSION);

        exit(0);

        break;

      case 'x':

        port_care = FALSE;

        break;

      case '?':

      default:

        errflg = 1;

        break;
      }  /* END switch(c) */
    }    /* END while ((c = getopt(...)) != -1) */

  /* left-over non-option arguments are an error as well */

  if ((errflg > 0) || (optind != argc))
    {
    usage(argv[0]);  /* exits */

    exit(1);
    }

  return;
  }  /* END parse_command_line() */






/**
 * setup_program_environment
 *
 * One-time start-up work for the MOM daemon: checks for root, sets the
 * umask, supplementary groups and resource limits, builds the working
 * paths below path_home, opens the log and lock file, binds the network
 * ports, optionally goes into the background, installs the signal
 * handlers, resolves the host name, reads the config file and recovers
 * jobs.
 *
 * @return 0 on success.  On failure either a non-zero code is returned
 *         (1, 2, 3 or -1 depending on the step that failed) or, for some
 *         steps, the process exits directly with status 1.
 */

int setup_program_environment(void)
  {
  static char   id[] = "setup_program_environment";
  int           c;
  int           hostc = 1;  /* gethostname() result; stays 1 when a host name was specified */
#if !defined(DEBUG) && !defined(DISABLE_DAEMONS)
  FILE         *dummyfile;
#endif
  int  tryport;
  int  rppfd;  /* fd for rm and im comm */
  int  privfd = 0; /* fd for sending job info */

  struct sigaction act;
  char         *ptr;            /* local tmp variable */

  /* must be started with real and effective uid of 0 */

  if (IamRoot() == 0)
    {
        return(1);
    }

  /* The following is code to reduce security risks                */
  /* start out with standard umask, system resource limit infinite */

  umask(022);

  if (getenv("PBSLOGLEVEL") != NULL)
    {
    LOGLEVEL = (int)strtol(getenv("PBSLOGLEVEL"), NULL, 0);
    }

  if (getenv("PBSDEBUG") != NULL)
    {
    DEBUGMODE = 1;

    DOBACKGROUND = 0;
    }

  /* modify program environment */

  if ((num_var_env = setup_env(PBS_ENVIRON)) == -1)
    {
    exit(1);
    }

  c = getgid();

  /* secure suppl. groups */
  if (setgroups(1,(gid_t *)&c) != 0)
    {
    snprintf(log_buffer, sizeof(log_buffer),
      "Unable to drop secondary groups. Some MAC framework is active?\n");
    log_err(errno, id, log_buffer);
    snprintf(log_buffer, sizeof(log_buffer),
      "setgroups(group = %lu) failed: %s\n",
      (unsigned long)c, strerror(errno));
    log_err(errno, id, log_buffer);

    return(1);
    }

#ifndef DEBUG
#ifdef _CRAY

  limit(C_JOB,      0, L_CPROC, 0);
  limit(C_JOB,      0, L_CPU,   0);
  limit(C_JOBPROCS, 0, L_CPU,   0);
  limit(C_PROC,     0, L_FD,  255);
  limit(C_JOB,      0, L_FSBLK, 0);
  limit(C_JOBPROCS, 0, L_FSBLK, 0);
  limit(C_JOB,      0, L_MEM  , 0);
  limit(C_JOBPROCS, 0, L_MEM  , 0);

#else /* _CRAY */

    {

    struct rlimit rlimit;

    rlimit.rlim_cur = RLIM_INFINITY;
    rlimit.rlim_max = RLIM_INFINITY;
    setrlimit(RLIMIT_CPU,   &rlimit);
    setrlimit(RLIMIT_FSIZE, &rlimit);
    setrlimit(RLIMIT_DATA,  &rlimit);
#ifdef RLIMIT_RSS
    setrlimit(RLIMIT_RSS,   &rlimit);
#endif /* RLIMIT_RSS */
#ifdef RLIMIT_VMEM
    setrlimit(RLIMIT_VMEM, &rlimit);
#endif /* RLIMIT_VMEM */
    }  /* END BLOCK */
#endif /* else _CRAY */
#endif /* DEBUG */

  /* set up and validate home paths */

  c = 0;

  mom_home         = mk_dirs("mom_priv");
  path_jobs        = mk_dirs("mom_priv/jobs/");
  path_epilog      = mk_dirs("mom_priv/epilogue");
  path_prolog      = mk_dirs("mom_priv/prologue");
  path_epiloguser  = mk_dirs("mom_priv/epilogue.user");
  path_prologuser  = mk_dirs("mom_priv/prologue.user");
  path_epilogp     = mk_dirs("mom_priv/epilogue.parallel");
  path_prologp     = mk_dirs("mom_priv/prologue.parallel");
  path_epiloguserp = mk_dirs("mom_priv/epilogue.user.parallel");
  path_prologuserp = mk_dirs("mom_priv/prologue.user.parallel");
  path_epilogpdel  = mk_dirs("mom_priv/epilogue.precancel");

#ifndef DEFAULT_MOMLOGDIR

  if (path_log == NULL)
    path_log       = mk_dirs("mom_logs");

#endif

  path_spool       = mk_dirs("spool/");

  path_undeliv     = mk_dirs("undelivered/");

#ifdef __CYGWIN__
/*  AUX is reserved word in Windows  */
  path_aux         = mk_dirs("auxx/");
#else
  path_aux         = mk_dirs("aux/");
#endif  /* __CYGWIN__ */

  path_server_name = mk_dirs("server_name");

  init_resc_defs();

  /* c collects failures from here through the chk_file_sec() calls */

  c |= mom_checkpoint_init();

  /* change working directory to mom_priv */

  if (chdir(mom_home) == -1)
    {
    char tmpLine[1024];

    /* bounded: mom_home is built from path_home, whose length is not limited */

    snprintf(tmpLine, sizeof(tmpLine), "cannot change directory to home '%s'",
             mom_home);

    perror(tmpLine);

    return(1);
    }

#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)

  c |= chk_file_sec(path_jobs,    1, 0, S_IWGRP | S_IWOTH, 1, NULL);

  c |= chk_file_sec(path_aux,     1, 0, S_IWGRP | S_IWOTH, 1, NULL);

  c |= chk_file_sec(path_spool,   1, 1, S_IWOTH,         0, NULL);

  c |= chk_file_sec(path_undeliv, 1, 1, S_IWOTH,         0, NULL);

  c |= chk_file_sec(PBS_ENVIRON,  0, 0, S_IWGRP | S_IWOTH, 0, NULL);

  if (c)
    {
    return(3);
    }

#endif  /* not DEBUG and not NO_SECURITY_CHECK */

  if (hostname_specified == 0)
    {
    /* result is examined further down, once signal handling is set up */

    hostc = gethostname(mom_host, PBS_MAXHOSTNAME);
    }

  log_init(NULL, mom_host);

  /* open log file while std in,out,err still open, forces to fd 4 */

  if ((c = log_open(log_file, path_log)) != 0)
    {
    /* use given name */

    fprintf(stderr, "pbs_mom: Unable to open logfile\n");

    return(1);
    }

  check_log(); /* see if this log should be rolled */

  /* relative name: the working directory is mom_home at this point */

  lockfds = open("mom.lock", O_CREAT | O_WRONLY, 0644);

  if (lockfds < 0)
    {
    sprintf(log_buffer, "pbs_mom: unable to open lock file - errno=%d '%s'\n",
      errno,
      strerror(errno));

    fprintf(stderr, "%s",
      log_buffer);

    return(1);
    }

  mom_lock(lockfds, F_WRLCK); /* See if other MOMs are running */

  /* initialize the network interface */

  if (init_network(pbs_mom_port, process_request) != 0)
    {
    c = errno;

    sprintf(log_buffer, "server port = %u, errno = %d (%s)",
      pbs_mom_port,
      c,
      strerror(c));

    if (c == EADDRINUSE)
      strcat(log_buffer, ", already in use");

    log_err(-1, msg_daemonname, log_buffer);

    strcat(log_buffer, "\n");

    fprintf(stderr, "%s", log_buffer);

    return(3);
    }

  if (init_network(pbs_rm_port, tcp_request) != 0)
    {
    c = errno;

    sprintf(log_buffer, "resource (tcp) port = %u, errno = %d (%s)",
      pbs_rm_port,
      c,
      strerror(c));

    if (c == EADDRINUSE)
      strcat(log_buffer, ", already in use");

    log_err(-1, msg_daemonname, log_buffer);

    strcat(log_buffer, "\n");

    fprintf(stderr, "%s", log_buffer);

    return(3);
    }

  /* go into the background and become own session/process group */

#if !defined(DEBUG) && !defined(DISABLE_DAEMONS)

  mom_lock(lockfds, F_UNLCK); /* unlock so child can relock */

  if (DOBACKGROUND == 1)
    {
    pid_t child;

    child = fork();

    if (child < 0)
      {
      /* without a child there is nothing to continue as */

      log_err(errno, msg_daemonname, "fork failed");

      return(2);
      }

    if (child > 0)
      {
      exit(0); /* parent goes away */
      }

    if (setsid() == -1)
      {
      log_err(errno, msg_daemonname, "setsid failed");

      return(2);
      }

    /* re-point fds 0, 1 and 2 at /dev/null; the asserts check that */
    /* each fopen() landed on the descriptor that was just closed   */

    fclose(stdin);

    fclose(stdout);
    fclose(stderr);

    dummyfile = fopen("/dev/null", "r");
    assert((dummyfile != 0) && (fileno(dummyfile) == 0));

    dummyfile = fopen("/dev/null", "w");
    assert((dummyfile != 0) && (fileno(dummyfile) == 1));

    dummyfile = fopen("/dev/null", "w");
    assert((dummyfile != 0) && (fileno(dummyfile) == 2));
    }  /* END if (DOBACKGROUND == 1) */

  mom_lock(lockfds, F_WRLCK); /* lock out other MOMs */

#else /* DEBUG */
#if defined(_CRAY)

  /* CRAY cannot restart checkpointed job if MOM has controlling tty */

  sprintf(log_buffer, "/tmp/pbs_mom.%d",
          getpid());

  printf("Debug output will be in %s\n",
         log_buffer);

  freopen(log_buffer, "w", stdout);

  freopen(log_buffer, "w", stderr);

  ioctl(0, TCCLRCTTY, 0);

  close(0);

#endif /* _CRAY */
  setvbuf(stdout, NULL, _IOLBF, 0);

  setvbuf(stderr, NULL, _IOLBF, 0);

#endif /* DEBUG */

  /* write MOM's pid into lockfile */

  if (ftruncate(lockfds, (off_t)0) != 0)
    {
    log_err(errno, msg_daemonname, "failed to truncate lockfile");

    return(2);
    }

  sprintf(log_buffer, "%ld\n",

          (long)getpid());

  /* strlen + 1: the terminating NUL is written to the file as well */

  if (write(lockfds, log_buffer, strlen(log_buffer) + 1) !=
      (ssize_t)(strlen(log_buffer) + 1))
    {
    log_err(errno, msg_daemonname, "failed to write to lockfile");

    return(2);
    }

#if (PLOCK_DAEMONS & 4)
  /* lock daemon into memory */

  /* NOTE:  should reduce maximum stack limit using ulimit() before calling plock */

  if (plock(PROCLOCK) == -1)
    {
    log_err(errno, msg_daemonname, "failed to lock mom into memory with plock");
    }
  else
    {
    MOMIsPLocked = 1;
    }

#endif /* PLOCK_DAEMONS */

  sigemptyset(&allsigs);

  act.sa_mask = allsigs;

  act.sa_flags = 0;

  /*
  ** Signals to be ignored.
  */

  act.sa_handler = SIG_IGN;

  sigaction(SIGPIPE, &act, NULL);

#ifdef SIGINFO
  sigaction(SIGINFO, &act, NULL);

#endif /* SIGINFO */

  sigaddset(&allsigs, SIGHUP); /* remember to block these */

  sigaddset(&allsigs, SIGINT); /* during critical sections */

  sigaddset(&allsigs, SIGTERM); /* so we don't get confused */

  sigaddset(&allsigs, SIGCHLD);

#ifdef _CRAY
  sigaddset(&allsigs, WJSIGNAL);

#endif
  act.sa_mask = allsigs;

  /*
  ** We want to abort system calls
  ** and call a function.
  */
#ifdef SA_INTERRUPT
  act.sa_flags |= SA_INTERRUPT; /* don't restart system calls */

#endif

#ifdef NOSIGCHLDMOM
  act.sa_handler = SIG_DFL;
#else
  act.sa_handler = catch_child; /* set up to catch death of child */
#endif

  sigaction(SIGCHLD, &act, NULL);

#ifdef _CRAY
  sigaction(WJSIGNAL, &act, NULL);

#endif

  /*
   * Catch these signals to ensure we core dump even if
   * our rlimit for core dumps is set to 0 initially.
   *
   * Chris Samuel - VPAC
   * csamuel@vpac.org - 29th July 2003
   *
   * Now conditional on the PBSCOREDUMP environment variable.
   */

  if (getenv("PBSCOREDUMP"))
    {
    act.sa_handler = catch_abort;   /* make sure we core dump */

    sigaction(SIGSEGV, &act, NULL);
    sigaction(SIGBUS, &act, NULL);
    sigaction(SIGFPE, &act, NULL);
    sigaction(SIGILL, &act, NULL);
    sigaction(SIGTRAP, &act, NULL);
    sigaction(SIGSYS, &act, NULL);
    }

  act.sa_handler = catch_hup; /* do a restart on SIGHUP */

  sigaction(SIGHUP, &act, NULL);

  act.sa_handler = toolong; /* handle an alarm call */
  sigaction(SIGALRM, &act, NULL);

  act.sa_handler = stop_me; /* shutdown for these */
  sigaction(SIGINT, &act, NULL);
  sigaction(SIGTERM, &act, NULL);

  act.sa_handler = PBSAdjustLogLevel;
  sigaction(SIGUSR1, &act, NULL);
  sigaction(SIGUSR2, &act, NULL);

  /* NOTE(review): the signals below are also given PBSAdjustLogLevel, */
  /* since act.sa_handler is not changed again - confirm that this is  */
  /* intended rather than stop_me                                      */

#ifdef SIGXCPU
  sigaction(SIGXCPU, &act, NULL);
#endif

#ifdef SIGXFSZ
  sigaction(SIGXFSZ, &act, NULL);
#endif

#ifdef SIGCPULIM
  sigaction(SIGCPULIM, &act, NULL);
#endif

#ifdef SIGSHUTDN
  sigaction(SIGSHUTDN, &act, NULL);
#endif

#ifdef _CRAY

  /* Special code for CRAY MLS Systems */

  if (sysconf(_SC_CRAY_SECURE_SYS))
    {

    struct usrv usrv;

    if (getusrv(&usrv) < 0)
      {
      fprintf(stderr, "cannot get security info\n");

      return(1);
      }

    usrv.sv_permit = 0;

    usrv.sv_intcat = 0;
    usrv.sv_valcat = 0;

    if (setusrv(&usrv) < 0)
      {
      fprintf(stderr, "cannot put security info\n");

      return(1);
      }

    if (setucat(0) < 0)
      {
      fprintf(stderr, "cannot put security cat\n");

      return(2);
      }
    }

#endif /* _CRAY */

  /* initialize variables */


  if ((hostname_specified != 0) || (hostc == 0))
    {
    /* mom_host holds a usable name: keep it as the short name and */
    /* replace it with the full host name                          */

    strcpy(mom_short_name, mom_host);

    c = get_fullhostname(mom_host, mom_host, PBS_MAXHOSTNAME, NULL);

    if (c != 0)
      {
      char logbuf[1024];

      snprintf(logbuf, 1024, "Unable to get my full hostname for %s error %d",
               mom_host,
               c);

      log_err(-1, msg_daemonname, logbuf);

      return(-1);
      }
    }
  else
    {
    /* no host name was specified and gethostname() failed above */

    log_err(-1, msg_daemonname, "Unable to get my host name");

    return(-1);
    }

  time_now = time((time_t *)0);

  ret_size = 4096;

  if ((ret_string = malloc(ret_size)) == NULL)
    {
    perror("malloc");

    exit(1);
    }

  if ((rppfd = rpp_bind(pbs_rm_port)) == -1)
    {
    log_err(errno, id, "rpp_bind");

    exit(1);
    }

  rpp_fd = -1;  /* force rpp_bind() to get another socket */

  /* search downwards for a free port, starting just below the limit */

  tryport = (port_care != FALSE) ? IPPORT_RESERVED : PMAX_PORT;

  while (--tryport > 0)
    {
    if ((privfd = rpp_bind(tryport)) != -1)
      break;

    /* only "in use"/"not available" justify trying the next port */

    if ((errno != EADDRINUSE) && (errno != EADDRNOTAVAIL))
      break;
    }

  if (privfd == -1)
    {
    log_err(errno, id, "no privileged ports");

    exit(1);
    }

  localaddr = addclient("localhost");

  addclient(mom_host);

  if (gethostname(ret_string, ret_size) == 0)
    addclient(ret_string);

  tmpdir_basename[0] = '\0';

  if (read_config(NULL))
    {
    fprintf(stderr, "%s: cannot load config file '%s'\n",
            program_name,
            config_file);

    exit(1);
    }

  initialize();  /* init RM code */

  add_conn(rppfd, Primary, (pbs_net_t)0, 0, PBS_SOCK_INET, rpp_request);
  add_conn(privfd, Primary, (pbs_net_t)0, 0, PBS_SOCK_INET, rpp_request);

  /* initialize machine-dependent polling routines */

  if ((c = mom_open_poll()) != PBSE_NONE)
    {
    log_err(c, msg_daemonname, "pre_poll failed");

    return(3);
    }

  /* keep the result so the code that is logged belongs to this call */

  if ((c = mom_get_sample()) != PBSE_NONE)
    {
    log_err(c, msg_daemonname, "mom_get_sample failed after mom_open_poll");

    return(3);
    }

  /* recover & abort jobs which were under MOM's control */

  log_record(
    PBSEVENT_DEBUG,
    PBS_EVENTCLASS_SERVER,
    msg_daemonname,
    "before init_abort_jobs");

  init_abort_jobs(recover);

#ifdef _POSIX_MEMLOCK
  /* call mlockall() only 1 time, since it seems to leak mem */

  if (MOMIsLocked == 0)
    {
    int mlockall_return;

    /* make sure pbs_mom stays in RAM and doesn't get paged out */

    mlockall_return = mlockall(MCL_CURRENT | MCL_FUTURE);

    /* exit iff mlock failed, but ignore function not implemented error */

    if ((mlockall_return == -1) && (errno != ENOSYS))
      {
      perror("pbs_mom:mom_main.c:mlockall()");

      exit(1);
      }

    MOMIsLocked = 1;
    }

#endif /* _POSIX_MEMLOCK */

  /* record the fact that we are up and running */

  log_record(
    PBSEVENT_SYSTEM | PBSEVENT_FORCE,
    PBS_EVENTCLASS_SERVER,
    msg_daemonname,
    "Is up");

  DBPRT(("MOM is up\n"));

  sprintf(
    log_buffer,
    "MOM executable path and mtime at launch: %s %ld",
    MOMExePath == NULL ? "NULL" : MOMExePath,
    (long int)MOMExeTime);

  log_record(
    PBSEVENT_SYSTEM,
    PBS_EVENTCLASS_SERVER,
    id,
    log_buffer);

  /* MOMSLEEPTIME, when set, adds a randomized start-up delay */

  ptr = getenv("MOMSLEEPTIME");

  if (ptr != NULL)
    {
    long tmpL;

    tmpL = strtol(ptr, NULL, 10);

    srand(getpid());

    sleep(tmpL % (rand() + 1));
    }  /* END if (ptr != NULL) */

  return(0);
  }  /* END setup_program_environment() */






/*
 * TMOMJobGetStartInfo - locate the TMOMStartInfo slot that refers to pjob
 *
 * The first slot whose pjob member equals the pjob argument is handed
 * back through TJEP and SUCCESS is returned.  FAILURE is returned (and
 * *TJEP left untouched) when none of the TMAX_JE slots matches.
 *
 * NOTE: if pjob is NULL, return empty slot, otherwise return slot containing job.
 */

int TMOMJobGetStartInfo(

  job         *pjob, /* I */
  pjobexec_t **TJEP) /* O */

  {
  int slot = 0;

  while (slot < TMAX_JE)
    {
    if (TMOMStartInfo[slot].pjob != pjob)
      {
      slot++;

      continue;
      }

    *TJEP = &TMOMStartInfo[slot];

    return(SUCCESS);
    }

  /* every slot examined, no match */

  return(FAILURE);
  }  /* END TMOMJobGetStartInfo() */



/*
 * TMOMScanForStarting - follow up on jobs that are still being started
 *
 * Walks svr_alljobs and handles every job in substate
 * JOB_SUBSTATE_STARTING:
 *   - no start record found:        exec_bail() with JOB_EXEC_RETRY
 *   - TMomCheckJobChild() fails:     job is left alone unless its mtime
 *                                    attribute is older than
 *                                    TJobStartTimeout, in which case the
 *                                    start record is cleared and the job
 *                                    bailed with JOB_EXEC_RETRY
 *   - otherwise:                     TMomFinalizeJob3() is run, the start
 *                                    record is cleared, and the job is
 *                                    bailed with the reported code if
 *                                    finalizing failed
 *
 * Always returns SUCCESS.
 */

int TMOMScanForStarting(void)

  {
  const char *id = "TMOMScanForStarting";

  job *pjob;
  job *nextjob;

  int  Count;
  int  RC;
  int  SC;

#ifdef MSIC
  list_link *tmpL;

  /* NOTE:  solaris system is choking on GET_NEXT - isolate */

  tmpL = GET_NEXT(svr_alljobs);

  tmpL = svr_alljobs.ll_next->ll_struct;

  pjob = (job *)tmpL;
#endif /* MSIC */

  for (pjob = (job *)GET_NEXT(svr_alljobs);pjob != NULL;pjob = nextjob)
    {
    pjobexec_t *TJE;
    int         finalized;

    /* fetch the successor first: exec_bail() is called on pjob below */

    nextjob = (job *)GET_NEXT(pjob->ji_alljobs);

    if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_STARTING)
      continue;

    if (LOGLEVEL >= 2)
      {
      snprintf(log_buffer, 1024, "checking job start in %s - examining pipe from child",
               id);

      log_record(
        PBSEVENT_JOB | PBSEVENT_FORCE,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        log_buffer);
      }

    if (TMOMJobGetStartInfo(pjob, &TJE) == FAILURE)
      {
      sprintf(log_buffer, "job %s start data lost, server will retry",
              pjob->ji_qs.ji_jobid);

      log_record(
        PBSEVENT_ERROR,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        log_buffer);

      exec_bail(pjob, JOB_EXEC_RETRY);

      continue;
      }

    /* check if job is ready */

    if (TMomCheckJobChild(TJE, 1, &Count, &RC) == FAILURE)
      {
      long started_at;

      if (LOGLEVEL >= 3)
        {
        sprintf(log_buffer, "job %s child not started, will check later",
                pjob->ji_qs.ji_jobid);

        log_record(
          PBSEVENT_ERROR,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          log_buffer);
        }

      /* if job has been in prerun > TJobStartTimeout, purge job */

      started_at = pjob->ji_wattr[(int)JOB_ATR_mtime].at_val.at_long;

      if ((started_at <= 0) || ((time_now - started_at) <= TJobStartTimeout))
        continue;  /* still within the allowed start time */

      sprintf(log_buffer, "job %s child not started after %ld seconds, server will retry",
              pjob->ji_qs.ji_jobid,
              TJobStartTimeout);

      log_record(
        PBSEVENT_ERROR,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        log_buffer);

      memset(TJE, 0, sizeof(pjobexec_t));

      exec_bail(pjob, JOB_EXEC_RETRY);

      continue;
      }

    /* NOTE:  TMomFinalizeJob3() populates SC */

    finalized = TMomFinalizeJob3(TJE, Count, RC, &SC);

    /* the start record is released whichever way finalizing went */

    memset(TJE, 0, sizeof(pjobexec_t));

    if (finalized == FAILURE)
      {
      /* no need to log this, TMomFinalizeJob3() already did */

      exec_bail(pjob, SC);

      continue;
      }

    /* job successfully started */

    if (LOGLEVEL >= 3)
      {
      sprintf(log_buffer, "job %s reported successful start",
              pjob->ji_qs.ji_jobid);

      LOG_EVENT(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        log_buffer);
      }
    }  /* END for (pjob) */

  return(SUCCESS);
  }  /* END TMOMScanForStarting() */





/**
 * examine_all_polled_jobs
 *
 * Goes through mom_polljobs and, for each job in substate
 * JOB_SUBSTATE_RUNNING:
 *   - polls the sister MOMs when this MOM holds the job
 *     (JOB_SVFLG_HERE) and ji_nodekill is TM_ERROR_NODE; a short count
 *     from send_sisters() is logged
 *   - escalates an over-limit job: first detection and the following
 *     pass each send SIGTERM, the pass after that sends SIGKILL
 */

void
examine_all_polled_jobs(void)

  {
  static char id[] = "examine_all_polled_jobs";
  job         *pjob;

  for (pjob = (job *)GET_NEXT(mom_polljobs);
       pjob != NULL;
       pjob = (job *)GET_NEXT(pjob->ji_jobque))
    {
    int flags;  /* ji_svrflags as they were before this pass changed them */

    if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)
      continue;

    /*
    ** Send message to get info from other MOM's
    ** if I am Mother Superior for the job and
    ** it is not being killed.
    */

    if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) &&
        (pjob->ji_nodekill == TM_ERROR_NODE) &&
        (send_sisters(pjob, IM_POLL_JOB) != pjob->ji_numnodes - 1))
      {
      /* not every sister could be reached; only recorded here */

      sprintf(log_buffer, "cannot contact all sisters");

      log_record(PBSEVENT_JOB | PBSEVENT_FORCE,
                 PBS_EVENTCLASS_JOB,
                 pjob->ji_qs.ji_jobid,
                 log_buffer);
      }

    flags = pjob->ji_qs.ji_svrflags;

    if (flags & JOB_SVFLG_OVERLMT2)
      {
      /* third pass over the limit */

      kill_job(pjob, SIGKILL, id, "job is over-limit-2");

      continue;
      }

    if (flags & JOB_SVFLG_OVERLMT1)
      {
      /* second pass over the limit */

      kill_job(pjob, SIGTERM, id, "job is over-limit-1");

      pjob->ji_qs.ji_svrflags |= JOB_SVFLG_OVERLMT2;

      continue;
      }

    /* cleared so that only text placed there by the check gets logged */

    log_buffer[0] = '\0';

    if (job_over_limit(pjob) == 0)
      continue;

    log_record(
      PBSEVENT_JOB | PBSEVENT_FORCE,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    if (flags & JOB_SVFLG_HERE)
      {
      /* put the reason on the job's stderr as well */

      char *kill_msg = malloc(80 + strlen(log_buffer));

      if (kill_msg != NULL)
        {
        sprintf(kill_msg,"=>> PBS: job killed: %s\n",
          log_buffer);

        message_job(pjob,StdErr,kill_msg);

        free(kill_msg);
        }
      }

    kill_job(pjob, SIGTERM, id, "job is over-limit-0");

    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_OVERLMT1;
    }    /* END for (pjob) */
  }      /* END examine_all_polled_jobs() */





/*
 * examine_all_running_jobs
 *
 * For every job in svr_alljobs that is in substate JOB_SUBSTATE_RUNNING
 * and carries JOB_SVFLG_HERE: refreshes its usage through mom_set_use(),
 * and - when MOM_NO_PROC is set in ji_flags - clears that flag and probes
 * each task with signal 0.  A task whose probe fails with ESRCH is marked
 * TI_STATE_EXITED with exit status 0, saved, and exiting_tasks is set.
 * Finally the periodic checkpoint timer of the job is checked.
 */

void
examine_all_running_jobs(void)

  {
  job         *pjob;
#ifdef _CRAY
  int         cray_pid;
#endif
  task         *ptask;
  pid_t        probe;  /* process/session id that receives signal 0 */

  for (pjob = (job *)GET_NEXT(svr_alljobs);
       pjob != NULL;
       pjob = (job *)GET_NEXT(pjob->ji_alljobs))
    {
    /* skip jobs that are not running or not flagged JOB_SVFLG_HERE */

    if ((pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) ||
        ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HERE) == 0))
      continue;

    /* update information for my tasks */

    mom_set_use(pjob); /* Machine dependent function to compute and set attributes like cput, vmem, etc. */

    /* Have all job processes vanished undetected?       */
    /* double check by sig0 to session pid for each task */
    /* But why not use the proc_array? */

    if (pjob->ji_flags & MOM_NO_PROC)
      {
      pjob->ji_flags &= ~MOM_NO_PROC;

      for (ptask = (task *)GET_NEXT(pjob->ji_tasks);
           ptask != NULL;
           ptask = (task *)GET_NEXT(ptask->ti_jobtask))
        {
#ifdef _CRAY

        if (pjob->ji_globid == NULL)
          break;

        cray_pid = atoi(pjob->ji_globid);

        probe = (pid_t)cray_pid;
#else /* not cray */
        probe = ptask->ti_qs.ti_sid;
#endif /* not cray */

        /* anything other than "no such process" leaves the task as it is */

        if ((kill(probe, 0) != -1) || (errno != ESRCH))
          continue;

        if (LOGLEVEL >= 3)
          {
          LOG_EVENT(
            PBSEVENT_JOB,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            "no active process found");
          }

        ptask->ti_qs.ti_exitstat = 0;

        ptask->ti_qs.ti_status = TI_STATE_EXITED;
        pjob->ji_qs.ji_un.ji_momt.ji_exitstat = 0;

        if (LOGLEVEL >= 6)
          {
          log_record(
            PBSEVENT_ERROR,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            "saving task (main loop)");
          }

        task_save(ptask);

        exiting_tasks = 1;
        }    /* END for (ptask) */
      }      /* END if (pjob->ji_flags & MOM_NO_PROC) */

    mom_checkpoint_check_periodic_timer(pjob);
    }  /* END for (pjob) */
  }  /* END examine_all_running_jobs() */





/**
 * examine_all_jobs_to_resend
 *
 * Retries post_epilogue() with MOM_OBIT_RETRY for the jobs recorded in
 * JobsToResend.  Scanning stops at the first NULL entry; entries holding
 * DUMMY_JOB_PTR are passed over.  An entry whose retry returns 0 is
 * replaced by DUMMY_JOB_PTR.
 */
void examine_all_jobs_to_resend(void)

  {
  int slot = 0;

  /* no job ptrs are stored after a NULL value */

  while ((slot < MAX_RESEND_JOBS) && (JobsToResend[slot] != NULL))
    {
    /* the dummy entry is skipped without calling post_epilogue() */

    if ((JobsToResend[slot] != (job *)DUMMY_JOB_PTR) &&
        (post_epilogue(JobsToResend[slot], MOM_OBIT_RETRY) == 0))
      {
      if (LOGLEVEL >= 7)
        {
        log_record(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          JobsToResend[slot]->ji_qs.ji_jobid,
          "job obit resent");
        }

      /* sent successfully, make this slot the dummy pointer */

      JobsToResend[slot] = (job *)DUMMY_JOB_PTR;
      }

    slot++;
    }
  }  /* END examine_all_jobs_to_resend() */
    




/*
 * kill_all_running_jobs
 *
 * Goes through svr_alljobs: a job in substate JOB_SUBSTATE_RUNNING is
 * sent SIGKILL, moved to JOB_SUBSTATE_EXITING and saved; every other job
 * is passed to term_job().  Afterwards terminated children and exiting
 * tasks are scanned for.
 */

void
kill_all_running_jobs(void)

  {
  job *pjob = (job *)GET_NEXT(svr_alljobs);

  while (pjob != NULL)
    {
    if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)
      {
      term_job(pjob);
      }
    else
      {
      kill_job(pjob, SIGKILL, "kill_all_running_jobs", "mom is terminating with kill jobs flag");

      pjob->ji_qs.ji_substate = JOB_SUBSTATE_EXITING;

      job_save(pjob, SAVEJOB_QUICK);
      }

    pjob = (job *)GET_NEXT(pjob->ji_alljobs);
    }  /* END while (pjob != NULL) */

  /* with NOSIGCHLDMOM defined the scan is unconditional */

#ifdef NOSIGCHLDMOM
  scan_for_terminated();
#else
  if (termin_child != 0)
    scan_for_terminated();
#endif

  if (exiting_tasks)
    scan_for_exiting();
  }  /* END kill_all_running_jobs() */



/**
 * mark_for_resend
 *
 * used to keep track of jobs whose obits weren't sent correctly
 * marks them so they can be resent
 *
 * The job is stored in the first JobsToResend entry that is NULL or
 * holds DUMMY_JOB_PTR.
 *
 * @param pjob - the job that should be resent
 * @return SUCCESS when the job was stored, FAILURE when pjob is NULL or
 *         no entry is available
 */
int mark_for_resend(

  job *pjob) /* I */

  {
  int slot;

  if (pjob == NULL)
    return(FAILURE);

  for (slot = 0;slot < MAX_RESEND_JOBS;slot++)
    {
    /* entry already refers to a real job */

    if ((JobsToResend[slot] != NULL) &&
        (JobsToResend[slot] != (job *)DUMMY_JOB_PTR))
      continue;

    JobsToResend[slot] = pjob;

    if (LOGLEVEL >= 7)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "marking job for resend");
      }

    return(SUCCESS);
    }

  /* table is full */

  return(FAILURE);
  }




/*
 * This is for a mom starting with the -P option. Set all existing 
 * tasks to TI_STATE_EXITED so they can be cleanup up on the mom 
 * and at the server 
 */
void prepare_child_tasks_for_delete()
  {
  char *id = "prepare_child_tasks_for_delete";
  job *job;
  extern tlist_head svr_alljobs;


  for (job = GET_NEXT(svr_alljobs);job != NULL;job = GET_NEXT(job->ji_alljobs))
    {
    task *task;

    for (task = GET_NEXT(job->ji_tasks);task != NULL;task = GET_NEXT(task->ti_jobtask))
      {

      char buf[128];

      extern int exiting_tasks;

      sprintf(buf, "preparing exited session %d for task %d in job %s for deletion",
              (int)task->ti_qs.ti_sid,
              task->ti_qs.ti_task,
              job->ji_qs.ji_jobid);

      log_event(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        buf);

      task->ti_qs.ti_exitstat = 0;  /* actually unknown */
      task->ti_qs.ti_status = TI_STATE_EXITED;

      task_save(task);

      exiting_tasks = 1;
      }
    }
  }




/**
 * main_loop
 *
 * Service loop of the MOM daemon.  Sets mom_run_state to
 * MOM_RUN_STATE_RUNNING and keeps iterating until something changes it.
 *
 * Each pass, in order:
 *   - rpp_io(), HUP handling, the machine dependent cycle, the load
 *     average/busy check and the log check
 *   - if mom_server_all_check_connection() returns non-zero: the periodic
 *     status update, the state send and the periodic job examination;
 *     if it returns 0 the loop sleeps one second and skips those steps
 *   - scans for terminated children, non-child tasks, exiting tasks and
 *     starting jobs
 *   - rpp_request(), then wait_request() with signals unblocked
 *   - with signals blocked again, MOMCheckRestart() when no jobs exist
 *
 * @see main() - parent
 */

void main_loop(void)

  {
  static char   id[] = "main_loop";

  extern time_t wait_time;
  double        myla;
  job          *pjob;
  time_t        tmpTime;
#ifdef USESAVEDRESOURCES
  int           check_dead = TRUE;
#endif    /* USESAVEDRESOURCES */

  mom_run_state = MOM_RUN_STATE_RUNNING;  /* mom_run_state is altered by stop_me() or MOMCheckRestart() */

  while (mom_run_state == MOM_RUN_STATE_RUNNING)
    {
    rpp_io();

    if (call_hup)
      {
      process_hup();  /* Do a restart of resmom */
      }

    dep_main_loop_cycle();  /* Call machine dependent code periodically */

    time_now = time(NULL);

    /* check if loadave means we should be "busy" */

    if (max_load_val > 0.0)
      {
      get_la(&myla);  /* Machine dependent load average computation (for linux read contents of /proc/loadavg) */

      /* check if need to update busy state */

      check_busy(myla);
      }

    /* should we check the log file? */

    if (time_now >= (last_log_check + PBS_LOG_CHECK_RATE))
      {
      check_log();  /* Possibly do a log_roll */
      }

    /* NOTE: with "#if 1" the brace block that follows "#endif" is the
     * else-branch of the connection test, so everything down to
     * "END BLOCK" only runs while connected to at least one server.
     * With "#if 0" the same block would run unconditionally. */

#if 1
    if (mom_server_all_check_connection() == 0)  /* Are we connected to any server? */
      {
      /* Don't do any other processing until we've re-established
       * contact with at least one server */

      sleep(1);  /* sleep to prevent too many messages sent to server under certain failure conditions */
      }
    else
#else
    mom_server_all_check_connection();

#endif
      {
      /* status update is due when the interval has elapsed or an update was forced */

      if ((time_now >= (LastServerUpdateTime + ServerStatUpdateInterval)) || 
          (ForceServerUpdate == TRUE))
        {
        ForceServerUpdate = FALSE;

        /* Update the server on the status of this mom. */

        /* check_state() gets a non-zero argument only before the first update */

        if (PBSNodeCheckInterval > 0)
          check_state((LastServerUpdateTime == 0));

        mom_server_all_update_stat();

        LastServerUpdateTime = time_now;
        }

      /* if needed, update server with my state change */
      /* can be changed in check_busy(), query_adp(), and is_update_stat() */

      mom_server_all_send_state();

#ifdef USESAVEDRESOURCES

      /* if -p, must poll tasks inside jobs to look for completion */

      /* check_dead is cleared below at the end of the first pass, so this
       * extra scan can happen at most once */

      if ((check_dead) && (recover == JOB_RECOV_RUNNING))
        {
        scan_non_child_tasks();
        }

#endif    /* USESAVEDRESOURCES */

      /* job polling runs at most once every CheckPollTime seconds */

      if (time_now >= (last_poll_time + CheckPollTime))
        {
        last_poll_time = time_now;

        if (GET_NEXT(svr_alljobs))
          {
          /* There are jobs, update process status from the OS */

          if (mom_get_sample() == PBSE_NONE)
            {
            /* no errors in getting process status information */

            examine_all_running_jobs();

            examine_all_polled_jobs();

            examine_all_jobs_to_resend();
            }
          }
        }
      }  /* END BLOCK */

    /* cleared on every pass, whether or not a server was reachable */

#ifdef USESAVEDRESOURCES
      check_dead = FALSE;
#endif    /* USESAVEDRESOURCES */

    /* without NOSIGCHLDMOM the scan below is conditional on termin_child;
     * with it the scan runs on every pass */

#ifndef NOSIGCHLDMOM
    if (termin_child != 0)  /* termin_child is a flag set by the catch_child signal handler */
#endif
      scan_for_terminated();  /* machine dependent (calls mom_get_sample()???) */

    /* if -p, must poll tasks inside jobs to look for completion */

    if (recover == JOB_RECOV_RUNNING)
      scan_non_child_tasks();

    if(recover == JOB_RECOV_DELETE)
      {
      prepare_child_tasks_for_delete();
      /* we can only do this once so set recover back to the default */
      recover = JOB_RECOV_RUNNING;
      }
      

    if (exiting_tasks)
      scan_for_exiting();

    TMOMScanForStarting();

    rpp_request(42);  /* cycle the rpp messaging system */

    /* unblock signals */

    if (sigprocmask(SIG_UNBLOCK, &allsigs, NULL) == -1)
      log_err(errno, id, "sigprocmask(UNBLOCK)");

    time_now = time((time_t *)0);

    /* compute the timeout handed to wait_request(), never less than 1 */

    /* NOTE(review): both subtractions are "now minus next due time", which
     * (assuming a signed time_t) is negative until the deadline has passed,
     * so MAX(1, ...) appears to clamp the timeout to 1 second on most
     * passes.  Confirm whether the operand order is intentional before
     * changing it - a longer wait would slow every polled step above. */

    tmpTime = MIN(wait_time, time_now - (LastServerUpdateTime + ServerStatUpdateInterval));

    tmpTime = MIN(tmpTime, time_now - (last_poll_time + CheckPollTime));

    tmpTime = MAX(1, tmpTime);

    /* no status update has been recorded yet: use the shortest wait */

    if (LastServerUpdateTime == 0)
      tmpTime = 1;

    /* wait_request does a select and then calls the connection's cn_func for sockets with data */

    if (wait_request(tmpTime, NULL) != 0)
      {
      /* on EBADF both listening ports are re-initialized before the failure is logged */

      if (errno == EBADF)
        {
        init_network(pbs_mom_port, process_request);

        init_network(pbs_rm_port, tcp_request);
        }

      log_err(-1, msg_daemonname, "wait_request failed");
      }

    /* block signals while we do things */

    if (sigprocmask(SIG_BLOCK, &allsigs, NULL) == -1)
      log_err(errno, id, "sigprocmask(BLOCK)");


    if ((pjob = (job *)GET_NEXT(svr_alljobs)) == NULL)
      {
      MOMCheckRestart();  /* There are no jobs, see if the server needs to be restarted. */
      }
    }      /* END while (mom_run_state == MOM_RUN_STATE_RUNNING) */

  return;
  }        /* END main_loop() */





/*
 * restart_mom - re-exec the MOM binary in place
 *
 * Puts "PATH=<orig_path>" into the environment and then calls
 * execvp(MOMExePath, argv).  A successful execvp() does not return, so
 * this function only returns when the allocation or the exec failed;
 * in both cases the error is logged through log_err() first.
 *
 * @param argc - argument count (not used)
 * @param argv - argument vector handed to execvp()
 */

void restart_mom(

  int   argc,
  char *argv[])

  {
  static char id[] = "restart_mom";

  char *envstr;
  int   saved_errno;

  /* room for "PATH=", the saved path and the terminating NUL */

  envstr = malloc(strlen("PATH=") + strlen(orig_path) + 1);

  if (envstr == NULL)
    {
    /* capture errno right away: the formatting calls below are allowed
     * to modify it before log_err() gets to see it */

    saved_errno = errno;

    sprintf(log_buffer, "malloc failed prior to execing myself: %s (%d)",
            strerror(saved_errno),
            saved_errno);

    log_err(saved_errno, id, log_buffer);

    return;
    }

  strcpy(envstr, "PATH=");
  strcat(envstr, orig_path);

  /* envstr is deliberately not freed: putenv() makes the string itself
   * part of the environment rather than copying it */

  putenv(envstr);

  DBPRT(("Re-execing myself now...\n"));

  execvp(MOMExePath, argv);

  /* only reached when execvp() failed */

  saved_errno = errno;

  sprintf(log_buffer, "execing myself failed: %s (%d)",
          strerror(saved_errno),
          saved_errno);

  log_err(saved_errno, id, log_buffer);

  return;
  }  /* END restart_mom() */





/*
 * main - the main program of MOM
 *
 * Closes inherited descriptors above stderr, initializes globals, parses
 * the command line and sets up the program environment, then runs
 * main_loop() until mom_run_state leaves MOM_RUN_STATE_RUNNING.  On the
 * way out it optionally kills all running jobs, shuts down polling, rpp
 * and the network, logs the shutdown, and re-execs itself when a restart
 * was requested.
 *
 * @return the non-zero result of setup_program_environment() if setup
 *         failed, otherwise 0
 *
 * @see main_loop() - child
 */

int main(

  int   argc,    /* I */
  char *argv[])  /* I */

  {
  int       rc;
  int       fd;
  int       fd_limit = sysconf(_SC_OPEN_MAX);

  /* close any inherited extra files, leaving stdin, stdout, and stderr open */

  for (fd = fd_limit - 1; fd > 2; fd--)
    {
    close(fd);
    }

  program_name = argv[0];

  initialize_globals();

  parse_command_line(argc, argv); /* Calls exit on command line error */

  rc = setup_program_environment();

  if (rc != 0)
    {
    return(rc);
    }

  main_loop();

  /* main_loop() has returned, so mom_run_state now says why we are stopping */

  if (mom_run_state == MOM_RUN_STATE_KILLALL)
    {
    kill_all_running_jobs();
    }

  /* shutdown mom */

  mom_close_poll();

  rpp_shutdown();

  net_close(-1);  /* close all network connections */

  if (mom_run_state == MOM_RUN_STATE_RESTART)
    {
    sprintf(log_buffer, "Will be restarting: %s",
            MOMExePath);

    log_record(
      PBSEVENT_SYSTEM | PBSEVENT_FORCE,
      PBS_EVENTCLASS_SERVER,
      msg_daemonname,
      log_buffer);
    }

  log_record(
    PBSEVENT_SYSTEM | PBSEVENT_FORCE,
    PBS_EVENTCLASS_SERVER,
    msg_daemonname,
    "Is down");

  log_close(1);

  /* restart_mom() only comes back if the re-exec could not be done */

  if (mom_run_state == MOM_RUN_STATE_RESTART)
    {
    restart_mom(argc, argv);
    }

  return(0);
  }  /* END main() */




/* END mom_main.c */


