#include #include "license_pbs.h" /* See here for the software license */ /* * node_func.c - various functions dealing with nodes, properties and * the following global variables: * pbsnlist - the server's global node list * svr_totnodes - total number of pbshost entries * svr_clnodes - number of cluster (space-shared) nodes * * Included functions are: * find_nodebyname() - find a node host with a given name */ #include /* the master config generated by configure */ #include "node_func.h" #include #include #include #include #include #include #include #include #if defined(NTOHL_NEEDS_ARPA_INET_H) && defined(HAVE_ARPA_INET_H) #include #endif #include "pbs_ifl.h" #include "libpbs.h" #include "list_link.h" #include "attribute.h" #include "credential.h" #include "batch_request.h" #include "server_limits.h" #include "server.h" #include "pbs_job.h" #include "pbs_nodes.h" #include "pbs_error.h" #include "log.h" #include "dis.h" #include "../lib/Liblog/pbs_log.h" #include "../lib/Liblog/log_event.h" #include "pbs_proto.h" #include "net_connect.h" #include "utils.h" #include "u_tree.h" #include "node_manager.h" /* is_compose */ #include "../lib/Libattr/attr_node_func.h" /* free_prop_list */ #include "req_manager.h" /* mgr_set_node_attr */ #include "../lib/Libutils/u_lock_ctl.h" /* lock_node, unlock_node */ #include "../lib/Libnet/lib_net.h" /* pbs_getaddrinfo */ #include "svrfunc.h" /* get_svr_attr_* */ #include "alps_constants.h" #include "login_nodes.h" #include "work_task.h" #include "net_cache.h" #include "ji_mutex.h" #include "execution_slot_tracker.hpp" #include "alps_functions.h" #if !defined(H_ERRNO_DECLARED) && !defined(_AIX) /*extern int h_errno;*/ #endif #define NULLSTR static_cast (0); /* Global Data */ extern hello_container failures; extern struct addrinfo hints; extern int svr_totnodes; extern int svr_clnodes; extern char *path_nodes_new; extern char *path_nodes; extern char *path_nodestate; extern char *path_nodenote; extern int LOGLEVEL; extern attribute_def 
node_attr_def[]; /* node attributes defs */ extern AvlTree ipaddrs; extern dynamic_string *hierarchy_holder; job *get_job_from_job_usage_info(job_usage_info *jui, struct pbsnode *pnode); /* Functions in this file * find_nodebyname() - given a node host name, search allnodes * find_subnodebyname() - given a subnode name * save_characteristic() - save the the characteristics of the node along with * the address of the node * chk_characteristic() - check for changes to the node's set of * characteristics and set appropriate flag bits in the "need_todo" * location depending on which characteristics changed * status_nodeattrib() - add status of each requested (or all) node-pbs_attribute * to the status reply * initialize_pbsnode() - performs node initialization on a new node * effective_node_delete() - effectively deletes a node from the server's node * list by setting the node's "deleted" bit * setup_notification() - sets mechanism for notifying other hosts about a new * host * process_host_name_part() - processes hostname part of a batch request into a * prop structure, host's IP addresses into an array, and node * node type (cluster/time-shared) into an int variable * update_nodes_file() - used to update the nodes file when certain changes * occur to the server's internal nodes list * recompute_ntype_cnts - Recomputes the current number of cluster nodes and * current number of time-shared nodes * create_pbs_node - create basic node structure for adding a node */ struct pbsnode *alps_reporter; /* use IP address to look up matchin node structure */ struct pbsnode *PGetNodeFromAddr( pbs_net_t addr) /* I */ { struct pbsnode *pnode; int iter = -1; int aindex; while ((pnode = next_host(&allnodes,&iter,NULL)) != NULL) { for (aindex = 0; aindex < 10; aindex++) { if (pnode->nd_addrs[aindex] == 0) break; if (pnode->nd_addrs[aindex] == addr) { return(pnode); } } /* END for (aindex) */ unlock_node(pnode, __func__, 0, LOGLEVEL); } /* END for each node */ return(NULL); } /* END 
PGetNodeFromAddr() */ void bad_node_warning( pbs_net_t addr, /* I */ struct pbsnode *node_possessed) /* I */ { time_t now; time_t last; char log_buf[LOCAL_LOG_BUF_SIZE+1]; struct pbsnode *pnode = NULL; if (node_possessed == NULL) pnode = PGetNodeFromAddr(addr); else pnode = node_possessed; if (pnode != NULL) { /* matching node located */ now = time(NULL); last = pnode->nd_warnbad; if (!last && (now - last >= 3600)) { /* once per hour, log a warning that we can't reach the node */ snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "ALERT: unable to contact node %s", pnode->nd_name); log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, "WARNING", log_buf); pnode->nd_warnbad = now; } /* only release the mutex if we obtained it in this function */ if (node_possessed == NULL) unlock_node(pnode, __func__, "attained in function", LOGLEVEL); } } /* END bad_node_warning() */ /* * return 0 if addr is a MOM node and node is in bad state, * return 1 otherwise (it is not a MOM node, or it's state is OK) */ int addr_ok( pbs_net_t addr, /* I */ struct pbsnode *pnode) /* I */ { int status = 1; /* assume destination host is healthy */ int release_mutex = FALSE; time_t time_now = time(NULL); node_iterator iter; /* if a node wasn't passed in, then find the node */ if (pnode == NULL) { reinitialize_node_iterator(&iter); while ((pnode = next_node(&allnodes,pnode,&iter)) != NULL) { /* NOTE: should walk thru all nd_addrs for multi-homed hosts */ /* NOTE: deleted node may have already freed nd_addrs - check should be redundant */ if ((pnode->nd_addrs == NULL) || (pnode->nd_addrs[0] != addr)) { continue; } /* node matches addr */ break; } if (pnode == NULL) return(status); else release_mutex = TRUE; } if (pnode->nd_state & INUSE_UNKNOWN) { /* definitely not ok */ status = 0; } else if (pnode->nd_state & INUSE_DOWN) { /* the node is ok if it is still talking to us */ long chk_len = 300; get_svr_attr_l(SRV_ATR_check_rate, &chk_len); if (pnode->nd_lastupdate != 0) { if (pnode->nd_lastupdate <= (time_now - 
chk_len)) { status = 0; } } } if (release_mutex == TRUE) unlock_node(pnode, __func__, "release_mutex = TRUE", LOGLEVEL); return(status); } /* END addr_ok() */ struct pbsnode *find_node_in_allnodes( all_nodes *an, char *nodename) { struct pbsnode *pnode = NULL; int index; if (an == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input all_nodes pointer"); return(NULL); } if (nodename == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input nodename"); return(NULL); } pthread_mutex_lock(an->allnodes_mutex); index = get_value_hash(an->ht, nodename); if (index > 0) { pnode = (struct pbsnode *)an->ra->slots[index].item; if (pnode != NULL) lock_node(pnode, __func__, 0, LOGLEVEL); } pthread_mutex_unlock(an->allnodes_mutex); return(pnode); } /* END find_node_in_allnodes() */ /* * find_nodebyname() - find a node host by its name */ struct pbsnode *find_nodebyname( const char *nodename) /* I */ { char *pslash; char *dash = NULL; char *tmp; struct pbsnode *pnode = NULL; struct pbsnode *numa = NULL; int i; int numa_index; long cray_enabled = FALSE; if (nodename == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "allnodes is not initialized"); return(NULL); } if ((pslash = strchr((char *)nodename, (int)'/')) != NULL) *pslash = '\0'; pthread_mutex_lock(allnodes.allnodes_mutex); i = get_value_hash(allnodes.ht, (void *)nodename); if (i >= 0) { if (allnodes.ra == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "allnodes is not initialized"); return(NULL); } pnode = (struct pbsnode *)allnodes.ra->slots[i].item; } if (pnode != NULL) { lock_node(pnode, __func__, NULL, LOGLEVEL); } else { get_svr_attr_l(SRV_ATR_CrayEnabled, &cray_enabled); if (cray_enabled == TRUE) { if (alps_reporter != NULL) { lock_node(alps_reporter, __func__, NULL, LOGLEVEL); if ((i = get_value_hash(alps_reporter->alps_subnodes.ht, (void *)nodename)) >= 0) { if ((pnode = (struct pbsnode *)alps_reporter->alps_subnodes.ra->slots[i].item) != NULL) { lock_node(pnode, __func__, NULL, LOGLEVEL); } } 
unlock_node(alps_reporter, __func__, NULL, LOGLEVEL); } } else { /* check if it was a numa node */ tmp = (char *)nodename; while ((tmp = strchr(tmp, '-')) != NULL) { dash = tmp; tmp++; } if (dash != NULL) { *dash = '\0'; numa_index = atoi(dash + 1); if ((i = get_value_hash(allnodes.ht, (void *)nodename)) >= 0) { if ((pnode = (struct pbsnode *)allnodes.ra->slots[i].item) != NULL) { lock_node(pnode, __func__, NULL, LOGLEVEL); /* get the NUMA node */ numa = AVL_find(numa_index, pnode->nd_mom_port, pnode->node_boards); if (numa != NULL) lock_node(numa, __func__, NULL, LOGLEVEL); unlock_node(pnode, __func__, NULL, LOGLEVEL); pnode = numa; } } *dash = '-'; } } } pthread_mutex_unlock(allnodes.allnodes_mutex); if (pslash != NULL) *pslash = '/'; /* restore the slash */ return(pnode); } /* END find_nodebyname() */ /* * save_characteristic() - save the characteristic values of the node along * with the address of the node */ void save_characteristic( struct pbsnode *pnode, node_check_info *nci) { if (pnode == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input pbsnode pointer"); return; } if (nci == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input node_check_info pointer"); return; } nci->state = pnode->nd_state; nci->ntype = pnode->nd_ntype; nci->nprops = pnode->nd_nprops; nci->nstatus = pnode->nd_nstatus; nci->first = pnode->nd_first; nci->first_status = pnode->nd_f_st; if (pnode->nd_note != NULL) nci->note = strdup(pnode->nd_note); else nci->note = NULL; } /* END save_characteristic() */ /* * chk_characteristic() - check the value of the characteristics against * that which was saved earlier. * Returns: * -1 if parent address doesn't match saved parent address * 0 if successful check. *pneed_todo gets appropriate * bit(s) set depending on the results of the check. * The "returned" bits get used by the caller. 
*/ int chk_characteristic( struct pbsnode *pnode, /* I */ node_check_info *nci, /* I */ int *pneed_todo) /* O */ { char tmpLine[1024]; char log_buf[LOCAL_LOG_BUF_SIZE+1]; if (pnode == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input pbsnode pointer"); return(PBSE_BAD_PARAMETER); } if (nci == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input node_check_info pointer"); return(PBSE_BAD_PARAMETER); } if (pneed_todo == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input mask pointer"); return(PBSE_BAD_PARAMETER); } tmpLine[0] = '\0'; if (pnode->nd_state != nci->state) { if ((pnode->nd_state & INUSE_OFFLINE) && !(nci->state & INUSE_OFFLINE)) { *pneed_todo |= WRITENODE_STATE; /*marked offline */ strcat(tmpLine, "offline set"); } else if (!(pnode->nd_state & INUSE_OFFLINE) && (nci->state & INUSE_OFFLINE)) { *pneed_todo |= WRITENODE_STATE; /*removed offline*/ strcat(tmpLine, "offline cleared"); } if (tmpLine[0] != '\0') { if (LOGLEVEL >= 3) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "node %s state modified (%s)\n", pnode->nd_name, tmpLine); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,"chk_characteristic",log_buf); } } } if (pnode->nd_ntype != nci->ntype) *pneed_todo |= WRITE_NEW_NODESFILE; if ((nci->nprops != pnode->nd_nprops) || (nci->first != pnode->nd_first)) *pneed_todo |= WRITE_NEW_NODESFILE; if (pnode->nd_note != nci->note) /* not both NULL or with the same address */ { if (pnode->nd_note == NULL || nci->note == NULL) *pneed_todo |= WRITENODE_NOTE; /*node's note changed*/ else if (strcmp(pnode->nd_note, nci->note)) *pneed_todo |= WRITENODE_NOTE; /*node's note changed*/ } if (nci->note != NULL) { free(nci->note); nci->note = NULL; } return(PBSE_NONE); } /* END chk_characteristic() */ int login_encode_jobs( struct pbsnode *pnode, tlist_head *phead) { job *pjob; char *login_id; dynamic_string *job_str = get_dynamic_string(-1, NULL); char str_buf[MAXLINE*2]; svrattrl *pal; if (pnode == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input 
pbsnode pointer"); return(PBSE_BAD_PARAMETER); } if (phead == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input tlist_head pointer"); return(PBSE_BAD_PARAMETER); } if (job_str == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "job_str was not allocated"); return(PBSE_BAD_PARAMETER); } for (unsigned int i = 0; i < pnode->nd_job_usages.size(); i++) { job_usage_info *jui = pnode->nd_job_usages[i]; int jui_index; int jui_iterator = -1; login_id = NULL; pjob = get_job_from_job_usage_info(jui, pnode); if (pjob != NULL) { login_id = pjob->ji_wattr[JOB_ATR_login_node_id].at_val.at_str; unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); } while ((jui_index = jui->est.get_next_occupied_index(jui_iterator)) != -1) { if ((login_id == NULL) || (strncmp(pnode->nd_name, login_id, strlen(pnode->nd_name)))) { if (job_str->used != 0) snprintf(str_buf, sizeof(str_buf), ",%d/%s", jui_index, jui->jobid); else snprintf(str_buf, sizeof(str_buf), "%d/%s", jui_index, jui->jobid); append_dynamic_string(job_str, str_buf); } } } if ((job_str->str) == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "job_str value was not initialized"); return(PBSE_BAD_PARAMETER); } if ((pal = attrlist_create((char *)ATTR_NODE_jobs, (char *)NULL, strlen(job_str->str) + 1)) == NULL) { log_err(ENOMEM, __func__, ""); return(ENOMEM); } strcpy((char *)pal->al_value, job_str->str); pal->al_flags = ATR_VFLAG_SET; free_dynamic_string(job_str); append_link(phead, &pal->al_link, pal); return(PBSE_NONE); } /* END login_encode_jobs() */ /* status_nodeattrib() - add status of each requested (or all) node-pbs_attribute to * the status reply * * Returns: 0 is success * != 0 is error, if a node-pbs_attribute is incorrectly specified, *bad is * set to the node-pbs_attribute's ordinal position */ int status_nodeattrib( svrattrl *pal, /*an svrattrl from the request */ attribute_def *padef, /*the defined node attributes */ struct pbsnode *pnode, /*no longer an pbs_attribute ptr */ int limit, /*number of array elts in padef */ 
int priv, /*requester's privilege */ tlist_head *phead, /*heads list of svrattrl structs that hang */ /*off the brp_attr member of the status sub*/ /*structure in the request's "reply area" */ int *bad) /*if node-pbs_attribute error, record it's*/ /*list position here */ { int i; int rc = 0; /*return code, 0 == success*/ int index; int nth; /*tracks list position (ordinal tacker) */ pbs_attribute atemp[ND_ATR_LAST]; /*temporary array of attributes */ if (padef == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc, __func__, "input defined node attributes pointer is NULL"); return(rc); } if (pnode == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc, __func__, "input pbsnode pointer is NULL"); return(rc); } if (bad == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc, __func__, "input result mask pointer is NULL"); return(rc); } memset(&atemp, 0, sizeof(atemp)); priv &= ATR_DFLAG_RDACC; /* user-client privilege */ for (i = 0;i < ND_ATR_LAST;i++) { /*set up attributes using data from node*/ if (i == ND_ATR_state) atemp[i].at_val.at_short = pnode->nd_state; else if (i == ND_ATR_properties) atemp[i].at_val.at_arst = pnode->nd_prop; else if (i == ND_ATR_status) atemp[i].at_val.at_arst = pnode->nd_status; else if (i == ND_ATR_ntype) atemp[i].at_val.at_short = pnode->nd_ntype; else if (i == ND_ATR_jobs) atemp[i].at_val.at_jinfo = pnode; else if (i == ND_ATR_np) atemp[i].at_val.at_long = pnode->nd_slots.get_total_execution_slots(); else if (i == ND_ATR_note) atemp[i].at_val.at_str = pnode->nd_note; else if (i == ND_ATR_mom_port) atemp[i].at_val.at_long = pnode->nd_mom_port; else if (i == ND_ATR_mom_rm_port) atemp[i].at_val.at_long = pnode->nd_mom_rm_port; /* skip NUMA attributes */ else if (i == ND_ATR_num_node_boards) continue; else if (i == ND_ATR_numa_str) continue; else if (i == ND_ATR_gpus_str) continue; else if (i == ND_ATR_gpustatus) atemp[i].at_val.at_arst = pnode->nd_gpustatus; else if (i == ND_ATR_gpus) { if (pnode->nd_ngpus == 0) continue; atemp[i].at_val.at_long = 
pnode->nd_ngpus; } else if ((padef + i)->at_name != NULL) { if (!strcmp((padef + i)->at_name, ATTR_NODE_mics)) { if (pnode->nd_nmics == 0) continue; atemp[i].at_val.at_long = pnode->nd_nmics; } else if (!strcmp((padef + i)->at_name, ATTR_NODE_micstatus)) atemp[i].at_val.at_arst = pnode->nd_micstatus; } else { /*we don't ever expect this*/ *bad = 0; return(PBSE_UNKNODEATR); } atemp[i].at_flags = ATR_VFLAG_SET; /*artificially set the value's flags*/ } if (pal != NULL) { /*caller has requested status on specific node-attributes*/ nth = 0; while (pal != NULL) { ++nth; index = find_attr(padef, pal->al_name, limit); if (index < 0) { *bad = nth; /*name in this position can't be found*/ rc = PBSE_UNKNODEATR; break; } if ((padef + index)->at_flags & priv) { if ((index == ND_ATR_jobs) && (pnode->nd_is_alps_login == TRUE)) rc = login_encode_jobs(pnode, phead); else { if (index == ND_ATR_status) atemp[index].at_val.at_arst = pnode->nd_status; rc = ((padef + index)->at_encode( &atemp[index], phead, (padef + index)->at_name, NULL, ATR_ENCODE_CLIENT, 0)); } if (rc < 0) { rc = -rc; break; } else { /* encoding was successful */ rc = 0; } } pal = (svrattrl *)GET_NEXT(pal->al_link); } /* END while (pal != NULL) */ } /* END if (pal != NULL) */ else { /* non-specific request, return all readable attributes */ for (index = 0; index < limit; index++) { if ((index == ND_ATR_jobs) && (pnode->nd_is_alps_login == TRUE)) rc = login_encode_jobs(pnode, phead); else if (((padef + index)->at_flags & priv) && !((padef + index)->at_flags & ATR_DFLAG_NOSTAT)) { if (index == ND_ATR_status) atemp[index].at_val.at_arst = pnode->nd_status; rc = (padef + index)->at_encode( &atemp[index], phead, (padef + index)->at_name, NULL, ATR_ENCODE_CLIENT, 0); if (rc < 0) { rc = -rc; break; } else { /* encoding was successful */ rc = 0; } } } /* END for (index) */ } /* END else (pal != NULL) */ return(rc); } /* END status_nodeattrib() */ /* * initialize_pbsnode - carries out initialization on a new * pbs node. 
The assumption is that all the parameters are valid.
 *
 * The node is zeroed, given the default MOM ports, marked INUSE_DOWN, and
 * receives a property list and a status list each holding one entry that
 * points at the node's name.  Unless isNUMANode is set, the name is resolved
 * and the first resulting address is copied into nd_sock_addr.  Finally the
 * node's mutex is allocated and initialized.
 *
 * pname and pul are stored in the node as-is (not copied).
 *
 * Returns PBSE_NONE on success, PBSE_BAD_PARAMETER for a NULL node,
 * PBSE_SYSTEM if the name cannot be resolved, ENOMEM if the mutex cannot be
 * allocated.
 */

int initialize_pbsnode(

  struct pbsnode *pnode,
  char           *pname,      /* node name */
  u_long         *pul,        /* host byte order array */
                              /* ipaddrs for this node */
  int             ntype,      /* time-shared or cluster */
  bool            isNUMANode) /* TRUE if this is a NUMA node */

  {
  struct addrinfo *pAddrInfo;

  if (pnode == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL pointer was passed for initialization");
    return(PBSE_BAD_PARAMETER);
    }

  memset(pnode, 0, sizeof(struct pbsnode));

  /* identity and addressing */
  pnode->nd_name            = pname;
  pnode->nd_addrs           = pul;       /* list of host byte order */
  pnode->nd_ntype           = ntype;
  pnode->nd_mom_port        = PBS_MOM_SERVICE_PORT;
  pnode->nd_mom_rm_port     = PBS_MANAGER_SERVICE_PORT;
  pnode->nd_hierarchy_level = -1;        /* maximum unsigned short */

  /* state */
  pnode->nd_state           = INUSE_DOWN;
  pnode->nd_needed          = 0;
  pnode->nd_order           = 0;
  pnode->nd_warnbad         = 0;
  pnode->nd_note            = NULL;

  /* properties: the node's own name is always the first property */
  pnode->nd_prop            = NULL;
  pnode->nd_nprops          = 0;
  pnode->nd_first           = init_prop(pnode->nd_name);
  pnode->nd_last            = pnode->nd_first;

  /* status strings */
  pnode->nd_status          = NULL;
  pnode->nd_nstatus         = 0;
  pnode->nd_f_st            = init_prop(pnode->nd_name);
  pnode->nd_l_st            = pnode->nd_f_st;

  /* gpus */
  pnode->nd_ngpus           = 0;
  pnode->nd_gpustatus       = NULL;
  pnode->nd_ngpustatus      = 0;

  pnode->nd_ms_jobs         = initialize_resizable_array(20);

  /* NUMA nodes don't have their own address and their name is not in DNS. */
  if (!isNUMANode)
    {
    if (pbs_getaddrinfo(pname,NULL,&pAddrInfo))
      {
      return (PBSE_SYSTEM);
      }

    memcpy(&pnode->nd_sock_addr,pAddrInfo->ai_addr,sizeof(struct sockaddr_in));
    }

  pnode->nd_mutex = (pthread_mutex_t *)calloc(1, sizeof(pthread_mutex_t));

  if (pnode->nd_mutex == NULL)
    {
    log_err(ENOMEM, __func__, "Could not allocate memory for the node's mutex");
    return(ENOMEM);
    }

  pthread_mutex_init(pnode->nd_mutex,NULL);

  return(PBSE_NONE);
  }  /* END initialize_pbsnode() */




/*
 * effective_node_delete() - remove a node from allnodes and free it.
 *
 * The node is removed from the global container, unlocked, and its mutex,
 * property list, address array, name and the node structure itself are
 * freed.  Every address in nd_addrs is also deleted from the global ipaddrs
 * tree.  On return *ppnode is NULL.
 *
 * @param ppnode - address of the pointer to the node to delete (I/O)
 */
void effective_node_delete(

  struct pbsnode **ppnode)

  {
  u_long         *up;
  struct pbsnode *pnode = NULL;

  if (ppnode == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL node pointer to pointer delete call");
    return;
    }

  pnode = *ppnode;

  if (pnode == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL node pointer delete call");
    return;
    }

  remove_node(&allnodes,pnode);
  unlock_node(pnode, __func__, NULL, LOGLEVEL);
  free(pnode->nd_mutex);

  /* nd_last is NULL if the property list was never allocated */
  if (pnode->nd_last != NULL)
    pnode->nd_last->next = NULL;      /* just in case */

  free_prop_list(pnode->nd_first);

  pnode->nd_first = NULL;

  if (pnode->nd_addrs != NULL)
    {
    /* del node's IP addresses from tree */
    for (up = pnode->nd_addrs; *up != 0; up++)
      {
      ipaddrs = AVL_delete_node(*up, pnode->nd_mom_port, ipaddrs);
      }

    /* remove array of IP addresses */
    free(pnode->nd_addrs);

    pnode->nd_addrs = NULL;
    }

  free(pnode->nd_name);

  free(pnode);
  *ppnode = NULL;

  return;
  }  /* END effective_node_delete() */




/**
 * process_host_name_part() - resolve a node name into its address list.
 *
 * Resolves objname, registers the result through insert_addr_name_info(),
 * and builds a zero-terminated array of host byte order addresses.  When the
 * server has a NodeSuffix attribute, a second pass is made for the
 * suffix-qualified host name.
 *
 * @param objname - name of the node to be (I)
 * @param pul     - receives the 0 terminated host address array (O)
 * @param pname   - receives a strdup()'d copy of objname; only set on
 *                  success (O)
 * @param ntype   - receives the node type, always NTYPE_CLUSTER (O)
 * @return PBSE_NONE on success, PBSE_UNKNODE if objname is empty or cannot
 *         be resolved, PBSE_BAD_PARAMETER on a NULL output pointer,
 *         PBSE_SYSTEM on allocation or address-cache failure.
 *
 * NOTE: on the error returns taken inside the address loop *pul may already
 * point at an allocated array; it is left for the caller to release.
 *
 * NOTE(review): each pass resolves hptr into addr_iter and then immediately
 * re-walks addr_info (the lookup of objname), so the addresses found for the
 * suffix name are never used and the first list is added twice.  This looks
 * unintended but is preserved here - confirm before changing.
 */
static int process_host_name_part(

  char    *objname, /* node to be's name */
  u_long **pul,     /* 0 terminated host addrs array */
  char   **pname,   /* node name w/o any :ts         */
  int     *ntype)   /* node type; time-shared, not   */

  {
  char             log_buf[LOCAL_LOG_BUF_SIZE];

  struct addrinfo *addr_info;
  struct addrinfo *addr_iter;
  struct in_addr   addr;

  char            *phostname;     /* caller supplied hostname   */
  int              ipcount = 0;
  int              totalipcount = 0;

  char             hname[MAXLINE];
  char             tmpHName[MAXLINE];
  char            *hptr;

  static int       NodeSuffixIsSet = 0;
  static char     *NodeSuffix;

  int              hindex;
  int              size = 0;

  u_long          *tmp = NULL;

  if ((objname == NULL) ||
      (objname[0] == '\0'))
    {
    return(PBSE_UNKNODE);
    }

  if ((pul == NULL) ||
      (pname == NULL) ||
      (ntype == NULL))
    return(PBSE_BAD_PARAMETER);

  phostname = strdup(objname);

  if (phostname == NULL)
    {
    return(PBSE_SYSTEM);
    }

  *ntype = NTYPE_CLUSTER;
  *pul = NULL;

  if (pbs_getaddrinfo(phostname, &hints, &addr_info) != 0)
    {
    snprintf(log_buf, sizeof(log_buf), "host %s not found", objname);

    log_err(PBSE_UNKNODE, __func__, log_buf);

    free(phostname);

    return(PBSE_UNKNODE);
    }

  /* check the canonical name before it is handed to any %s below */
  if (addr_info->ai_canonname == NULL)
    {
    free(phostname);

    return(PBSE_SYSTEM);
    }

  if (LOGLEVEL >= 6)
    {
    char tmpLine[MAXLINE];

    snprintf(tmpLine, sizeof(tmpLine), "successfully loaded host structure for '%s'->'%s'",
      phostname,
      addr_info->ai_canonname);

    log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, tmpLine);
    }

  addr_info = insert_addr_name_info(addr_info,phostname);

  if (addr_info == NULL)
    {
    free(phostname);

    return(PBSE_SYSTEM);
    }

  snprintf(hname, sizeof(hname), "%s", addr_info->ai_canonname);

  /* the suffix is read from the server attributes once and then kept */
  if (NodeSuffixIsSet == 0)
    {
    char *node_suffix = NULL;

    get_svr_attr_str(SRV_ATR_NodeSuffix, &node_suffix);

    if (node_suffix != NULL)
      {
      NodeSuffix = strdup(node_suffix);
      }

    NodeSuffixIsSet = 1;
    }

  if (NodeSuffix != NULL)
    {
    /* insert the suffix between the short host name and the domain */
    char *ptr = strchr(hname, '.');

    if (ptr != NULL)
      {
      *ptr = '\0';

      snprintf(tmpHName, sizeof(tmpHName), "%s%s.%s",
        hname,
        NodeSuffix,
        ptr + 1);

      *ptr = '.';
      }
    else
      {
      snprintf(tmpHName, sizeof(tmpHName), "%s%s",
        hname,
        NodeSuffix);
      }
    }

  for (hindex = 0; hindex < 2; hindex++)
    {
    if (hindex == 0)
      {
      hptr = hname;
      }
    else if (NodeSuffix != NULL)
      {
      hptr = tmpHName;
      }
    else
      {
      continue;
      }

    if (pbs_getaddrinfo(hptr, NULL, &addr_iter) != 0)
      {
      snprintf(log_buf, sizeof(log_buf), "bad cname %s, h_errno=%d errno=%d (%s)",
        hptr,
        h_errno,
        errno,
        pbs_strerror(errno));

      log_err(PBSE_UNKNODE, __func__, log_buf);

      free(phostname);

      return(PBSE_UNKNODE);
      }

    /* count host ipaddrs */
    for (addr_iter = addr_info; addr_iter != NULL; addr_iter = addr_iter->ai_next)
      ipcount++;

    if (*pul == NULL)
      {
      size = sizeof(u_long) * (ipcount + 1);
      tmp = (u_long *)calloc(1, size);  /* zero-terminate list */
      }
    else
      {
      size += sizeof(u_long) * ipcount;
      tmp = (u_long *)realloc(*pul, size);
      }

    if (tmp == NULL)
      {
      /* out of memory: *pul (if any) is still the old, valid array */
      log_err(ENOMEM, __func__, "Cannot allocate memory for host addresses");

      free(phostname);

      return(PBSE_SYSTEM);
      }

    *pul = tmp;

    for (addr_iter = addr_info; addr_iter != NULL; addr_iter = addr_iter->ai_next)
      {
      addr = ((struct sockaddr_in *)addr_iter->ai_addr)->sin_addr;

      (*pul)[totalipcount++] = ntohl(addr.s_addr);
      }

    (*pul)[totalipcount] = 0;  /* zero-term array ip addrs */
    }  /* END for (hindex) */

  *pname = phostname;   /* return node name */

  return(PBSE_NONE);    /* function successful */
  }  /* END process_host_name_part() */




/*
 * write_compute_node_properties()
 *
 * writes out any extra properties or features that have been added to compute
 * nodes to the nodes file.
 * They are written in the format cray_compute feature1[ feature2[...]]
 *
 * @pre-cond: nin must be an open file pointer
 * @post-cond: all compute nodes with extra features have been written to nin
 *
 */
void write_compute_node_properties(

  struct pbsnode &reporter,
  FILE           *nin)

  {
  struct pbsnode *alps_node;
  int             iter = -1;

  while ((alps_node = next_host(&(reporter.alps_subnodes), &iter, NULL)) != NULL)
    {
    struct prop *first = alps_node->nd_first;

    /* only write nodes that have more than just cray_compute as their
     * properties.  Checking for > 2 properties should be sufficient -- all
     * computes have cray_compute and all nodes have their name as a
     * property */
    if ((first != NULL) &&
        (first->next != NULL) &&
        (first->next->next != NULL))
      {
      std::stringstream buf;

      buf << alps_node->nd_name;

      for (struct prop *pp = first; pp != NULL; pp = pp->next)
        {
        /* the node's own name is not written as a feature */
        if (strcmp(pp->name, alps_node->nd_name))
          buf << " " << pp->name;
        }

      fprintf(nin, "%s\n", buf.str().c_str());
      }

    unlock_node(alps_node, __func__, "loop", LOGLEVEL);
    }
  }  /* END write_compute_node_properties() */




/*
 * update_nodes_file - When called, this function will update
 *       the nodes file.  Specifically, it will
 *       walk the server's array of pbsnodes
 *       constructing for each entry a nodes file
 *       line if that entry is not marked as deleted.
 *       These are written to a temporary file.
 *       Upon successful conclusion that file replaces
 *       the nodes file.
 *
 * @param held - a node passed through to next_host(); this function never
 *               unlocks it (presumably the caller already holds it)
 * @return PBSE_NONE on success, -1 if the temporary file cannot be opened,
 *         written, closed or renamed, or if the server has no nodes.
 */
int update_nodes_file(

  struct pbsnode *held)

  {
  struct pbsnode  *np;
  int              j;
  int              iter = -1;
  FILE            *nin;
  long             cray_enabled = FALSE;

  if (LOGLEVEL >= 2)
    {
    DBPRT(("%s: entered\n", __func__))
    }

  if ((nin = fopen(path_nodes_new, "w")) == NULL)
    {
    log_event(
      PBSEVENT_ADMIN,
      PBS_EVENTCLASS_SERVER,
      "nodes",
      (char *)"Node description file update failed");

    return(-1);
    }

  if ((svr_totnodes == 0))
    {
    log_event(
      PBSEVENT_ADMIN,
      PBS_EVENTCLASS_SERVER,
      "nodes",
      (char *)"Server has empty nodes list");

    fclose(nin);

    return(-1);
    }

  get_svr_attr_l(SRV_ATR_CrayEnabled, &cray_enabled);

  /* for each node ... */
  /* NOTE: DO NOT change this loop to iterate over numa nodes. Since they
   * aren't real hosts they should NOT appear in the nodes file */
  while ((np = next_host(&allnodes,&iter,held)) != NULL)
    {
    int must_unlock = (held != np);

    /* ... write its name, and if time-shared, append :ts */
    fprintf(nin, "%s", np->nd_name); /* write name */

    /* if number of subnodes is gt 1, write that; if only one,   */
    /* don't write to maintain compatibility with old style file */
    if (np->nd_slots.get_total_execution_slots() > 1)
      fprintf(nin, " %s=%d", ATTR_NODE_np, np->nd_slots.get_total_execution_slots());

    /* if number of gpus is gt 0, write that; if none,           */
    /* don't write to maintain compatibility with old style file */
    if (np->nd_ngpus > 0)
      fprintf(nin, " %s=%d", ATTR_NODE_gpus, np->nd_ngpus);

    /* write out the numa attributes if needed */
    if (np->num_node_boards > 0)
      {
      fprintf(nin, " %s=%d", ATTR_NODE_num_node_boards, np->num_node_boards);
      }

    if ((np->numa_str != NULL) &&
        (np->numa_str[0] != '\0'))
      fprintf(nin, " %s=%s", ATTR_NODE_numa_str, np->numa_str);

    /* write out the ports if needed */
    if (np->nd_mom_port != PBS_MOM_SERVICE_PORT)
      fprintf(nin, " %s=%d", ATTR_NODE_mom_port, np->nd_mom_port);

    if (np->nd_mom_rm_port != PBS_MANAGER_SERVICE_PORT)
      fprintf(nin, " %s=%d", ATTR_NODE_mom_rm_port, np->nd_mom_rm_port);

    if ((np->gpu_str != NULL) &&
        (np->gpu_str[0] != '\0'))
      fprintf(nin, " %s=%s", ATTR_NODE_gpus_str, np->gpu_str);

    /* write out properties */
    for (j = 0; j < np->nd_nprops - 1; ++j)
      {
      const char *feature = np->nd_prop->as_string[j];

      /* Don't write out the cray_enabled features here */
      if (strcmp(feature, "cray_compute") &&
          strcmp(feature, alps_reporter_feature) &&
          strcmp(feature, alps_starter_feature))
        fprintf(nin, " %s", feature);
      }

    if (np->nd_is_alps_reporter == TRUE)
      fprintf(nin, " %s", alps_reporter_feature);

    if (np->nd_is_alps_login == TRUE)
      fprintf(nin, " %s", alps_starter_feature);

    /* finish off line with new-line */
    fprintf(nin, "\n");

    if ((cray_enabled == TRUE) &&
        (np == alps_reporter))
      write_compute_node_properties(*np, nin);

    fflush(nin);

    if (ferror(nin))
      {
      log_event(
        PBSEVENT_ADMIN,
        PBS_EVENTCLASS_SERVER,
        "nodes",
        (char *)"Node description file update failed");

      fclose(nin);

      if (must_unlock)
        unlock_node(np, __func__, "error", LOGLEVEL);

      return(-1);
      }

    if (must_unlock)
      unlock_node(np, __func__, "loop", LOGLEVEL);
    }  /* for each node */

  if ((fclose(nin)) != 0)
    {
    log_event(
      PBSEVENT_ADMIN,
      PBS_EVENTCLASS_SERVER,
      "nodes",
      (char *)"fclose of nodes file failed");

    return(-1);
    }

  if (rename(path_nodes_new, path_nodes) != 0)
    {
    log_event(
      PBSEVENT_ADMIN,
      PBS_EVENTCLASS_SERVER,
      "nodes",
      (char *)"replacing old nodes file failed");

    return(-1);
    }

  return(PBSE_NONE);
  }  /* END update_nodes_file() */




/*
 * recompute_ntype_cnts - Recomputes the current number of cluster
 *    nodes and current number of time-shared nodes
 *
 * Sums the total execution slots of every node returned by next_node() and
 * stores the result in svr_clnodes.  Does nothing when svr_totnodes is 0.
 */
void recompute_ntype_cnts(void)

  {
  int              slot_total = 0;
  struct pbsnode  *pnode = NULL;
  node_iterator    iter;

  reinitialize_node_iterator(&iter);

  if (!svr_totnodes)
    return;

  while ((pnode = next_node(&allnodes, pnode, &iter)) != NULL)
    {
    /* count normally */
    slot_total += pnode->nd_slots.get_total_execution_slots();
    }

  svr_clnodes = slot_total;
  }  /* END recompute_ntype_cnts() */




/*
 * init_prop - allocate and initialize a prop struct
 *
 * pname points to the property string; the pointer is stored, the string is
 * not copied.
 *
 * Returns the new prop, or NULL if it cannot be allocated.
 */
struct prop *init_prop(

  char *pname) /* I */

  {
  struct prop *pp = (struct prop *)calloc(1, sizeof(struct prop));

  if (pp == NULL)
    return(NULL);

  pp->name = pname;
  pp->mark = 0;
  pp->next = 0;

  return(pp);
  }  /* END init_prop() */




/*
 * add_execution_slot - add one execution slot to the node
 *
 * Also clears the node's INUSE_JOB state bit if it was set.
 *
 * Returns PBSE_NONE, or PBSE_RMBADPARAM if pnode is NULL.
 */
int add_execution_slot(

  struct pbsnode *pnode)

  {
  if (pnode == NULL)
    return(PBSE_RMBADPARAM);

  pnode->nd_slots.add_execution_slot();

  /* a node that just gained a slot is no longer fully busy */
  if (pnode->nd_state & INUSE_JOB)
    pnode->nd_state &= ~INUSE_JOB;

  return(PBSE_NONE);
  }  /* END add_execution_slot() */




/*
 * create_a_gpusubnode() - append one gpu subnode to the node.
 *
 * Allocates a new array one element larger than the current one, copies the
 * existing entries over, frees the old array, initializes the new last
 * entry, and increments both nd_ngpus and nd_ngpus_free.  nd_gpus_real is
 * reset to FALSE.
 *
 * @param pnode - the node that receives the gpu subnode (I/O)
 * @return PBSE_NONE on success, PBSE_BAD_PARAMETER for a NULL node,
 *         PBSE_MEM_MALLOC if the array cannot be allocated.
 */
int create_a_gpusubnode(

  struct pbsnode *pnode)

  {
  struct gpusubn *grown   = NULL;
  struct gpusubn *new_gpu = NULL;

  if (pnode == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL pbsnode pointer input");
    return(PBSE_BAD_PARAMETER);
    }

  grown = (struct gpusubn *)calloc((1 + pnode->nd_ngpus), sizeof(struct gpusubn));

  if (grown == NULL)
    {
    log_err(PBSE_MEM_MALLOC,__func__,
        (char *)"Couldn't allocate memory for a subnode. EPIC FAILURE");
    return(PBSE_MEM_MALLOC);
    }

  if (pnode->nd_ngpus > 0)
    {
    /* copy old memory to the new place */
    memcpy(grown,pnode->nd_gpusn,(sizeof(struct gpusubn) * pnode->nd_ngpus));
    }

  /* now use the new memory */
  free(pnode->nd_gpusn);
  pnode->nd_gpusn = grown;

  /* initialize the new subnode, which sits in the last position */
  pnode->nd_gpus_real = FALSE;

  new_gpu        = &pnode->nd_gpusn[pnode->nd_ngpus];
  new_gpu->inuse = FALSE;
  new_gpu->mode  = gpu_normal;
  new_gpu->state = gpu_unallocated;
  new_gpu->flag  = okay;
  new_gpu->index = pnode->nd_ngpus;
  new_gpu->gpuid = NULL;

  /* increment the number of gpu subnodes and gpus free */
  pnode->nd_ngpus++;
  pnode->nd_ngpus_free++;

  return(PBSE_NONE);
  }  /* END create_a_gpusubnode() */




/*
 * copy the properties of node src to node dest
 *
 * dest receives its own copy of src's array_strings (structure and string
 * buffer) and a new prop list with one entry per string pointer, followed by
 * an entry for dest's own name.
 *
 * @param dest - the node where the properties will be copied to
 * @param src - the node whose properties will be copied from
 * @return PBSE_NONE on success (also when src has no properties),
 *         PBSE_BAD_PARAMETER on a NULL argument or if dest has no prop list,
 *         PBSE_MEM_MALLOC if an allocation fails.
 *
 * NOTE(review): the prop entry dest->nd_first pointed to on entry is
 * replaced without being freed - confirm whether that is intended.
 */
int copy_properties(

  struct pbsnode *dest, /* O */
  struct pbsnode *src)  /* I */

  {
  int                   need;
  int                   i;

  struct prop          *pdest;
  struct prop          *tail = NULL;  /* last prop linked in so far */
  struct prop         **plink;

  struct array_strings *sub;
  struct array_strings *main_node;

  if (dest == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL destanation pointer input");
    return(PBSE_BAD_PARAMETER);
    }

  if (src == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL source pointer input");
    return(PBSE_BAD_PARAMETER);
    }

  /* copy features/properties */
  if (src->nd_prop == NULL)
    return(PBSE_NONE);
  else if (dest->nd_first == NULL)
    return(PBSE_BAD_PARAMETER);

  main_node = src->nd_prop;

  /* allocate the properties for the numa node */
  need = sizeof(struct array_strings) + ((main_node->as_npointers - 1) * sizeof(char *));
  sub  = (struct array_strings *)calloc(1, need);

  if (sub == NULL)
    {
    log_err(PBSE_MEM_MALLOC, __func__, "Cannot allocate memory for node properties");
    return(PBSE_MEM_MALLOC);
    }

  /* copy simple values */
  sub->as_npointers = main_node->as_npointers;
  sub->as_usedptr   = main_node->as_usedptr;
  sub->as_bufsize   = main_node->as_bufsize;

  /* allocate the buffer */
  sub->as_buf = (char *)calloc(1, sub->as_bufsize);

  if (sub->as_buf == NULL)
    {
    log_err(PBSE_MEM_MALLOC, __func__, "Cannot allocate memory for node properties");
    free(sub);
    return(PBSE_MEM_MALLOC);
    }

  memcpy(sub->as_buf,main_node->as_buf,sub->as_bufsize);

  dest->nd_prop = sub;

  /* set sub's offset to the same as main_node's.  Every pointer into the
   * source buffer is translated into the same offset within the copy; the
   * same is done below for the as_string values */
  sub->as_next = sub->as_buf + (main_node->as_next - main_node->as_buf);

  plink = &dest->nd_first;

  for (i = 0; i <= main_node->as_npointers; i++)
    {
    if (i < main_node->as_npointers)
      {
      sub->as_string[i] = sub->as_buf + (main_node->as_string[i] - main_node->as_buf);
      pdest = init_prop(sub->as_string[i]);
      }
    else
      {
      /* now add in name as last prop */
      pdest = init_prop(dest->nd_name);
      }

    if (pdest == NULL)
      {
      /* leave the list built so far consistent and give up */
      log_err(PBSE_MEM_MALLOC, __func__, "Cannot allocate memory for a node property");

      if (tail != NULL)
        dest->nd_last = tail;

      return(PBSE_MEM_MALLOC);
      }

    *plink = pdest;
    plink  = &pdest->next;
    tail   = pdest;
    }

  dest->nd_last = tail;

  return(PBSE_NONE);
  }  /* END copy_properties() */




/*
 * accepts a string of numbers separated by commas. it places the
 * number in val and advances the string to the next number past the comma
 *
 * If the string holds no further comma, *str is left where it is, so a
 * later call reads the same (last) number again.
 *
 * Returns PBSE_NONE, or PBSE_BAD_PARAMETER if *str or val is NULL.
 */
static int read_val_and_advance(

  int   *val,
  char **str)

  {
  char *comma;

  if ((*str == NULL) ||
      (val == NULL))
    return(PBSE_BAD_PARAMETER);

  *val = atoi(*str);

  comma = strchr(*str,',');

  if (comma != NULL)
    *str = comma + 1;

  return(PBSE_NONE);
  }  /* END read_val_and_advance() */




/* creates the private numa nodes on this node
 *
 * One node named "<parent name>-<index>" is created per node board.  Each
 * gets the parent's ports and socket address, its share of execution slots
 * and gpus, and a copy of the parent's properties, and is inserted into the
 * parent's node_boards tree.  The per-board slot and gpu counts come from
 * the comma separated numa_str / gpu_str when present, otherwise the
 * parent's totals are divided evenly among the boards.
 *
 * @param pnode - the node that will house the numa nodes
 * @param pul   - address array handed to every numa node (not copied)
 *
 * @return PBSE_NONE on success (also when pnode is not a numa node),
 *         PBSE_BAD_PARAMETER for a NULL node, PBSE_SYSTEM or the error from
 *         initialize_pbsnode() on failure
 */
static int setup_node_boards(

  struct pbsnode *pnode,
  u_long         *pul)

  {
  int             i;
  int             j;
  struct pbsnode *pn;
  char            pname[MAX_LINE];
  char           *np_ptr = NULL;
  char           *gp_ptr = NULL;
  char           *allocd_name;
  int             np;
  int             gpus;
  int             rc = PBSE_NONE;

  char            log_buf[LOCAL_LOG_BUF_SIZE];

  if (pnode == NULL)
    {
    rc = PBSE_BAD_PARAMETER;
    log_err(rc, __func__, "NULL input pbsnode poiner");
    return(rc);
    }

  pnode->parent = NULL;

  /* if this isn't a numa node, return no error */
  if ((pnode->num_node_boards == 0) &&
      (pnode->numa_str == NULL))
    {
    return(PBSE_NONE);
    }

  /* determine the number of cores per node */
  if (pnode->numa_str != NULL)
    {
    np_ptr = pnode->numa_str;
    read_val_and_advance(&np,&np_ptr);
    }
  else
    np = pnode->nd_slots.get_total_execution_slots() / pnode->num_node_boards;

  /* determine the number of gpus per node */
  if (pnode->gpu_str != NULL)
    {
    gp_ptr = pnode->gpu_str;
    read_val_and_advance(&gpus,&gp_ptr);
    }
  else if (pnode->num_node_boards != 0)
    gpus = pnode->nd_ngpus / pnode->num_node_boards;
  else
    gpus = 0;  /* numa_str given without a board count: nothing to divide */

  for (i = 0; i < pnode->num_node_boards; i++)
    {
    pn = (struct pbsnode *)calloc(1, sizeof(struct pbsnode));

    if (pn == NULL)
      {
      log_err(PBSE_SYSTEM, __func__, "Cannot allocate memory for numa node\n");

      return(PBSE_SYSTEM);
      }

    /* each numa node just has a number for a name */
    snprintf(pname,sizeof(pname),"%s-%d",
      pnode->nd_name,
      i);

    allocd_name = strdup(pname);

    if (allocd_name == NULL)
      {
      /* no memory error */
      log_err(PBSE_SYSTEM, __func__, "Cannot allocate memory for node name\n");
      free(pn);

      return(PBSE_SYSTEM);
      }

    if ((rc = initialize_pbsnode(pn, allocd_name, pul, NTYPE_CLUSTER, TRUE)) != PBSE_NONE)
      {
      free(allocd_name);
      free(pn);

      return(rc);
      }

    /* make sure the server communicates on the correct ports */
    pn->nd_mom_port = pnode->nd_mom_port;
    pn->nd_mom_rm_port = pnode->nd_mom_rm_port;
    memcpy(&pn->nd_sock_addr, &pnode->nd_sock_addr, sizeof(pn->nd_sock_addr));

    /* create the subnodes for this node */
    for (j = 0; j < np; j++)
      add_execution_slot(pn);

    /* update the np string pointer only after the current value has been
     * used, so the first entry of numa_str is applied to the first board
     * (the gpu list below is handled the same way) */
    if (np_ptr != NULL)
      read_val_and_advance(&np,&np_ptr);

    /* create the gpu subnodes for this node */
    for (j = 0; j < gpus; j++)
      {
      if (create_a_gpusubnode(pn) != PBSE_NONE)
        {
        /* ERROR */
        free(pn);

        return(PBSE_SYSTEM);
        }
      }

    /* update the gpu string pointer */
    if (gp_ptr != NULL)
      read_val_and_advance(&gpus,&gp_ptr);

    copy_properties(pn, pnode);

    /* add the node to the private tree */
    pnode->node_boards = AVL_insert(i,
        pn->nd_mom_port,
        pn,
        pnode->node_boards);

    /* set my parent node pointer */
    pn->parent = pnode;
    }  /* END for each node_board */

  if (LOGLEVEL >= 3)
    {
    snprintf(log_buf,sizeof(log_buf),
      "Successfully created %d numa nodes for node %s\n",
      pnode->num_node_boards,
      pnode->nd_name);

    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_NODE, __func__, log_buf);
    }

  return(PBSE_NONE);
  }  /* END setup_node_boards() */




/* recheck_for_node :
 *
This function is called whenever an entry in the nodes file does * not resolve on server initialization. This function is called * periodically to see if the node is now resolvable and if so * add it to the list of available MOM nodes. */ static void recheck_for_node( struct work_task *ptask) { node_info *host_info; int rc; int bad; if ((host_info = (node_info *)ptask->wt_parm1) == NULL) { free(ptask->wt_mutex); free(ptask); return; } if ((rc = create_pbs_node( host_info->nodename, host_info->plist, host_info->perms, &bad))) { /* we created a new host_info in create_pbs_node. We need to free this one */ free_attrlist(&host_info->atrlist); if (host_info->nodename) { free(host_info->nodename); } free(host_info); } free(ptask->wt_mutex); free(ptask); return; } /* END recheck_for_node() */ /* * create_pbs_node - create pbs node structure, i.e. add a node */ int create_pbs_node( char *objname, svrattrl *plist, int perms, int *bad) { struct pbsnode *pnode = NULL; char log_buf[LOCAL_LOG_BUF_SIZE]; int ntype; /* node type; time-shared, not */ char *pname; /* node name w/o any :ts */ u_long *pul = NULL; /* 0 terminated host adrs array*/ int rc; node_info *host_info; int i; u_long addr; time_t time_now = time(NULL); if ((rc = process_host_name_part(objname, &pul, &pname, &ntype)) != 0) { svrattrl *pal, *pattrl; /* the host name in the nodes file did not resolve. 
We will set up a process to check periodically to see if the node will resolve later */ host_info = (node_info *)calloc(1, sizeof(node_info)); if (host_info == NULL) { log_err(-1, __func__, "create_pbs_node calloc failed"); if (pul != NULL) free(pul); return(PBSE_MEM_MALLOC); } CLEAR_HEAD(host_info->atrlist); /* allocate and copy the objname plist and perms */ host_info->perms = perms; pal = plist; while (pal != NULL) { pattrl = attrlist_create(pal->al_atopl.name, 0, strlen(pal->al_atopl.value) + 1); if (pattrl == NULL) { log_err(-1, __func__, "cannot create node attribute"); free(host_info); if (pul != NULL) free(pul); return(PBSE_MEM_MALLOC); } strcpy((char *)pattrl->al_value, pal->al_atopl.value); pattrl->al_flags = SET; append_link(&host_info->atrlist, &pattrl->al_link, pattrl); pal = (svrattrl *)GET_NEXT(pal->al_link); } pattrl = (svrattrl *)GET_NEXT(host_info->atrlist); host_info->plist = pattrl; if (objname != NULL) { host_info->nodename = (char *)calloc(1, strlen(objname)+1); if (host_info->nodename == NULL) { free(host_info); if (pul != NULL) free(pul); log_err(-1, __func__, "create_pbs_node calloc failed"); return(PBSE_MEM_MALLOC); } strcpy(host_info->nodename, objname); } set_task(WORK_Timed, time_now + 30, recheck_for_node, host_info, FALSE); if (pul != NULL) free(pul); return(rc); } if (pul == NULL) { free(pname); snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "no valid IP addresses found for '%s' - check name service", objname); log_err(-1, "process_host_name_part", log_buf); return(PBSE_SYSTEM); } if ((pnode = find_nodebyname(pname)) != NULL) { free(pname); free(pul); unlock_node(pnode, __func__, NULL, LOGLEVEL); return(PBSE_NODEEXIST); } if ((pnode = (struct pbsnode *)calloc(1, sizeof(struct pbsnode))) == NULL) { free(pul); free(pname); return(PBSE_SYSTEM); } if ((rc = initialize_pbsnode(pnode, pname, pul, ntype, FALSE)) != PBSE_NONE) { free(pul); free(pname); free(pnode); return(rc); } try { /* All nodes have at least one execution slot */ 
add_execution_slot(pnode); rc = mgr_set_node_attr( pnode, node_attr_def, ND_ATR_LAST, plist, perms, bad, (void *)pnode, ATR_ACTION_ALTER); if (rc != 0) { effective_node_delete(&pnode); return(rc); } } catch(...) { free(pul); free(pname); free(pnode); return(-1); } for (i = 0; pul[i]; i++) { if (LOGLEVEL >= 6) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "node '%s' allows trust for ipaddr %ld.%ld.%ld.%ld\n", pnode->nd_name, (pul[i] & 0xff000000) >> 24, (pul[i] & 0x00ff0000) >> 16, (pul[i] & 0x0000ff00) >> 8, (pul[i] & 0x000000ff)); log_record(PBSEVENT_SCHED,PBS_EVENTCLASS_REQUEST,__func__,log_buf); } addr = pul[i]; ipaddrs = AVL_insert(addr, pnode->nd_mom_port, pnode, ipaddrs); } /* END for (i) */ if ((rc = setup_node_boards(pnode,pul)) != PBSE_NONE) { return(rc); } insert_node(&allnodes,pnode); svr_totnodes++; recompute_ntype_cnts(); /* SUCCESS */ return(PBSE_NONE); } /* End create_pbs_node() */ /* * parse_node_token - parse tokens in the nodes file * * Token is returned, if null then there was none. * If there is an error, then "err" is set non-zero. * On following call, with argument "start" as null pointer, then * resume where left off. * * If "cok" is true, then this is first token (node name) and ':' is * allowed and '=' is not. For following tokens, allow '=' as separator * between "keyword" and "value". Will get value as next token. 
*/

/*
 * Notes on parse_node_token():
 *  - the scan position is kept in a function-local static, so the routine
 *    handles one line at a time and is not reentrant;
 *  - the input buffer is modified: the character ending the token is
 *    overwritten with '\0' after being reported through "term";
 *  - when no token is found NULL is returned and "term" is left untouched;
 *  - an illegal character sets "err" but scanning continues to the end of
 *    the token, which is still returned.
 */

static char *parse_node_token(

  char *start,  /* if null, restart where left off */
  int   cok,    /* flag - non-zero if colon ":" allowed in token */
  int   comma,  /* flag - non-zero if comma ',' allowed in token */
  int  *err,    /* RETURN: non-zero if error */
  char *term)   /* RETURN: character terminating token */

  {
  static char *pt = NULL;
  char        *ts;

  *err = 0;

  if (start != NULL)
    pt = start;

  /* "resume" was requested before any line was ever supplied */
  if (pt == NULL)
    return(NULL);

  /* skip leading whitespace; <ctype.h> needs an unsigned char value */
  while ((*pt != '\0') && isspace((unsigned char)*pt))
    pt++;

  if (*pt == '\0')
    {
    return(NULL);  /* no token */
    }

  ts = pt;

  /* test for legal characters in token */
  for (; *pt != '\0'; pt++)
    {
    if (isalnum((unsigned char)*pt) || strchr("-._[]", *pt))
      continue;

    if (isspace((unsigned char)*pt))
      break;

    if (cok && (*pt == ':'))
      continue;

    if (comma && (*pt == ','))
      continue;

    if (!cok && (*pt == '='))
      break;

    *err = 1;
    }  /* END for() */

  *term = *pt;

  /* terminate the token and step past the separator for the next call */
  if (*pt != '\0')
    {
    *pt++ = '\0';
    }

  return(ts);
  }  /* END parse_node_token() */




/*
 * add_to_property_list()
 *
 * adds token to the list of properties
 * @pre-cond: token must be a valid string pointer (a NULL token is ignored)
 * @post-cond: token will be appended to propstr, which is the list of
 * properties, preceded by a comma when propstr already holds something.
 */

void add_to_property_list(

  std::stringstream &propstr,
  const char        *token)

  {
  if (token != NULL)
    {
    if (propstr.str().size() != 0)
      propstr << ",";

    propstr << token;
    }
  }




/*
 * Read the file, "nodes", containing the list of properties for each node.
 * The list of nodes is formed and stored in allnodes.
 * Return -1 on error, 0 otherwise.
 *
 * Read the node state file, "node_state", for any "offline"
 * conditions which should be set in the nodes.
*/ int setup_nodes(void) { FILE *nin; char line[MAXLINE << 4]; char note[MAX_NOTE+1]; char *nodename; std::stringstream propstr; char *token; char *open_bracket; char *close_bracket; char *dash; char tmp_node_name[MAX_LINE]; char log_buf[LOCAL_LOG_BUF_SIZE]; int bad; int num; int linenum; int err; int start = -1; int end = -1; bool is_alps_reporter = false; bool is_alps_starter = false; bool is_alps_compute = false; long cray_enabled = FALSE; struct pbsnode *np; char *val; char xchar; svrattrl *pal; int perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR; tlist_head atrlist; extern char server_name[]; extern resource_t next_resource_tag; snprintf(log_buf, sizeof(log_buf), "%s()", __func__); log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf); CLEAR_HEAD(atrlist); if ((nin = fopen(path_nodes, "r")) == NULL) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "cannot open node description file '%s' in setup_nodes()\n", path_nodes); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,server_name,log_buf); return(0); } next_resource_tag = time(0); /* initialize next resource handle */ svr_totnodes = 0; get_svr_attr_l(SRV_ATR_CrayEnabled, &cray_enabled); /* clear out line so we don't have residual data if there is no LF */ memset(line, 0, sizeof(line)); for (linenum = 1; fgets(line, sizeof(line) - 1, nin); linenum++) { if (line[0] == '#') /* comment */ { memset(line, 0, sizeof(line)); continue; } is_alps_reporter = false; is_alps_starter = false; is_alps_compute = false; propstr.str(""); /* first token is the node name, may have ":ts" appended */ token = parse_node_token(line, 1, 0, &err, &xchar); if (token == NULL) { memset(line, 0, sizeof(line)); continue; /* blank line */ } if (err != 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "invalid character in token \"%s\" on line %d", token, linenum); goto errtoken2; } // cray allows numeric node names if (cray_enabled == FALSE) { if (!isalpha((int)*token)) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "token \"%s\" doesn't start with alpha on 
line %d", token, linenum); goto errtoken2; } } nodename = token; /* now process remaining tokens (if any), they may be either */ /* attributes (keyword=value) or old style properties */ while (1) { token = parse_node_token(NULL, 0, 0, &err, &xchar); if (err != 0) goto errtoken1; if (token == NULL) break; if (xchar == '=') { /* have new style pbs_attribute, keyword=value */ val = parse_node_token(NULL, 0, 1, &err, &xchar); if ((val == NULL) || (err != 0) || (xchar == '=')) goto errtoken1; pal = attrlist_create(token, 0, strlen(val) + 1); if (pal == NULL) { strcpy(log_buf, "cannot create node attribute"); goto errtoken2; } strcpy((char *)pal->al_value, val); pal->al_flags = SET; append_link(&atrlist, &pal->al_link, pal); } else { /* old style properity */ if (!strcmp(token, alps_starter_feature)) is_alps_starter = true; if (!strcmp(token, alps_reporter_feature)) { is_alps_reporter = true; add_to_property_list(propstr, "cray_compute"); } else { if (!strcmp(token, "cray_compute")) is_alps_compute = true; add_to_property_list(propstr, token); } } } /* END while(1) */ /* if any properties, create property attr and add to list */ if (propstr.str().size() != 0) { pal = (svrattrl *)attrlist_create((char *)ATTR_NODE_properties, 0, strlen(propstr.str().c_str()) + 1); if (pal == NULL) { strcpy(log_buf, "cannot create node attribute"); log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf); /* FAILURE */ return(-1); } strcpy((char *)pal->al_value, propstr.str().c_str()); pal->al_flags = SET; append_link(&atrlist, &pal->al_link, pal); } /* now create node and subnodes */ pal = (svrattrl *)GET_NEXT(atrlist); err = PBSE_NONE; if ((open_bracket = strchr(nodename,'[')) != NULL) { int num_digits; start = atoi(open_bracket+1); dash = strchr(open_bracket,'-'); close_bracket = strchr(open_bracket,']'); if ((dash == NULL) || (close_bracket == NULL)) { snprintf(log_buf, sizeof(log_buf), "malformed nodename with range: %s, must be of form [x-y]\n", nodename); goto errtoken2; 
} end = atoi(dash+1); /* nullify the open bracket */ *open_bracket = '\0'; num_digits = dash - open_bracket - 1; /* move past the closing bracket */ close_bracket++; while (start <= end) { int num_len = 1; int tmp = 10; snprintf(tmp_node_name, sizeof(tmp_node_name), "%s", nodename); /* determine the length of the number */ while (start / tmp > 0) { tmp *= 10; num_len++; } /* print extra zeros if needed */ while (num_len < num_digits) { strcat(tmp_node_name,"0"); num_len++; } sprintf(tmp_node_name+strlen(tmp_node_name),"%d%s", start, close_bracket); err = create_pbs_node(tmp_node_name,pal,perm,&bad); if (err != 0) break; start++; } } else if (is_alps_compute == false) { err = create_pbs_node(nodename, pal, perm, &bad); } if (err == PBSE_NODEEXIST) { snprintf(log_buf, sizeof(log_buf), "duplicate node \"%s\"on line %d", nodename, linenum); goto errtoken2; } if (err != 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "could not create node \"%s\", error = %d", nodename, err); log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf); free_attrlist(&atrlist); memset(line, 0, sizeof(line)); continue; } if (cray_enabled == TRUE) { if (is_alps_reporter == true) { np = find_nodebyname(nodename); np->nd_is_alps_reporter = TRUE; alps_reporter = np; initialize_all_nodes_array(&(np->alps_subnodes)); unlock_node(np, __func__, NULL, LOGLEVEL); } else if (is_alps_starter == true) { np = find_nodebyname(nodename); np->nd_is_alps_login = TRUE; add_to_login_holder(np); /* NYI: add to login node list */ unlock_node(np, __func__, NULL, LOGLEVEL); } else if (is_alps_compute == true) { np = create_alps_subnode(alps_reporter, nodename); // add features int bad; mgr_set_node_attr(np, node_attr_def, ND_ATR_LAST, pal, perm, &bad, (void *)np, ATR_ACTION_ALTER); unlock_node(np, __func__, NULL, LOGLEVEL); } } if (LOGLEVEL >= 3) { snprintf(log_buf, sizeof(log_buf), "node '%s' successfully loaded from nodes file", nodename); log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, 
log_buf); } free_attrlist(&atrlist); memset(line, 0, sizeof(line)); } /* END for (linenum) */ if (cray_enabled == TRUE) { if (login_node_count() == 0) { snprintf(log_buf, sizeof(log_buf), "pbs_server is Cray enabled but no login nodes are configured. Jobs cannot run. Exiting"); log_err(-1, __func__, log_buf); fclose(nin); return(-1); } } fclose(nin); nin = fopen(path_nodestate, "r"); if (nin != NULL) { while (fscanf(nin, "%s %d", line, &num) == 2) { int iter = -1; while ((np = next_host(&allnodes,&iter,NULL)) != NULL) { if (strcmp(np->nd_name, line) == 0) { np->nd_state = num; /* exclusive bits are calculated later in set_old_nodes() */ np->nd_state &= ~INUSE_JOB; unlock_node(np, __func__, "match", LOGLEVEL); break; } unlock_node(np, __func__, "no match", LOGLEVEL); } } fclose(nin); } /* initialize note attributes */ nin = fopen(path_nodenote, "r"); if (nin != NULL) { while (fscanf(nin, "%s %" MAX_NOTE_STR "[^\n]", line, note) == 2) { if ((np = find_nodebyname(line)) != NULL) { np->nd_note = strdup(note); if (np->nd_note == NULL) { snprintf(log_buf, sizeof(log_buf), "couldn't allocate space for note (node = %s)", np->nd_name); log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf); } unlock_node(np, __func__, "init - no note", LOGLEVEL); } } fclose(nin); } /* SUCCESS */ return(0); errtoken1: snprintf(log_buf, sizeof(log_buf), "token \"%s\" in error on line %d of file nodes", token, linenum); errtoken2: log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf); free_attrlist(&atrlist); fclose(nin); /* FAILURE */ return(-1); } /* END setup_nodes() */ /* * delete_a_subnode - mark a (last) single subnode entry as deleted */ void delete_a_subnode( struct pbsnode *pnode) { pnode->nd_slots.remove_execution_slot(); return; } /* END delete_a_subnode() */ /* * deletes the last gpu subnode * frees the node and decrements the number to adjust */ static void delete_a_gpusubnode( struct pbsnode *pnode) { struct gpusubn *tmp = pnode->nd_gpusn + 
(pnode->nd_ngpus - 1); if (pnode->nd_ngpus < 1) { /* ERROR, can't free non-existent subnodes */ return; } if (tmp->inuse == FALSE) pnode->nd_ngpus_free--; /* decrement the number of gpu subnodes */ pnode->nd_ngpus--; /* DONE */ } /* END delete_a_gpusubnode() */ /* * node_np_action - action routine for node's np pbs_attribute */ int node_np_action( pbs_attribute *new_attr, /* derive props into this pbs_attribute*/ void *pobj, /* pointer to a pbsnode struct */ int actmode) /* action mode; "NEW" or "ALTER" */ { struct pbsnode *pnode = (struct pbsnode *)pobj; short old_np; short new_np; if (new_attr == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input attributes"); return(PBSE_BAD_PARAMETER); } if (pobj == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input node"); return(PBSE_BAD_PARAMETER); } switch (actmode) { case ATR_ACTION_NEW: new_attr->at_val.at_long = pnode->nd_slots.get_total_execution_slots(); break; case ATR_ACTION_ALTER: old_np = pnode->nd_slots.get_total_execution_slots(); new_np = (short)new_attr->at_val.at_long; if (new_np <= 0) return PBSE_BADATVAL; while (new_np != old_np) { if (new_np < old_np) { delete_a_subnode(pnode); old_np--; } else { add_execution_slot(pnode); old_np++; } } break; default: log_err(-1,__func__, "unexpected action mode"); return(-1); } return 0; } /* END node_np_action */ /* * node_mom_port_action - action routine for node's port pbs_attribute */ int node_mom_port_action( pbs_attribute *new_attr, /*derive props into this pbs_attribute*/ void *pobj, /*pointer to a pbsnode struct */ int actmode) /*action mode; "NEW" or "ALTER" */ { struct pbsnode *pnode = (struct pbsnode *)pobj; int rc = 0; if (new_attr == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc,__func__, "NULL input attributes"); return(rc); } if (pobj == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc,__func__, "NULL input node"); return(rc); } switch (actmode) { case ATR_ACTION_NEW: new_attr->at_val.at_long = pnode->nd_mom_port; break; case ATR_ACTION_ALTER: 
pnode->nd_mom_port = new_attr->at_val.at_long; break; default: rc = PBSE_INTERNAL; } return rc; } /* * node_mom_rm_port_action - action routine for node's port pbs_attribute */ int node_mom_rm_port_action( pbs_attribute *new_attr, /* derive props into this pbs_attribute*/ void *pobj, /* pointer to a pbsnode struct */ int actmode) /* action mode; "NEW" or "ALTER" */ { struct pbsnode *pnode = (struct pbsnode *)pobj; int rc = 0; if (new_attr == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc,__func__, "NULL input attributes"); return(rc); } if (pobj == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc,__func__, "NULL input node"); return(rc); } switch (actmode) { case ATR_ACTION_NEW: new_attr->at_val.at_long = pnode->nd_mom_rm_port; break; case ATR_ACTION_ALTER: pnode->nd_mom_rm_port = new_attr->at_val.at_long; break; default: rc = PBSE_INTERNAL; } return rc; } int node_gpus_action( pbs_attribute *new_attr, void *pnode, int actmode) { struct pbsnode *np = (struct pbsnode *)pnode; int old_gp; int new_gp; int rc = 0; if (new_attr == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc,__func__, "NULL input attributes"); return(rc); } if (pnode == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc,__func__, "NULL input node"); return(rc); } switch (actmode) { case ATR_ACTION_NEW: new_attr->at_val.at_long = np->nd_ngpus; break; case ATR_ACTION_ALTER: old_gp = np->nd_ngpus; new_gp = new_attr->at_val.at_long; if (new_gp <= 0) return PBSE_BADATVAL; while (new_gp != old_gp) { if (new_gp < old_gp) { delete_a_gpusubnode((struct pbsnode *)pnode); old_gp--; } else { create_a_gpusubnode((struct pbsnode *)pnode); old_gp++; } } break; default: rc = PBSE_INTERNAL; } return(rc); } /* END node_gpus_action() */ int node_mics_action( pbs_attribute *new_attr, void *pnode, int actmode) { struct pbsnode *np = (struct pbsnode *)pnode; int old_mics; int new_mics; int rc = 0; switch (actmode) { case ATR_ACTION_NEW: new_attr->at_val.at_long = np->nd_nmics; break; case ATR_ACTION_ALTER: old_mics = np->nd_nmics; new_mics 
= new_attr->at_val.at_long; if (new_mics <= 0) return(PBSE_BADATVAL); np->nd_nmics = new_mics; if (new_mics > old_mics) { np->nd_nmics_free += new_mics - old_mics; np->nd_nmics = new_mics; if (new_mics > np->nd_nmics_alloced) { struct jobinfo *tmp = (struct jobinfo *)calloc(new_mics, sizeof(struct jobinfo)); if (tmp == NULL) return(ENOMEM); memcpy(tmp, np->nd_micjobs, sizeof(struct jobinfo) * np->nd_nmics_alloced); free(np->nd_micjobs); np->nd_micjobs = tmp; np->nd_nmics_alloced = new_mics; } } break; default: rc = PBSE_INTERNAL; } return(rc); } /* END node_mics_action() */ int node_numa_action( pbs_attribute *new_attr, /* derive status into this pbs_attribute*/ void *pnode, /* pointer to a pbsnode struct */ int actmode) /* action mode; "NEW" or "ALTER" */ { struct pbsnode *np = (struct pbsnode *)pnode; int rc = 0; if (new_attr == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc,__func__, "NULL input attributes"); return(rc); } if (pnode == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc,__func__, "NULL input node"); return(rc); } switch (actmode) { case ATR_ACTION_NEW: new_attr->at_val.at_long = np->num_node_boards; break; case ATR_ACTION_ALTER: np->num_node_boards = new_attr->at_val.at_long; break; default: rc = PBSE_INTERNAL; } return(rc); } /* END node_numa_action */ int numa_str_action( pbs_attribute *new_attr, /* derive status into this pbs_attribute*/ void *pnode, /* pointer to a pbsnode struct */ int actmode) /* action mode; "NEW" or "ALTER" */ { struct pbsnode *np = (struct pbsnode *)pnode; int len = 0; if (new_attr == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input attributes"); return(PBSE_BAD_PARAMETER); } if (pnode == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input node"); return(PBSE_BAD_PARAMETER); } switch (actmode) { case ATR_ACTION_NEW: if (np->numa_str != NULL) { len = strlen(np->numa_str) + 1; new_attr->at_val.at_str = (char *)calloc(len, sizeof(char)); if (new_attr->at_val.at_str == NULL) return(PBSE_SYSTEM); 
strcpy(new_attr->at_val.at_str,np->numa_str); } else new_attr->at_val.at_str = NULL; break; case ATR_ACTION_ALTER: if (new_attr->at_val.at_str != NULL) { len = strlen(new_attr->at_val.at_str) + 1; np->numa_str = (char *)calloc(len, sizeof(char)); if (np->numa_str == NULL) return(PBSE_SYSTEM); strcpy(np->numa_str,new_attr->at_val.at_str); } else np->numa_str = NULL; break; default: return(PBSE_INTERNAL); } return(0); } /* END numa_str_action() */ int gpu_str_action( pbs_attribute *new_attr, void *pnode, int actmode) { struct pbsnode *np = (struct pbsnode *)pnode; int len; if (new_attr == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input attributes"); return(PBSE_BAD_PARAMETER); } if (pnode == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input node"); return(PBSE_BAD_PARAMETER); } switch (actmode) { case ATR_ACTION_NEW: if (np->gpu_str != NULL) { len = strlen(np->gpu_str) + 1; new_attr->at_val.at_str = (char *)calloc(len, sizeof(char)); if (new_attr->at_val.at_str == NULL) return(PBSE_SYSTEM); strcpy(new_attr->at_val.at_str,np->gpu_str); } else new_attr->at_val.at_str = NULL; break; case ATR_ACTION_ALTER: if (new_attr->at_val.at_str != NULL) { len = strlen(new_attr->at_val.at_str) + 1; np->gpu_str = (char *)calloc(len, sizeof(char)); if (np->gpu_str == NULL) return(PBSE_SYSTEM); strcpy(np->gpu_str,new_attr->at_val.at_str); } else np->gpu_str = NULL; break; default: return(PBSE_INTERNAL); } return(PBSE_NONE); } /* END gpu_str_action() */ /* create_partial_pbs_node - similar to create_pbs_node except there will only be a name for the new node and no attributes or properties */ int create_partial_pbs_node( char *nodename, unsigned long addr, int perms) { int ntype; /* node type; time-shared, not */ int rc; int bad = 0; svrattrl *plist = NULL; struct pbsnode *pnode = NULL; u_long *pul = NULL; char *pname = NULL; if (nodename == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input name"); return(PBSE_BAD_PARAMETER); } pnode = (struct pbsnode *)calloc(1, 
sizeof(struct pbsnode)); if (pnode == NULL) { return(PBSE_SYSTEM); } ntype = NTYPE_CLUSTER; pul = (u_long *)calloc(2, sizeof(u_long)); if (!pul) { free(pnode); return(PBSE_SYSTEM); } memset(pul, 0, sizeof(u_long) * 2); *pul = addr; pname = strdup(nodename); if ((rc = initialize_pbsnode(pnode, pname, pul, ntype, FALSE)) != PBSE_NONE) { free(pul); free(pname); free(pnode); return(rc); } /* create and initialize the first subnode to go with the parent node */ add_execution_slot(pnode); rc = mgr_set_node_attr( pnode, node_attr_def, ND_ATR_LAST, plist, perms, &bad, (void *)pnode, ATR_ACTION_ALTER); if (rc != 0) { lock_node(pnode, __func__, NULL, LOGLEVEL); effective_node_delete(&pnode); return(rc); } insert_node(&allnodes,pnode); AVL_insert(addr, pnode->nd_mom_port, pnode, ipaddrs); svr_totnodes++; recompute_ntype_cnts(); return(PBSE_NONE); /*create completely successful*/ } /* END create_partial_pbs_node */ /* * initializes an allocated node iterator */ void reinitialize_node_iterator( node_iterator *iter) { if (iter != NULL) { iter->node_index = -1; iter->numa_index = -1; iter->alps_index = -1; } } /* END reinitialize_node_iterator() */ static struct pbsnode *get_my_next_node_board( node_iterator *iter, struct pbsnode *pnode) { struct pbsnode *numa; iter->numa_index++; numa = AVL_find(iter->numa_index, pnode->nd_mom_port, pnode->node_boards); unlock_node(pnode, __func__, "pnode", LOGLEVEL); if (numa != NULL) lock_node(numa, __func__, "numa", LOGLEVEL); return(numa); } /* END get_my_next_node_board() */ static struct pbsnode *get_my_next_alps_node( node_iterator *iter, struct pbsnode *pnode) { struct pbsnode *alps_node = next_host(&(pnode->alps_subnodes), &(iter->alps_index), NULL); unlock_node(pnode, __func__, NULL, LOGLEVEL); return(alps_node); } /* END get_my_next_alps_node() */ /* * @return the next node, from 0->end, accounting for numa nodes */ struct pbsnode *next_node( all_nodes *an, struct pbsnode *current, node_iterator *iter) { struct pbsnode *next; struct 
pbsnode *tmp; if (an == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input all_nodes pointer"); return(NULL); } if (iter == NULL) { log_err(PBSE_BAD_PARAMETER,__func__, "NULL input iter pointer"); return(NULL); } if (current == NULL) { pthread_mutex_lock(an->allnodes_mutex); /* the first call to next_node */ next = (struct pbsnode *)next_thing(an->ra, &iter->node_index); if (next != NULL) lock_node(next, __func__, "next != NULL", LOGLEVEL); pthread_mutex_unlock(an->allnodes_mutex); if (next != NULL) { /* if I have node_boards, look at those and not me */ if (next->num_node_boards > 0) { next = get_my_next_node_board(iter,next); } else if (next->nd_is_alps_reporter) { next = get_my_next_alps_node(iter, next); } } } /* END first iteration */ else { long cray_enabled = FALSE; /* if current is a numa subnode, go back to the parent */ if ((iter->numa_index >= 0) || (iter->alps_index >= 0)) { tmp = current->parent; unlock_node(current, __func__, "current == NULL && numa_index > 0", LOGLEVEL); if (tmp == NULL) /* TODO: think about this check and apropriate return*/ { log_err(-1, __func__, "current->parent == NULL"); return(NULL); } lock_node(tmp, __func__, "tmp && numa_index > 0", LOGLEVEL); current = tmp; } get_svr_attr_l(SRV_ATR_CrayEnabled, &cray_enabled); /* move to the next host or get my next node board? 
*/ if (cray_enabled == TRUE) { if (current->nd_is_alps_reporter == TRUE) { if ((next = get_my_next_alps_node(iter, current)) == NULL) { iter->alps_index = -1; pthread_mutex_lock(an->allnodes_mutex); next = (struct pbsnode *)next_thing(an->ra, &iter->node_index); pthread_mutex_unlock(an->allnodes_mutex); if (next != NULL) { lock_node(next, __func__, NULL, LOGLEVEL); if (next->nd_is_alps_reporter) next = get_my_next_alps_node(iter, next); } } } else { unlock_node(current, __func__, NULL, LOGLEVEL); iter->alps_index = -1; pthread_mutex_lock(an->allnodes_mutex); next = (struct pbsnode *)next_thing(an->ra, &iter->node_index); pthread_mutex_unlock(an->allnodes_mutex); if (next != NULL) { lock_node(next, __func__, NULL, LOGLEVEL); if (next->nd_is_alps_reporter) next = get_my_next_alps_node(iter, next); } } } else if (iter->numa_index + 1 >= current->num_node_boards) { /* reset the numa_index to -1 */ iter->numa_index = -1; /* go to the next node in all nodes */ unlock_node(current, __func__, "next == NULL && numa_index+1", LOGLEVEL); pthread_mutex_lock(an->allnodes_mutex); next = (struct pbsnode *)next_thing(an->ra, &iter->node_index); pthread_mutex_unlock(an->allnodes_mutex); if (next != NULL) { lock_node(next, __func__, "next != NULL && numa_index+1", LOGLEVEL); if (next->num_node_boards > 0) { next = get_my_next_node_board(iter, next); } } } else { next = get_my_next_node_board(iter, current); } } /* END all other iterations */ return(next); } /* END next_node() */ void initialize_all_nodes_array( all_nodes *an) { if (an == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input pointer"); return; } an->ra = initialize_resizable_array(INITIAL_NODE_SIZE); an->ht = create_hash(INITIAL_HASH_SIZE); an->allnodes_mutex = (pthread_mutex_t *)calloc(1, sizeof(pthread_mutex_t)); pthread_mutex_init(an->allnodes_mutex,NULL); } /* END initialize_all_nodes_array() */ /* * insert a node into the array * * @param pnode - the node to be inserted * @return PBSE_NONE on success */ int 
insert_node(

  all_nodes      *an,    /* M */
  struct pbsnode *pnode) /* I */

  {
  int          rc = 0;

  if (an == NULL)
    {
    rc = PBSE_BAD_PARAMETER;
    log_err(rc, __func__, "NULL input all_nodes pointer");
    return(rc);
    }

  if (pnode == NULL)
    {
    rc = PBSE_BAD_PARAMETER;
    log_err(rc, __func__, "NULL input node pointer");
    return(rc);
    }

  pthread_mutex_lock(an->allnodes_mutex);

  if ((rc = insert_thing(an->ra,pnode)) == -1)
    {
    rc = ENOMEM;
    log_err(rc, __func__, "No memory to resize the array...SYSTEM FAILURE");
    }
  else
    {
    /* rc is the value insert_thing() returned; record it under the node's name */
    add_hash(an->ht,rc,pnode->nd_name);
    rc = PBSE_NONE;
    }

  pthread_mutex_unlock(an->allnodes_mutex);

  return(rc);
  } /* END insert_node() */




/*
 * remove a node from the array
 *
 * If the container mutex cannot be taken immediately, the node is unlocked
 * while waiting for it and locked again afterwards.
 *
 * @param pnode - the node to remove
 * @return PBSE_NONE if the node is removed
 */

int remove_node(

  all_nodes      *an,
  struct pbsnode *pnode)

  {
  int rc = 0;

  if (an == NULL)
    {
    rc = PBSE_BAD_PARAMETER;
    log_err(rc, __func__, "NULL input all_nodes pointer");
    return(rc);
    }

  if (pnode == NULL)
    {
    rc = PBSE_BAD_PARAMETER;
    log_err(rc, __func__, "NULL input node pointer");
    return(rc);
    }

  if (pthread_mutex_trylock(an->allnodes_mutex))
    {
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    pthread_mutex_lock(an->allnodes_mutex);
    lock_node(pnode, __func__, NULL, LOGLEVEL);
    }

  rc = remove_thing(an->ra,pnode);

  pthread_mutex_unlock(an->allnodes_mutex);

  return(rc);
  } /* END remove_node() */




/*
 * next_host - return the next node of the container
 *
 * @param an - the container being iterated
 * @param iter - iteration index, advanced by next_thing()
 * @param held - a node the caller currently has locked, or NULL
 *
 * When the container mutex is contended and held is non-NULL, held is
 * unlocked while waiting and looked up again by name afterwards.
 * NOTE(review): in that case the returned node is not locked here - confirm
 * callers that pass a non-NULL held expect this.
 *
 * @return the next node, or NULL at the end or on bad input
 */

struct pbsnode *next_host(

  all_nodes       *an,    /* I */
  int             *iter,  /* M */
  struct pbsnode  *held)  /* I */

  {
  struct pbsnode *pnode;
  char           *name = NULL;

  if (an == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL input all_nodes pointer");
    return(NULL);
    }

  if (iter == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL input iter pointer");
    return(NULL);
    }

  if (pthread_mutex_trylock(an->allnodes_mutex))
    {
    if (held != NULL)
      {
      /* remember the name so the node can be found again after waiting */
      name = strdup(held->nd_name);
      unlock_node(held, __func__, NULL, LOGLEVEL);
      }

    pthread_mutex_lock(an->allnodes_mutex);
    }

  pnode = (struct pbsnode *)next_thing(an->ra,iter);

  if ((pnode != NULL) &&
      ((pnode != held) &&
       (name == NULL)))
    {
    lock_node(pnode, __func__, NULL, LOGLEVEL);
    }

  pthread_mutex_unlock(an->allnodes_mutex);

  /* the result is only assigned to the local parameter; the lookup is done
   * for its effect on the node */
  if ((held != pnode) &&
      (name != NULL))
    held = find_nodebyname(name);

  if (name != NULL)
    free(name);

  return(pnode);
  } /* END next_host() */




/*
 * send_hierarchy_threadtask - thread task that sends the mom hierarchy to
 * one node
 *
 * @param vp - a hello_info owned by this task. It is freed here unless the
 *             send failed and it was queued on the failures container for
 *             another attempt (at most 3 retries).
 * @return always NULL
 */

void *send_hierarchy_threadtask(

  void *vp)

  {
  hello_info     *hi = (hello_info *)vp;
  struct pbsnode *pnode = NULL;
  char            log_buf[LOCAL_LOG_BUF_SIZE+1];
  unsigned short  port;

  if (hi == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL input pointer");
    return(NULL);
    }

  if (hi->name == NULL)
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL hello_info->name pointer");
    /* the task owns hi, so it must not be leaked on this path either */
    free(hi);
    return(NULL);
    }

  pnode = find_nodebyname(hi->name);

  if (pnode != NULL)
    {
    /* copy what is needed, then release the node before doing network I/O */
    port = pnode->nd_mom_rm_port;
    unlock_node(pnode, __func__, NULL, LOGLEVEL);

    if (send_hierarchy(hi->name, port) != PBSE_NONE)
      {
      if (hi->num_retries < 3) /*TODO: why 3? remove magic number*/
        {
        hi->num_retries++;
        hi->last_retry = time(NULL);
        add_hello_info(&failures, hi);
        /* don't let hi get free'd */
        hi = NULL;
        }
      }
    else
      {
      if (LOGLEVEL >= 3)
        {
        snprintf(log_buf, sizeof(log_buf),
          "Successfully sent hierarchy to %s", hi->name);
        log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, __func__, log_buf);
        }
      }
    }

  if (hi != NULL)
    {
    free(hi->name);
    free(hi);
    }

  return(NULL);
  } /* END send_hierarchy_threadtask() */




/*
 * send_hierarchy - connect to a node and send it the mom hierarchy
 *
 * @param name - host name of the node
 * @param port - port to connect to (host byte order)
 *
 * @return PBSE_NONE / DIS_SUCCESS when everything was written, the
 *         pbs_getaddrinfo() error when the name cannot be resolved, -1 when
 *         the connection fails, PBSE_MEM_MALLOC when the channel cannot be
 *         set up, otherwise the DIS error of the first failed write
 */

int send_hierarchy(

  char           *name,
  unsigned short  port)

  {
  char                log_buf[LOCAL_LOG_BUF_SIZE];
  char               *string;
  int                 ret = PBSE_NONE;
  int                 sock;
  struct addrinfo    *pAddrInfo;
  struct sockaddr_in  sa;
  struct tcp_chan    *chan = NULL;

  if ((ret = pbs_getaddrinfo(name,NULL,&pAddrInfo)) != PBSE_NONE)
    {
    return ret;
    }

  memcpy(&sa,pAddrInfo->ai_addr,sizeof(sa));
  sa.sin_port = htons(port);

  /* for now we'll only try once as this is going to be tried once each time in the loop */
  sock = tcp_connect_sockaddr((struct sockaddr *)&sa, sizeof(sa));

  if (sock < 0)
    {
    /* could not connect */
    /* - quiting after 5 retries",*/
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
      "Could not send mom hierarchy to host %s:%d",
      name, port);

    log_err(-1, __func__, log_buf);

    return(-1);
    }

  add_conn(sock, ToServerDIS, ntohl(sa.sin_addr.s_addr), sa.sin_port, PBS_SOCK_INET, NULL);

  if ((chan = DIS_tcp_setup(sock)) == NULL)
    {
    ret = PBSE_MEM_MALLOC;
    }
  /* write the protocol, version and command */
  else if ((ret = is_compose(chan, IS_CLUSTER_ADDRS)) == DIS_SUCCESS)
    {
    /* the hierarchy is a run of NUL terminated strings ended by an empty one */
    for (string = hierarchy_holder->str;
         string != NULL && *string != '\0';
         string += strlen(string) + 1)
      {
      if ((ret = diswst(chan, string)) != DIS_SUCCESS)
        {
        if (ret > 0)
          {
          snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
            "Could not send mom hierarchy to host %s - %s",
            name, dis_emsg[ret]);
          }
        else
          {
          snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
            "Unknown error when sending mom hierarchy to host %s",
            name);
          }

        log_err(-1, __func__, log_buf);

        break;
        }
      }

    /* only terminate the message if every entry went out; otherwise keep the
     * write error so that the caller sees the failure and retries */
    if (ret == DIS_SUCCESS)
      ret = diswst(chan, IS_EOL_MESSAGE);

    DIS_tcp_wflush(chan);
    }

  close_conn(sock, FALSE);

  if (chan != NULL)
    {
    DIS_tcp_cleanup(chan);
    }

  return(ret);
  } /* END send_hierarchy() */




/*
 * initialize_hello_container - create the array and mutex of a container
 *
 * @return hc, unchanged (NULL input is logged and returned as is)
 */

struct hello_container* initialize_hello_container(

  hello_container *hc)

  {
  if (hc != NULL)
    {
    hc->ra = initialize_resizable_array(INITIAL_NODE_SIZE);
    hc->hello_mutex = (pthread_mutex_t *)calloc(1, sizeof(pthread_mutex_t));
    pthread_mutex_init(hc->hello_mutex, NULL);
    }
  else
    {
    log_err(PBSE_BAD_PARAMETER, __func__, "NULL input container pointer was passed for initialization");
    }

  return hc;
  } /* END initialize_hello_container() */




/*
 * needs_hello - report whether node_name is present in the container
 *
 * @return the result of is_present(), evaluated under the container mutex
 */

int needs_hello(

  hello_container *hc,
  char            *node_name)

  {
  int needs;

  pthread_mutex_lock(hc->hello_mutex);
  needs = is_present(hc->ra, node_name);
  pthread_mutex_unlock(hc->hello_mutex);

  return(needs);
  } /* END needs_hello */




/*
 * add_hello - queue a node name in the container
 *
 * The container takes ownership of node_name: it is stored, not copied, and
 * it is freed here when the entry cannot be added.
 *
 * @return the value returned by insert_thing() on success, ENOMEM on failure
 */

int add_hello(

  hello_container *hc,
  char            *node_name)

  {
  int         rc;
  hello_info *hi = (hello_info *)calloc(1, sizeof(hello_info));

  if (hi == NULL)
    {
    /* same ownership rule as the insert failure below */
    free(node_name);
    return(ENOMEM);
    }

  hi->name = node_name;

  pthread_mutex_lock(hc->hello_mutex);

  if ((rc = insert_thing(hc->ra, hi)) == -1)
    {
    rc = ENOMEM;
    free(hi->name);
    free(hi);
    }

  pthread_mutex_unlock(hc->hello_mutex);

  return(rc);
  } /* END add_hello() */




int add_hello_after(

  hello_container *hc,
  char            *node_name,
  int              index)

  {
  hello_info *hi = NULL;
  int         rc = -1;

  if (hc == NULL)
    {
log_err(PBSE_BAD_PARAMETER, __func__, "NULL input container pointer"); return(PBSE_BAD_PARAMETER); } hi = (hello_info *)calloc(1, sizeof(hello_info)); hi->name = node_name; pthread_mutex_lock(hc->hello_mutex); if ((rc = insert_thing_after(hc->ra, hi, index)) == -1) { rc = ENOMEM; free(hi->name); free(hi); } pthread_mutex_unlock(hc->hello_mutex); return(rc); } /* END insert_thing_after() */ int add_hello_info( struct hello_container *hc, struct hello_info *hi) { int rc = -1; if (hc == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input container pointer"); return(PBSE_BAD_PARAMETER); } pthread_mutex_lock(hc->hello_mutex); if ((rc = insert_thing(hc->ra, hi)) == -1) rc = ENOMEM; pthread_mutex_unlock(hc->hello_mutex); return(rc); } /* END add_hello_info() */ hello_info *pop_hello( hello_container *hc) { hello_info *hi = NULL; int index; if (hc == NULL) { log_err(PBSE_BAD_PARAMETER, __func__, "NULL input container pointer"); return(NULL); } pthread_mutex_lock(hc->hello_mutex); index = hc->ra->slots[ALWAYS_EMPTY_INDEX].next; if (index != ALWAYS_EMPTY_INDEX) { hi = (hello_info *)hc->ra->slots[index].item; if (time(NULL) - hi->last_retry > HELLO_RESEND_WAIT_TIME) hi = (hello_info *)pop_thing(hc->ra); else hi = NULL; } pthread_mutex_unlock(hc->hello_mutex); return(hi); } /* END pop_hello() */ int remove_hello( hello_container *hc, char *node_name) { int rc = PBSE_NONE; int iter = -1; int prev_index = -1; hello_info *hi; if (hc == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc, __func__, "NULL input container pointer"); return(rc); } if (node_name == NULL) { rc = PBSE_BAD_PARAMETER; log_err(rc, __func__, "NULL input name pointer"); return(rc); } pthread_mutex_lock(hc->hello_mutex); while ((hi = (hello_info *)next_thing(hc->ra, &iter)) != NULL) { if (!strcmp(hi->name, node_name)) { if (prev_index == -1) prev_index = hc->ra->slots[ALWAYS_EMPTY_INDEX].next; rc = remove_thing_from_index(hc->ra, prev_index); } } pthread_mutex_unlock(hc->hello_mutex); return(rc); } /* END 
remove_hello() */