/*****************************************************************************\ ** info.c - job/node info related functions ***************************************************************************** * Copyright (C) 2011-2012 National University of Defense Technology. * Written by Hongjia Cao . * All rights reserved. * * This file is part of Slurm, a resource management program. * For details, see . * Please also read the included file: DISCLAIMER. * * Slurm is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * In addition, as a special exception, the copyright holders give permission * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than * OpenSSL. If you modify file(s) with this exception, you may extend this * exception to your version of the file(s), but you are not obligated to do * so. If you do not wish to do so, delete this exception statement from your * version. If you delete this exception statement from all source files in * the program, then also delete it here. * * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along * with Slurm; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ #include "pmi.h" #include "setup.h" #include "client.h" #if !defined(__FreeBSD__) #include #endif #include #include #include #include #include #include #include #include #include "slurm/slurm.h" #include "src/srun/libsrun/launch.h" #include "src/common/strlcpy.h" #include "src/common/switch.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xmalloc.h" #define NODE_ATTR_SIZE_INC 8 /* pending node attribute get request */ typedef struct nag_req { int fd; int rank; char key[PMI2_MAX_KEYLEN]; struct nag_req *next; } nag_req_t; static nag_req_t *nag_req_list = NULL; /* node attributes */ static int na_cnt = 0; static int na_size = 0; static char **node_attr = NULL; #define KEY_INDEX(i) (i * 2) #define VAL_INDEX(i) (i * 2 + 1) static char *ifconfig(void); extern int enqueue_nag_req(int fd, int rank, char *key) { nag_req_t *req; req = xmalloc(sizeof(nag_req_t)); req->fd = fd; req->rank = rank; strlcpy(req->key, key, PMI2_MAX_KEYLEN); /* insert in the head */ req->next = nag_req_list; nag_req_list = req; return SLURM_SUCCESS; } extern int node_attr_put(char *key, char *val) { nag_req_t *req = NULL, **pprev = NULL; client_resp_t *resp = NULL; int rc = SLURM_SUCCESS; debug3("mpi/pmi2: node_attr_put: %s=%s", key, val); if (na_cnt * 2 >= na_size) { na_size += NODE_ATTR_SIZE_INC; xrealloc(node_attr, na_size * sizeof(char*)); } node_attr[KEY_INDEX(na_cnt)] = xstrdup(key); node_attr[VAL_INDEX(na_cnt)] = xstrdup(val); na_cnt ++; /* process pending requests */ pprev = &nag_req_list; req = *pprev; while (req != NULL) { if (xstrncmp(key, req->key, PMI2_MAX_KEYLEN)) { pprev = &req->next; req = *pprev; } else { debug("mpi/pmi2: found pending request from rank %d", req->rank); /* send response msg */ if (! resp) { resp = client_resp_new(); client_resp_append(resp, CMD_KEY"=" GETNODEATTRRESP_CMD";" RC_KEY"=0;" FOUND_KEY"="TRUE_VAL";" VALUE_KEY"=%s;", val); } rc = client_resp_send(resp, req->fd); if (rc != SLURM_SUCCESS) { error("mpi/pmi2: failed to send '" GETNODEATTRRESP_CMD "' to task %d", req->rank); } /* remove the request */ *pprev = req->next; xfree(req); req = *pprev; } } if (resp) { client_resp_free (resp); } debug3("mpi/pmi2: out node_attr_put"); return SLURM_SUCCESS; } /* returned value not dup-ed */ extern char * node_attr_get(char *key) { int i; char *val = NULL; debug3("mpi/pmi2: node_attr_get: key=%s", key); for (i = 0; i < na_cnt; i ++) { if (! xstrcmp(key, node_attr[KEY_INDEX(i)])) { val = node_attr[VAL_INDEX(i)]; break; } } debug3("mpi/pmi2: out node_attr_get: val=%s", val); return val; } /* job_attr_get_netinfo() */ static char * job_attr_get_netinfo(char *key, char *attr) { char *netinfo; /* get network information of node in netinfo, xmalloc'ed */ netinfo = ifconfig(); snprintf(attr, PMI2_MAX_VALLEN, "%s", netinfo); xfree(netinfo); debug3("%s: netinfo %s", __func__, attr); return attr; } /* job_attr_get() */ extern char * job_attr_get(char *key) { static char attr[PMI2_MAX_VALLEN]; if (!xstrcmp(key, JOB_ATTR_PROC_MAP)) { return job_info.proc_mapping; } if (!xstrcmp(key, JOB_ATTR_UNIV_SIZE)) { snprintf(attr, PMI2_MAX_VALLEN, "%d", job_info.ntasks); return attr; } if (!xstrcmp(key, JOB_ATTR_RESV_PORTS)) { if (! job_info.resv_ports) return NULL; debug3("%s: SLURM_STEP_RESV_PORTS %s", __func__, job_info.resv_ports); snprintf(attr, PMI2_MAX_VALLEN, "%s", job_info.resv_ports); return attr; } if (xstrcmp(key, JOB_ATTR_NETINFO) >= 0) { if (job_attr_get_netinfo(key, attr) == NULL) { return NULL; } return attr; } return NULL; } /* ifconfig() * * Return information about network interfaces. */ static char * ifconfig(void) { struct ifaddrs *ifaddr; struct ifaddrs *ifa; int s; int n; char addr[NI_MAXHOST]; char hostname[MAXHOSTNAMELEN]; char *buf; if (getifaddrs(&ifaddr) == -1) { error("%s: getifaddrs failed %m", __func__); return NULL; } n = 0; for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) ++n; /* this should be a good guess of the size we need. */ buf = xmalloc((MAXHOSTNAMELEN + n) * 64); gethostname(hostname, sizeof(hostname)); n = sprintf(buf, "(%s", hostname); /* Walk through linked list, maintaining head pointer so we * can free list later */ for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { if (ifa->ifa_addr == NULL) continue; #if !defined(__FreeBSD__) if (ifa->ifa_flags & IFF_LOOPBACK) continue; #endif if (ifa->ifa_addr->sa_family != AF_INET && ifa->ifa_addr->sa_family != AF_INET6) continue; if (ifa->ifa_addr->sa_family == AF_INET) { s = getnameinfo(ifa->ifa_addr, sizeof(struct sockaddr_in), addr, NI_MAXHOST, NULL, 0, NI_NUMERICHOST); if (s != 0) { error("%s: AF_INET getnameinfo() failed: %s", __func__, gai_strerror(s)); continue; } n = n + sprintf(buf + n, ",(%s,%s,%s)", ifa->ifa_name, "IP_V4", addr); continue; } if (ifa->ifa_addr->sa_family == AF_INET6) { s = getnameinfo(ifa->ifa_addr, sizeof(struct sockaddr_in6), addr, NI_MAXHOST, NULL, 0, NI_NUMERICHOST); if (s != 0) { error("%s: AF_INET6 getnameinfo() failed: %s", __func__, gai_strerror(s)); continue; } n = n + sprintf(buf + n, ",(%s,%s,%s)", ifa->ifa_name, "IP_V6", addr); } } sprintf(buf + n, ")"); debug("%s: ifconfig %s", __func__, buf); freeifaddrs(ifaddr); return buf; }