/* * OpenPBS (Portable Batch System) v2.3 Software License * * Copyright (c) 1999-2000 Veridian Information Solutions, Inc. * All rights reserved. * * --------------------------------------------------------------------------- * For a license to use or redistribute the OpenPBS software under conditions * other than those described below, or to purchase support for this software, * please contact Veridian Systems, PBS Products Department ("Licensor") at: * * www.OpenPBS.org +1 650 967-4675 sales@OpenPBS.org * 877 902-4PBS (US toll-free) * --------------------------------------------------------------------------- * * This license covers use of the OpenPBS v2.3 software (the "Software") at * your site or location, and, for certain users, redistribution of the * Software to other sites and locations. Use and redistribution of * OpenPBS v2.3 in source and binary forms, with or without modification, * are permitted provided that all of the following conditions are met. * After December 31, 2001, only conditions 3-6 must be met: * * 1. Commercial and/or non-commercial use of the Software is permitted * provided a current software registration is on file at www.OpenPBS.org. * If use of this software contributes to a publication, product, or * service, proper attribution must be given; see www.OpenPBS.org/credit.html * * 2. Redistribution in any form is only permitted for non-commercial, * non-profit purposes. There can be no charge for the Software or any * software incorporating the Software. Further, there can be no * expectation of revenue generated as a consequence of redistributing * the Software. * * 3. Any Redistribution of source code must retain the above copyright notice * and the acknowledgment contained in paragraph 6, this list of conditions * and the disclaimer contained in paragraph 7. * * 4. Any Redistribution in binary form must reproduce the above copyright * notice and the acknowledgment contained in paragraph 6, this list of * conditions and the disclaimer contained in paragraph 7 in the * documentation and/or other materials provided with the distribution. * * 5. Redistributions in any form must be accompanied by information on how to * obtain complete source code for the OpenPBS software and any * modifications and/or additions to the OpenPBS software. The source code * must either be included in the distribution or be available for no more * than the cost of distribution plus a nominal fee, and all modifications * and additions to the Software must be freely redistributable by any party * (including Licensor) without restriction. * * 6. All advertising materials mentioning features or use of the Software must * display the following acknowledgment: * * "This product includes software developed by NASA Ames Research Center, * Lawrence Livermore National Laboratory, and Veridian Information * Solutions, Inc. * Visit www.OpenPBS.org for OpenPBS software support, * products, and information." * * 7. DISCLAIMER OF WARRANTY * * THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT * ARE EXPRESSLY DISCLAIMED. * * IN NO EVENT SHALL VERIDIAN CORPORATION, ITS AFFILIATED COMPANIES, OR THE * U.S. GOVERNMENT OR ANY OF ITS AGENCIES BE LIABLE FOR ANY DIRECT OR INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * This license will be governed by the laws of the Commonwealth of Virginia, * without reference to its choice of law rules. */ #include /* the master config generated by configure */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(NTOHL_NEEDS_ARPA_INET_H) && defined(HAVE_ARPA_INET_H) #include #endif #include "pbs_ifl.h" #include "pbs_error.h" #include "log.h" #include "../lib/Liblog/pbs_log.h" #include "../lib/Liblog/log_event.h" #include "net_connect.h" #include "rpp.h" #include "dis.h" #include "dis_init.h" #include "list_link.h" #include "attribute.h" #include "pbs_nodes.h" #include "resmon.h" #include "server_limits.h" #include "pbs_job.h" #include "utils.h" #include "u_tree.h" #include "mom_hierarchy.h" #include "mom_server.h" #include "mom_comm.h" #include "mcom.h" #include "pbs_constants.h" /* Long */ #include "mom_server_lib.h" #include "../lib/Libifl/lib_ifl.h" /* pbs_disconnect_socket */ #include "alps_functions.h" #define MAX_GPUS 32 #ifdef NVML_API #include "nvml.h" #endif /* NVML_API */ extern int find_file(char *, char *); extern char mom_host[]; extern int MOMNvidiaDriverVersion; extern int use_nvidia_gpu; extern time_t time_now; int nvidia_gpu_modes[50]; /* * Function to initialize the Nvidia nvml api */ #ifdef NVML_API void log_nvml_error( nvmlReturn_t rc, char* gpuid, const char* id) { switch (rc) { case NVML_SUCCESS: if (LOGLEVEL >= 3) { log_err( PBSE_RMSYSTEM, id, (char *)"Successful"); } break; case NVML_ERROR_ALREADY_INITIALIZED: if (LOGLEVEL >= 3) { log_err( PBSE_RMSYSTEM, id, (char *)"Already initialized"); } break; case NVML_ERROR_NO_PERMISSION: if (LOGLEVEL >= 1) { log_err( PBSE_RMSYSTEM, id, (char *)"No permission"); } break; case NVML_ERROR_INVALID_ARGUMENT: if (LOGLEVEL >= 1) { log_err( PBSE_RMSYSTEM, id, (char *)"NVML invalid argument"); } break; case NVML_ERROR_NOT_FOUND: if (LOGLEVEL >= 1) { sprintf(log_buffer, "NVML device %s not found", (gpuid != NULL) ? gpuid : "NULL"); log_err( PBSE_RMSYSTEM, id, log_buffer); } break; case NVML_ERROR_NOT_SUPPORTED: if (LOGLEVEL >= 1) { sprintf(log_buffer, "NVML device %s not supported", (gpuid != NULL) ? gpuid : "NULL"); log_err( PBSE_RMSYSTEM, id, log_buffer); } break; case NVML_ERROR_UNKNOWN: if (LOGLEVEL >= 1) { log_err( PBSE_RMSYSTEM, id, (char *)"Unknown error"); } break; default: if (LOGLEVEL >= 1) { sprintf(log_buffer, "Unexpected error code %d", rc); log_err( PBSE_RMSYSTEM, id, log_buffer); } break; } } /* * Function to initialize the Nvidia nvml api */ int init_nvidia_nvml() { nvmlReturn_t rc; unsigned int device_count; rc = nvmlInit(); if (rc == NVML_SUCCESS) { rc = nvmlDeviceGetCount(&device_count); if (rc == NVML_SUCCESS) { if ((int)device_count > 0) return (TRUE); sprintf(log_buffer,"No Nvidia gpus detected\n"); log_ext(-1, __func__, log_buffer, LOG_DEBUG); /* since we detected no gpus, shut down nvml */ shut_nvidia_nvml(); return (FALSE); } } log_nvml_error (rc, NULL, __func__); return (FALSE); } /* * Function to shutdown the Nvidia nvml api */ int shut_nvidia_nvml() { nvmlReturn_t rc; if (!use_nvidia_gpu) return (TRUE); rc = nvmlShutdown(); if (rc == NVML_SUCCESS) return (TRUE); log_nvml_error (rc, NULL, __func__); return (FALSE); } /* * Function to get the NVML device handle */ nvmlDevice_t get_nvml_device_handle( char *gpuid) { nvmlReturn_t rc; nvmlDevice_t device_hndl; char *ptr; unsigned int index; /* if gpuid contains a : then try to get the device handle by pci bus id */ ptr = strchr(gpuid, ':'); if (ptr != NULL) { rc = nvmlDeviceGetHandleByPciBusId(gpuid, &device_hndl); } else { /* try to get the device handle by index */ index = atoi(gpuid); rc = nvmlDeviceGetHandleByIndex(index, &device_hndl); } if (rc == NVML_SUCCESS) return (device_hndl); log_nvml_error (rc, gpuid, __func__); return (NULL); } #endif /* NVML_API */ /* * Function to determine if the nvidia kernel module is loaded */ static int check_nvidia_module_loaded() { char line[4096]; FILE *file; file = fopen("/proc/modules", "r"); if (!file) { if (LOGLEVEL >= 3) { log_err( errno, __func__, "Failed to read /proc/modules"); } return(FALSE); } while (fgets(line, sizeof(line), file)) { char *tok = strtok(line, " \t"); if (tok) { if (strcmp(tok, "nvidia") == 0) { fclose(file); return(TRUE); } } } if (LOGLEVEL >= 3) { log_err( PBSE_RMSYSTEM, __func__, "No Nvidia driver loaded"); } fclose(file); return(FALSE); } /* * Function to get the nvidia driver version */ static int check_nvidia_version_file() { char line[4096]; FILE *file; /* if file does not exist then version is too old */ file = fopen("/proc/driver/nvidia/version", "r"); if (!file) { if (LOGLEVEL >= 3) { log_err( PBSE_RMSYSTEM, __func__, "No Nvidia driver info available. Driver not supported?"); } return(FALSE); } while (fgets(line, sizeof(line), file)) { char *tok; if (strncmp(line, "NVRM", 4) == 0) { if (LOGLEVEL >= 3) { sprintf(log_buffer,"Nvidia driver info: %s\n", line); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } tok = strstr(line, "Kernel Module"); if (tok) { tok += 13; MOMNvidiaDriverVersion = atoi(tok); if (MOMNvidiaDriverVersion >= 260) { fclose(file); return(TRUE); } break; } } } fclose(file); return(FALSE); } /* * Function to determine if nvidia-smi is setup correctly */ int check_nvidia_setup() { #ifndef NVML_API int rc; #endif static int check_setup = TRUE; static int nvidia_setup_is_ok = FALSE; /* Check the setup for the nvidia gpus */ if (check_setup) { #ifndef NVML_API char *pathEnv; #endif /* only check the setup once */ check_setup = FALSE; /* check if the nvidia module is loaded in */ if (!check_nvidia_module_loaded()) { return (FALSE); } /* see if we can get the nvidia driver version */ if (!check_nvidia_version_file()) { return (FALSE); } #ifdef NVML_API nvidia_setup_is_ok = TRUE; #else /* Get the PATH environment variable so we can see * if the nvidia-smi executable is in the execution path */ pathEnv = getenv("PATH"); if (pathEnv == NULL) { if (LOGLEVEL >= 3) { log_err(PBSE_RMSYSTEM, __func__, "cannot get PATH"); } return(FALSE); } /* We have the PATH, now find the nvidia-smi executable */ rc = find_file(pathEnv, (char *)"nvidia-smi"); if (rc == FALSE) { if (LOGLEVEL >= 3) { log_err(PBSE_RMSYSTEM, __func__, "cannot find nvidia-smi in PATH"); } return(FALSE); } nvidia_setup_is_ok = TRUE; #endif /* NVML_API */ } return (nvidia_setup_is_ok); } /* * Function to collect nvidia-smi data */ static char *gpus( char *buffer, int buffer_size) { FILE *fd; char *ptr; /* pointer to the current place to copy data into munge_buf */ int bytes_read; int total_bytes_read = 0; char buf[RETURN_STRING_SIZE]; char cmdbuf[101]; if (!check_nvidia_setup()) { return (FALSE); } if (MOMNvidiaDriverVersion >= 270) { sprintf(cmdbuf, "nvidia-smi -q -x 2>&1"); } else /* 260 driver */ { sprintf(cmdbuf, "nvidia-smi -a -x 2>&1"); } if (LOGLEVEL >= 7) { sprintf(log_buffer,"%s: GPU cmd issued: %s\n", __func__, cmdbuf); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } if ((fd = popen(cmdbuf, "r")) != NULL) { memset(buffer, 0, buffer_size); ptr = buffer; do { bytes_read = fread(buf, sizeof(char), MUNGE_SIZE, fd); if (bytes_read > 0) { total_bytes_read += bytes_read; memcpy(ptr, buf, bytes_read); ptr += bytes_read; } } while(bytes_read > 0); pclose(fd); if (bytes_read == -1) { /* read failed */ if (LOGLEVEL >= 0) { sprintf(log_buffer, "error reading popen pipe"); log_err(PBSE_RMSYSTEM, __func__, log_buffer); } return(NULL); } } else { if (LOGLEVEL >= 0) { sprintf(log_buffer, "error %d (%s) on popen", errno, strerror(errno)); log_err(PBSE_RMSYSTEM, __func__, log_buffer); } return(NULL); } return(buffer); } /* * Function to collect gpu modes */ static int gpumodes( int buffer[], int buffer_size) { FILE *fd; char *ptr; /* pointer to the current place to copy data into buf */ char buf[201]; int idx; int gpuid; int gpumode; if (!check_nvidia_setup()) { return (FALSE); } for (idx=0; idx= 7) { sprintf(log_buffer,"%s: GPU cmd issued: %s\n", __func__, "nvidia-smi -s 2>&1"); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } if ((fd = popen("nvidia-smi -s 2>&1", "r")) != NULL) { while (!feof(fd)) { if (fgets(buf, 200, fd)) { ptr = buf; ptr = strstr(ptr, "GPU"); if (ptr) { ptr += 4; gpuid = atoi(ptr); ptr = strchr(ptr, ':'); if (ptr) { ptr++; gpumode = atoi(ptr); } buffer[gpuid] = gpumode; } } } pclose(fd); } else { if (LOGLEVEL >= 0) { sprintf(log_buffer, "error %d (%s) on popen", errno, strerror(errno)); log_err(PBSE_RMSYSTEM, __func__, log_buffer); } return(FALSE); } return(TRUE); } /* * Function to set gpu mode */ int setgpumode( char *gpuid, int gpumode) { #ifdef NVML_API nvmlReturn_t rc; nvmlComputeMode_t compute_mode; nvmlDevice_t device_hndl; if (!check_nvidia_setup()) { return (FALSE); } switch (gpumode) { case gpu_normal: compute_mode = NVML_COMPUTEMODE_DEFAULT; break; case gpu_exclusive_thread: compute_mode = NVML_COMPUTEMODE_EXCLUSIVE_THREAD; break; case gpu_prohibited: compute_mode = NVML_COMPUTEMODE_PROHIBITED; break; case gpu_exclusive_process: compute_mode = NVML_COMPUTEMODE_EXCLUSIVE_PROCESS; break; default: if (LOGLEVEL >= 1) { sprintf(log_buffer, "Unexpected compute mode %d", rc); log_err(PBSE_RMSYSTEM, __func__, log_buffer); } return (FALSE); } /* get the device handle */ device_hndl = get_nvml_device_handle(gpuid); if (device_hndl != NULL) { if (LOGLEVEL >= 7) { sprintf(log_buffer, "changing to mode %d for gpu %s", gpumode, gpuid); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } rc = nvmlDeviceSetComputeMode(device_hndl, compute_mode); if (rc == NVML_SUCCESS) return (TRUE); log_nvml_error (rc, gpuid, __func__); } return(FALSE); #else FILE *fd; char buf[301]; if (!check_nvidia_setup()) { return (FALSE); } /* build command to be issued */ if (MOMNvidiaDriverVersion == 260) { sprintf(buf, "nvidia-smi -g %s -c %d 2>&1", gpuid, gpumode); } else /* 270 or greater driver */ { sprintf(buf, "nvidia-smi -i %s -c %d 2>&1", gpuid, gpumode); } if (LOGLEVEL >= 7) { sprintf(log_buffer,"%s: GPU cmd issued: %s\n", __func__, buf); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } if ((fd = popen(buf, "r")) != NULL) { while (!feof(fd)) { if (fgets(buf, 300, fd)) { int len = strlen(buf); /* bypass blank lines */ if ((len == 1 ) && (buf[0] == '\n')) { continue; } /* for 270 and above we need to check the return string to see if it went okay */ /* 260 driver does not return anything on success */ if ((MOMNvidiaDriverVersion >= 270) && ((memcmp(buf, "Set compute mode to", 19) == 0) || (memcmp(buf, "Compute mode is already set to", 30) == 0))) { break; } if (LOGLEVEL >= 7) { sprintf( log_buffer, "nvidia-smi gpu change mode returned: %s", buf); log_ext(-1, __func__, log_buffer, LOG_INFO); } pclose(fd); return(FALSE); } } pclose(fd); } else { if (LOGLEVEL >= 0) { sprintf(log_buffer, "error %d (%s) on popen", errno, strerror(errno)); log_err(PBSE_RMSYSTEM, __func__, log_buffer); } return(FALSE); } return(TRUE); #endif /* NVML_API */ } /* * Function to reset gpu ecc count */ int resetgpuecc( char *gpuid, int reset_perm, int reset_vol) { #ifdef NVML_API nvmlReturn_t rc; nvmlEccCounterType_enum counter_type; nvmlDevice_t device_hndl; if (!check_nvidia_setup()) { return (FALSE); } if (reset_perm == 1) { /* reset ecc counts */ counter_type = NVML_AGGREGATE_ECC; } else if (reset_vol == 1) { /* reset volatile ecc counts */ counter_type = NVML_AGGREGATE_ECC; } /* get the device handle */ device_hndl = get_nvml_device_handle(gpuid); if (device_hndl != NULL) { if (LOGLEVEL >= 7) { sprintf(log_buffer, "reseting error count %d-%d for gpu %s", reset_perm, reset_vol, gpuid); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } rc = nvmlDeviceClearEccErrorCounts(device_hndl, counter_type); if (rc == NVML_SUCCESS) return (TRUE); log_nvml_error (rc, gpuid, __func__); } return(FALSE); #else FILE *fd; char buf[301]; if (!check_nvidia_setup()) { return (FALSE); } /* build command to be issued */ if (MOMNvidiaDriverVersion == 260) { sprintf(buf, "nvidia-smi -g %s", gpuid); if (reset_perm == 1) { /* reset permanent ecc counts */ strcat (buf, " -p"); } if (reset_vol == 1) { /* reset volatile ecc counts */ strcat (buf, " -v"); } } else /* 270 or greater driver */ { sprintf(buf, "nvidia-smi -i %s", gpuid); /* 270 can currently reset only 1 at a time */ if (reset_perm == 1) { /* reset ecc counts */ strcat (buf, " -p 1"); } else if (reset_vol == 1) { /* reset volatile ecc counts */ strcat (buf, " -p 0"); } } strcat(buf, " 2>&1"); if (LOGLEVEL >= 7) { sprintf(log_buffer,"%s: GPU cmd issued: %s\n", __func__, buf); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } if ((fd = popen(buf, "r")) != NULL) { while (!feof(fd)) { if (fgets(buf, 300, fd)) { int len = strlen(buf); /* bypass blank lines */ if ((len == 1 ) && (buf[0] == '\n')) { continue; } /* for 270 we need to check the return string to see if it went okay */ /* 260 driver does not return anything on success */ if ((MOMNvidiaDriverVersion >= 270) && ((memcmp(buf, "Reset volatile ECC errors to zero", 33) == 0) || (memcmp(buf, "Reset aggregate ECC errors to zero", 34) == 0))) { break; } if (LOGLEVEL >= 7) { sprintf( log_buffer, "nvidia-smi gpu reset ecc returned: %s", buf); log_ext(-1, __func__, log_buffer, LOG_INFO); } pclose(fd); return(FALSE); } } pclose(fd); } else { if (LOGLEVEL >= 0) { sprintf(log_buffer, "error %d (%s) on popen", errno, strerror(errno)); log_err(PBSE_RMSYSTEM, __func__, log_buffer); } return(FALSE); } return(TRUE); #endif /* NVML_API */ } /* * uses the gpu_flags to determine what to set up for job * * @param pjob - the job to set up gpus for * @return PBSE_NONE if success, error code otherwise */ int setup_gpus_for_job( job *pjob) /* I */ { char *gpu_str; char *ptr; char tmp_str[PBS_MAXHOSTNAME + 10]; int gpu_flags = 0; char gpu_id[30]; int gpu_mode = -1; /* if node does not have Nvidia recognized driver version then forget it */ if (MOMNvidiaDriverVersion < 260) return(PBSE_NONE); /* if there are no gpus, do nothing */ if ((pjob->ji_wattr[JOB_ATR_exec_gpus].at_flags & ATR_VFLAG_SET) == 0) return(PBSE_NONE); /* if there are no gpu flags, do nothing */ if ((pjob->ji_wattr[JOB_ATR_gpu_flags].at_flags & ATR_VFLAG_SET) == 0) return(PBSE_NONE); gpu_str = pjob->ji_wattr[JOB_ATR_exec_gpus].at_val.at_str; if (gpu_str == NULL) return(PBSE_NONE); gpu_flags = pjob->ji_wattr[JOB_ATR_gpu_flags].at_val.at_long; if (LOGLEVEL >= 7) { sprintf(log_buffer, "job %s has exec_gpus %s gpu_flags %d", pjob->ji_qs.ji_jobid, gpu_str, gpu_flags); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } /* traverse the gpu_str to see what gpus we have assigned */ strcpy(tmp_str, mom_host); strcat(tmp_str, "-gpu/"); ptr = strstr(gpu_str, tmp_str); if (ptr == NULL) { /* might be fully qualified host name */ strcpy(tmp_str, mom_host); ptr = strchr(tmp_str, '.'); if (ptr != NULL) ptr[0] = '\0'; strcat(tmp_str, "-gpu/"); ptr = strstr(gpu_str, tmp_str); } while(ptr != NULL) { ptr = strchr(ptr, '/'); if (ptr != NULL) { ptr++; sprintf(gpu_id,"%d",atoi(ptr)); /* do we need to reset volatile error counts on gpu */ if (gpu_flags >= 1000) { if (LOGLEVEL >= 7) { sprintf(log_buffer, "job %s reseting gpuid %s volatile error counts", pjob->ji_qs.ji_jobid, gpu_id); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } resetgpuecc(gpu_id, 0, 1); } gpu_mode = gpu_flags; if (gpu_mode >= 1000) { gpu_mode -= 1000; } /* do we need to change modes on gpu */ if (nvidia_gpu_modes[atoi(ptr)] != gpu_mode) { if (LOGLEVEL >= 7) { sprintf(log_buffer, "job %s change to mode %d for gpuid %s", pjob->ji_qs.ji_jobid, gpu_mode, gpu_id); log_ext(-1, __func__, log_buffer, LOG_DEBUG); } setgpumode(gpu_id, gpu_mode); } ptr = strstr(ptr, tmp_str); } } /* do we need to change mode on gpu */ return(PBSE_NONE); } /* END setup_gpus_for_job() */ /* * Function to collect gpu statuses to be sent to server. (Currently Nvidia only) */ #ifdef NVML_API void generate_server_gpustatus_nvml( dynamic_string *gpu_status) { nvmlReturn_t rc; unsigned int device_count; unsigned int tmpint; int idx; nvmlDevice_t device_hndl; nvmlPciInfo_t pci_info; nvmlMemory_t mem_info; nvmlComputeMode_t comp_mode; nvmlEnableState_t ecc_mode; nvmlEnableState_t ecc_pend_mode; nvmlEnableState_t display_mode; nvmlUtilization_t util_info; unsigned long long ecc_counts; char tmpbuf[1024+1]; if (!check_nvidia_setup()) { return; } /* get timestamp to report */ snprintf(tmpbuf, 100, "timestamp=%s", ctime(&time_now)); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); /* get the driver version to report */ rc = nvmlSystemGetDriverVersion(tmpbuf, 1024); if (rc == NVML_SUCCESS) { copy_to_end_of_dynamic_string(gpu_status, "driver_ver="); append_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } /* get the device count */ rc = nvmlDeviceGetCount(&device_count); if (rc != NVML_SUCCESS) { log_nvml_error (rc, NULL, __func__); return; } /* get the device handle for each gpu and report the data */ for (idx = 0; idx < (int)device_count; idx++) { rc = nvmlDeviceGetHandleByIndex(idx, &device_hndl); if (rc != NVML_SUCCESS) { log_nvml_error (rc, NULL, __func__); continue; } /* get the PCI info */ rc = nvmlDeviceGetPciInfo(device_hndl, &pci_info); if (rc == NVML_SUCCESS) { copy_to_end_of_dynamic_string(gpu_status, "gpuid="); append_dynamic_string(gpu_status, pci_info.busId); copy_to_end_of_dynamic_string(gpu_status, "gpu_pci_device_id="); snprintf(tmpbuf, 100, "%d", pci_info.pciDeviceId); append_dynamic_string(gpu_status, tmpbuf); copy_to_end_of_dynamic_string(gpu_status, "gpu_pci_location_id="); append_dynamic_string(gpu_status, pci_info.busId); } else { log_nvml_error (rc, NULL, __func__); } /* get the product name */ rc = nvmlDeviceGetName(device_hndl, tmpbuf, 1024); if (rc == NVML_SUCCESS) { copy_to_end_of_dynamic_string(gpu_status, "gpu_product_name="); append_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } /* get the display mode */ rc = nvmlDeviceGetDisplayMode(device_hndl, &display_mode); if (rc == NVML_SUCCESS) { copy_to_end_of_dynamic_string(gpu_status, "gpu_display=Enabled"); } else if (rc == NVML_ERROR_INVALID_ARGUMENT) { copy_to_end_of_dynamic_string(gpu_status, "gpu_display=Disabled"); } else { log_nvml_error (rc, NULL, __func__); } /* get the fan speed */ rc = nvmlDeviceGetFanSpeed(device_hndl, &tmpint); if (rc == NVML_SUCCESS) { snprintf(tmpbuf, 20, "gpu_fan_speed=%d%%", tmpint); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } /* get the memory information */ rc = nvmlDeviceGetMemoryInfo(device_hndl, &mem_info); if (rc == NVML_SUCCESS) { snprintf(tmpbuf, 50, "gpu_memory_total=%lld MB", (mem_info.total/(1024*1024))); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); snprintf(tmpbuf, 50, "gpu_memory_used=%lld MB", (mem_info.used/(1024*1024))); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } /* get the compute mode */ rc = nvmlDeviceGetComputeMode(device_hndl, &comp_mode); if (rc == NVML_SUCCESS) { copy_to_end_of_dynamic_string(gpu_status, "gpu_mode="); switch (comp_mode) { case NVML_COMPUTEMODE_DEFAULT: append_dynamic_string(gpu_status, "Default"); nvidia_gpu_modes[idx] = gpu_normal; break; case NVML_COMPUTEMODE_EXCLUSIVE_THREAD: append_dynamic_string(gpu_status, "Exclusive_Thread"); nvidia_gpu_modes[idx] = gpu_exclusive_thread; break; case NVML_COMPUTEMODE_PROHIBITED: append_dynamic_string(gpu_status, "Prohibited"); nvidia_gpu_modes[idx] = gpu_prohibited; break; case NVML_COMPUTEMODE_EXCLUSIVE_PROCESS: append_dynamic_string(gpu_status, "Exclusive_Process"); nvidia_gpu_modes[idx] = gpu_exclusive_process; break; default: append_dynamic_string(gpu_status, "Unknown"); nvidia_gpu_modes[idx] = -1; break; } } else { log_nvml_error (rc, NULL, __func__); } /* get the utilization rates */ rc = nvmlDeviceGetUtilizationRates(device_hndl, &util_info); if (rc == NVML_SUCCESS) { snprintf(tmpbuf, 100, "gpu_utilization=%d%%", util_info.gpu); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); snprintf(tmpbuf, 100, "gpu_memory_utilization=%d%%", util_info.memory); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } /* get the ECC mode */ rc = nvmlDeviceGetEccMode(device_hndl, &ecc_mode, &ecc_pend_mode); if (rc == NVML_SUCCESS) { snprintf(tmpbuf, 50, "gpu_ecc_mode=%s", (ecc_mode == NVML_FEATURE_ENABLED) ? "Enabled" : "Disabled"); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } /* get the single bit ECC errors */ rc = nvmlDeviceGetTotalEccErrors(device_hndl, NVML_SINGLE_BIT_ECC, NVML_AGGREGATE_ECC, &ecc_counts); if (rc == NVML_SUCCESS) { snprintf(tmpbuf, 100, "gpu_single_bit_ecc_errors=%lld", ecc_counts); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } /* get the double bit ECC errors */ rc = nvmlDeviceGetTotalEccErrors(device_hndl, NVML_DOUBLE_BIT_ECC, NVML_AGGREGATE_ECC, &ecc_counts); if (rc == NVML_SUCCESS) { snprintf(tmpbuf, 100, "gpu_double_bit_ecc_errors=%lld", ecc_counts); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } /* get the temperature */ rc = nvmlDeviceGetTemperature(device_hndl, NVML_TEMPERATURE_GPU, &tmpint); if (rc == NVML_SUCCESS) { snprintf(tmpbuf, 25, "gpu_temperature=%d C", tmpint); copy_to_end_of_dynamic_string(gpu_status, tmpbuf); } else { log_nvml_error (rc, NULL, __func__); } } return; } #endif /* NVML_API */ /* * Function to collect gpu statuses to be sent to server. (Currently Nvidia only) */ void generate_server_gpustatus_smi( dynamic_string *gpu_status) { char *dataptr; char *tmpptr1; char *tmpptr2; char *savptr; /* * we hope we don't get more than 32 gpus on a node so we guess at how much * data might get returned from nvidia-smi. xml inflates return data. */ char gpu_string[MAX_GPUS * 3000]; int gpu_modes[MAX_GPUS]; int have_modes = FALSE; int gpuid = -1; mxml_t *EP; char *Tail; char Emsg[MAXLINE]; dataptr = gpus(gpu_string, sizeof(gpu_string)); if (dataptr == NULL) { return; } /* move past the php code*/ if ((dataptr = strstr(gpu_string, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "timestamp="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { return; } if ((dataptr = strstr(gpu_string, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "driver_ver="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { /* cannot determine driver version */ copy_to_end_of_dynamic_string(gpu_status, "driver_ver=UNKNOWN"); return; } while ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_product_name="); append_dynamic_string(gpu_status, EP->Val); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_pci_device_id="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_pci_location_id="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_display="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_temperature="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_fan_speed="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_utilization="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_memory_utilization="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } if ((dataptr = strstr(dataptr, "")) != NULL) { if ((tmpptr1 = strstr(dataptr, "")) != NULL) { tmpptr1 = strstr(tmpptr1, ""); MXMLFromString(&EP, tmpptr1, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_single_bit_ecc_errors="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } if ((tmpptr1 = strstr(dataptr, "")) != NULL) { tmpptr1 = strstr(tmpptr1, ""); MXMLFromString(&EP, tmpptr1, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_double_bit_ecc_errors="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } } else { dataptr = savptr; } } /* end (MOMNvidiaDriverVersion == 260) */ else if (MOMNvidiaDriverVersion >= 270) { savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_product_name="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_display="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_pci_device_id="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_pci_location_id="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_fan_speed="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } else { dataptr = savptr; } if ((dataptr = strstr(dataptr, "")) != NULL) { if ((tmpptr1 = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, tmpptr1, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_memory_total="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } if ((tmpptr1 = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, tmpptr1, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_memory_used="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, dataptr, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_mode="); append_dynamic_string(gpu_status, EP->Val); if (EP->Val[0] == 'D') /* Default */ { nvidia_gpu_modes[gpuid] = gpu_normal; } else if (EP->Val[0] == 'P') /* Prohibited */ { nvidia_gpu_modes[gpuid] = gpu_prohibited; } else if (EP->Val[10] == 'T') /* Exclusive_Thread */ { nvidia_gpu_modes[gpuid] = gpu_exclusive_thread; } else if (EP->Val[10] == 'P') /* Exclusive_Process */ { nvidia_gpu_modes[gpuid] = gpu_exclusive_process; } else /* unknown */ { nvidia_gpu_modes[gpuid] = -1; } MXMLDestroyE(&EP); } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { if ((tmpptr1 = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, tmpptr1, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_utilization="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } if ((tmpptr1 = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, tmpptr1, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_memory_utilization="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } } else { dataptr = savptr; } if ((dataptr = strstr(dataptr, "")) != NULL) { if ((tmpptr1 = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, tmpptr1, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_ecc_mode="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } } else { dataptr = savptr; } if ((dataptr = strstr(dataptr, "")) != NULL) { if ((tmpptr1 = strstr(dataptr, "")) != NULL) { if ((tmpptr2 = strstr(tmpptr1, "")) != NULL) { tmpptr2 = strstr(tmpptr1, ""); MXMLFromString(&EP, tmpptr2, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_single_bit_ecc_errors="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } if ((tmpptr2 = strstr(tmpptr1, "")) != NULL) { tmpptr2 = strstr(tmpptr1, ""); MXMLFromString(&EP, tmpptr2, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_double_bit_ecc_errors="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } } } else { dataptr = savptr; } savptr = dataptr; if ((dataptr = strstr(dataptr, "")) != NULL) { if ((tmpptr1 = strstr(dataptr, "")) != NULL) { MXMLFromString(&EP, tmpptr1, &Tail, Emsg, sizeof(Emsg)); copy_to_end_of_dynamic_string(gpu_status, "gpu_temperature="); append_dynamic_string(gpu_status, EP->Val); MXMLDestroyE(&EP); } } else { dataptr = savptr; } } /* end (MOMNvidiaDriverVersion >= 270) */ else { /* unknown driver version */ if (LOGLEVEL >= 3) { log_err(PBSE_RMSYSTEM, __func__, (char *)"Unknown Nvidia driver version"); } /* need to advance dataptr so we don't recycle through same gpu */ dataptr++; } } } return; } void req_gpuctrl_mom( struct batch_request *preq) /* I */ { char *mom_node; char *gpuid; int gpumode = -1; int reset_perm = -1; int reset_vol = -1; #ifdef NVIDIA_GPUS int rc = -1; #endif /* NVIDIA_GPUS */ gpuid = preq->rq_ind.rq_gpuctrl.rq_gpuid; gpumode = preq->rq_ind.rq_gpuctrl.rq_gpumode; mom_node = preq->rq_ind.rq_gpuctrl.rq_momnode; reset_perm = preq->rq_ind.rq_gpuctrl.rq_reset_perm; reset_vol = preq->rq_ind.rq_gpuctrl.rq_reset_vol; #ifdef NVIDIA_GPUS if (LOGLEVEL >= 7) { sprintf( log_buffer, "GPU control request for node %s gpuid %s mode %d reset_perm %d reset_vol %d", mom_node, gpuid, gpumode, reset_perm, reset_vol); log_ext(-1, __func__, log_buffer, LOG_INFO); } if (!use_nvidia_gpu) { sprintf( log_buffer, "GPU control requests not active: node %s gpuid %s mode %d reset_perm %d reset_vol %d", mom_node, gpuid, gpumode, reset_perm, reset_vol); if (LOGLEVEL >= 3) { log_ext(-1, __func__, log_buffer, LOG_INFO); } req_reject(PBSE_NOSUP, 0, preq, NULL, NULL); return; } /* assume success? */ if (gpumode != -1) { rc = setgpumode(gpuid, gpumode); } else if ((reset_perm != -1) || (reset_vol != -1)) { rc = resetgpuecc(gpuid, reset_perm, reset_vol); } if (rc) { reply_ack(preq); /* * if we were successful changing the mode then we need to update the gpu * statuses */ if (gpumode != -1) { send_update_soon(); } } else { req_reject(PBSE_RMSYSTEM, 0, preq, mom_host, "failed to set gpu status"); } #else sprintf(log_buffer, "GPU control requests not supported: node %s gpuid %s mode %d reset_perm %d reset_vol %d", mom_node, gpuid, gpumode, reset_perm, reset_vol); if (LOGLEVEL >= 3) { log_ext(-1, __func__, log_buffer, LOG_INFO); } req_reject(PBSE_NOSUP, 0, preq, NULL, NULL); #endif /* NVIDIA_GPUS */ return; } /* END req_gpuctrl_mom() */ int add_gpu_status( dynamic_string *mom_status) { #ifdef NVIDIA_GPUS /* if we have no Nvidia gpus or nvidia-smi don't send gpu status */ if (!use_nvidia_gpu) return(PBSE_NONE); copy_to_end_of_dynamic_string(mom_status, START_GPU_STATUS); #ifdef NVML_API generate_server_gpustatus_nvml(mom_status); #else generate_server_gpustatus_smi(mom_status); #endif /* NVML_API */ copy_to_end_of_dynamic_string(mom_status, END_GPU_STATUS); #endif /* NVIDIA_GPUS */ return(PBSE_NONE); } /* END add_gpu_status() */