/**************************************************************************** \ * jobacct_gather_cgroup_cpuacct.c - cpuacct cgroup subsystem for * jobacct_gather/cgroup ***************************************************************************** * Copyright (C) 2011 Bull * Written by Martin Perry (martin.perry@bull.com) based on code from * Matthieu Hautreux * * This file is part of Slurm, a resource management program. * For details, see . * Please also read the included file: DISCLAIMER. * * Slurm is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * In addition, as a special exception, the copyright holders give permission * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than * OpenSSL. If you modify file(s) with this exception, you may extend this * exception to your version of the file(s), but you are not obligated to do * so. If you do not wish to do so, delete this exception statement from your * version. If you delete this exception statement from all source files in * the program, then also delete it here. * * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along * with Slurm; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ #include #include /* getenv */ #include #include "slurm/slurm_errno.h" #include "slurm/slurm.h" #include "src/common/xstring.h" #include "src/plugins/jobacct_gather/cgroup/jobacct_gather_cgroup.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" #include "src/slurmd/slurmd/slurmd.h" static char user_cgroup_path[PATH_MAX]; static char job_cgroup_path[PATH_MAX]; static char jobstep_cgroup_path[PATH_MAX]; static char task_cgroup_path[PATH_MAX]; static xcgroup_ns_t cpuacct_ns; static xcgroup_t user_cpuacct_cg; static xcgroup_t job_cpuacct_cg; static xcgroup_t step_cpuacct_cg; List task_cpuacct_cg_list = NULL; static uint32_t max_task_id; extern int jobacct_gather_cgroup_cpuacct_init(void) { /* initialize user/job/jobstep cgroup relative paths */ user_cgroup_path[0]='\0'; job_cgroup_path[0]='\0'; jobstep_cgroup_path[0]='\0'; task_cgroup_path[0]='\0'; /* initialize cpuacct cgroup namespace */ if (xcgroup_ns_create(&cpuacct_ns, "", "cpuacct") != XCGROUP_SUCCESS) { error("jobacct_gather/cgroup: unable to create cpuacct " "namespace"); return SLURM_ERROR; } FREE_NULL_LIST(task_cpuacct_cg_list); task_cpuacct_cg_list = list_create(free_task_cg_info); return SLURM_SUCCESS; } extern int jobacct_gather_cgroup_cpuacct_fini(void) { xcgroup_t cpuacct_cg; bool lock_ok; int cc; if (user_cgroup_path[0] == '\0' || job_cgroup_path[0] == '\0' || jobstep_cgroup_path[0] == '\0' || task_cgroup_path[0] == '\0') return SLURM_SUCCESS; /* * Move the slurmstepd back to the root cpuacct cg. * The release_agent will asynchroneously be called for the step * cgroup. It will do the necessary cleanup. */ if (xcgroup_create(&cpuacct_ns, &cpuacct_cg, "", 0, 0) == XCGROUP_SUCCESS) { xcgroup_set_uint32_param(&cpuacct_cg, "tasks", getpid()); } /* Lock the root of the cgroup and remove the subdirectories * related to this job. */ lock_ok = true; if (xcgroup_lock(&cpuacct_cg) != XCGROUP_SUCCESS) { error("%s: failed to flock() %s %m", __func__, cpuacct_cg.path); lock_ok = false; } /* Clean up starting from the leaves way up, the * reverse order in which the cgroups were created. */ for (cc = 0; cc <= max_task_id; cc++) { xcgroup_t cgroup; char *buf = NULL; /* rmdir all tasks this running slurmstepd * was responsible for. */ xstrfmtcat(buf, "%s%s/task_%d", cpuacct_ns.mnt_point, jobstep_cgroup_path, cc); cgroup.path = buf; if (xcgroup_delete(&cgroup) != XCGROUP_SUCCESS) { debug2("%s: failed to delete %s %m", __func__, buf); } xfree(buf); } if (xcgroup_delete(&step_cpuacct_cg) != XCGROUP_SUCCESS) { debug2("%s: failed to delete %s %m", __func__, cpuacct_cg.path); } if (xcgroup_delete(&job_cpuacct_cg) != XCGROUP_SUCCESS) { debug2("%s: failed to delete %s %m", __func__, job_cpuacct_cg.path); } if (xcgroup_delete(&user_cpuacct_cg) != XCGROUP_SUCCESS) { debug2("%s: failed to delete %s %m", __func__, user_cpuacct_cg.path); } if (lock_ok == true) xcgroup_unlock(&cpuacct_cg); xcgroup_destroy(&user_cpuacct_cg); xcgroup_destroy(&job_cpuacct_cg); xcgroup_destroy(&step_cpuacct_cg); xcgroup_destroy(&cpuacct_cg); FREE_NULL_LIST(task_cpuacct_cg_list); user_cgroup_path[0]='\0'; job_cgroup_path[0]='\0'; jobstep_cgroup_path[0]='\0'; task_cgroup_path[0]='\0'; xcgroup_ns_destroy(&cpuacct_ns); return SLURM_SUCCESS; } extern int jobacct_gather_cgroup_cpuacct_attach_task(pid_t pid, jobacct_id_t *jobacct_id) { xcgroup_t cpuacct_cg; stepd_step_rec_t *job; uid_t uid; gid_t gid; uint32_t jobid; uint32_t stepid; uint32_t taskid; int fstatus = SLURM_SUCCESS; int rc; char* slurm_cgpath; task_cg_info_t *task_cg_info; bool need_to_add = false; job = jobacct_id->job; uid = job->uid; gid = job->gid; stepid = job->stepid; taskid = jobacct_id->taskid; if (job->het_job_id && (job->het_job_id != NO_VAL)) jobid = job->het_job_id; else jobid = job->jobid; if (taskid >= max_task_id) max_task_id = taskid; xassert(task_cpuacct_cg_list); debug("%s: jobid %u stepid %u taskid %u max_task_id %u", __func__, jobid, stepid, taskid, max_task_id); /* create slurm root cg in this cg namespace */ slurm_cgpath = jobacct_cgroup_create_slurm_cg(&cpuacct_ns); if (!slurm_cgpath) { return SLURM_ERROR; } /* build user cgroup relative path if not set (may not be) */ if (*user_cgroup_path == '\0') { if (snprintf(user_cgroup_path, PATH_MAX, "%s/uid_%u", slurm_cgpath, uid) >= PATH_MAX) { error("jobacct_gather/cgroup: unable to build uid %u " "cgroup relative path", uid); xfree(slurm_cgpath); return SLURM_ERROR; } } xfree(slurm_cgpath); /* build job cgroup relative path if not set (may not be) */ if (*job_cgroup_path == '\0') { if (snprintf(job_cgroup_path, PATH_MAX, "%s/job_%u", user_cgroup_path, jobid) >= PATH_MAX) { error("jobacct_gather/cgroup: unable to build job %u " "cpuacct cg relative path : %m", jobid); return SLURM_ERROR; } } /* build job step cgroup relative path if not set (may not be) */ if (*jobstep_cgroup_path == '\0') { int len; if (stepid == SLURM_BATCH_SCRIPT) { len = snprintf(jobstep_cgroup_path, PATH_MAX, "%s/step_batch", job_cgroup_path); } else if (stepid == SLURM_EXTERN_CONT) { len = snprintf(jobstep_cgroup_path, PATH_MAX, "%s/step_extern", job_cgroup_path); } else { len = snprintf(jobstep_cgroup_path, PATH_MAX, "%s/step_%u", job_cgroup_path, stepid); } if (len >= PATH_MAX) { error("jobacct_gather/cgroup: unable to build job step " " %u.%u cpuacct cg relative path: %m", jobid, stepid); return SLURM_ERROR; } } /* build task cgroup relative path */ if (snprintf(task_cgroup_path, PATH_MAX, "%s/task_%u", jobstep_cgroup_path, taskid) >= PATH_MAX) { error("jobacct_gather/cgroup: unable to build task %u " "cpuacct cg relative path : %m", taskid); return SLURM_ERROR; } /* * create cpuacct root cg and lock it * * we will keep the lock until the end to avoid the effect of a release * agent that would remove an existing cgroup hierarchy while we are * setting it up. As soon as the step cgroup is created, we can release * the lock. * Indeed, consecutive slurm steps could result in cg being removed * between the next EEXIST instantiation and the first addition of * a task. The release_agent will have to lock the root cpuacct cgroup * to avoid this scenario. */ if (xcgroup_create(&cpuacct_ns, &cpuacct_cg, "", 0, 0) != XCGROUP_SUCCESS) { error("jobacct_gather/cgroup: unable to create root cpuacct " "xcgroup"); return SLURM_ERROR; } if (xcgroup_lock(&cpuacct_cg) != XCGROUP_SUCCESS) { xcgroup_destroy(&cpuacct_cg); error("jobacct_gather/cgroup: unable to lock root cpuacct cg"); return SLURM_ERROR; } /* * Create user cgroup in the cpuacct ns (it could already exist) */ if (xcgroup_create(&cpuacct_ns, &user_cpuacct_cg, user_cgroup_path, uid, gid) != XCGROUP_SUCCESS) { error("jobacct_gather/cgroup: unable to create user %u cpuacct " "cgroup", uid); fstatus = SLURM_ERROR; goto error; } if (xcgroup_instantiate(&user_cpuacct_cg) != XCGROUP_SUCCESS) { xcgroup_destroy(&user_cpuacct_cg); error("jobacct_gather/cgroup: unable to instantiate user %u " "cpuacct cgroup", uid); fstatus = SLURM_ERROR; goto error; } /* * Create job cgroup in the cpuacct ns (it could already exist) */ if (xcgroup_create(&cpuacct_ns, &job_cpuacct_cg, job_cgroup_path, uid, gid) != XCGROUP_SUCCESS) { xcgroup_destroy(&user_cpuacct_cg); error("jobacct_gather/cgroup: unable to create job %u cpuacct " "cgroup", jobid); fstatus = SLURM_ERROR; goto error; } if (xcgroup_instantiate(&job_cpuacct_cg) != XCGROUP_SUCCESS) { xcgroup_destroy(&user_cpuacct_cg); xcgroup_destroy(&job_cpuacct_cg); error("jobacct_gather/cgroup: unable to instantiate job %u " "cpuacct cgroup", jobid); fstatus = SLURM_ERROR; goto error; } /* * Create step cgroup in the cpuacct ns (it could already exist) */ if (xcgroup_create(&cpuacct_ns, &step_cpuacct_cg, jobstep_cgroup_path, uid, gid) != XCGROUP_SUCCESS) { /* do not delete user/job cgroup as they can exist for other * steps, but release cgroup structures */ xcgroup_destroy(&user_cpuacct_cg); xcgroup_destroy(&job_cpuacct_cg); error("jobacct_gather/cgroup: unable to create jobstep %u.%u " "cpuacct cgroup", jobid, stepid); fstatus = SLURM_ERROR; goto error; } if (xcgroup_instantiate(&step_cpuacct_cg) != XCGROUP_SUCCESS) { xcgroup_destroy(&user_cpuacct_cg); xcgroup_destroy(&job_cpuacct_cg); xcgroup_destroy(&step_cpuacct_cg); error("jobacct_gather/cgroup: unable to instantiate jobstep " "%u.%u cpuacct cgroup", jobid, stepid); fstatus = SLURM_ERROR; goto error; } if (!(task_cg_info = list_find_first(task_cpuacct_cg_list, find_task_cg_info, &taskid))) { task_cg_info = xmalloc(sizeof(*task_cg_info)); task_cg_info->taskid = taskid; need_to_add = true; } /* * Create task cgroup in the cpuacct ns */ if (xcgroup_create(&cpuacct_ns, &task_cg_info->task_cg, task_cgroup_path, uid, gid) != XCGROUP_SUCCESS) { /* do not delete user/job cgroup as they can exist for other * steps, but release cgroup structures */ xcgroup_destroy(&user_cpuacct_cg); xcgroup_destroy(&job_cpuacct_cg); /* Don't use free_task_cg_info as the task_cg isn't there */ xfree(task_cg_info); error("jobacct_gather/cgroup: unable to create jobstep %u.%u " "task %u cpuacct cgroup", jobid, stepid, taskid); fstatus = SLURM_ERROR; goto error; } if (xcgroup_instantiate(&task_cg_info->task_cg) != XCGROUP_SUCCESS) { xcgroup_destroy(&user_cpuacct_cg); xcgroup_destroy(&job_cpuacct_cg); xcgroup_destroy(&step_cpuacct_cg); free_task_cg_info(task_cg_info); error("jobacct_gather/cgroup: unable to instantiate jobstep " "%u.%u task %u cpuacct cgroup", jobid, stepid, taskid); fstatus = SLURM_ERROR; goto error; } /* * Attach the slurmstepd to the task cpuacct cgroup */ rc = xcgroup_add_pids(&task_cg_info->task_cg, &pid, 1); if (rc != XCGROUP_SUCCESS) { error("jobacct_gather/cgroup: unable to add slurmstepd to cpuacct cg '%s'", task_cg_info->task_cg.path); fstatus = SLURM_ERROR; } else fstatus = SLURM_SUCCESS; /* Add the task cgroup to the list now that it is initialized. */ if (need_to_add) list_append(task_cpuacct_cg_list , task_cg_info); error: xcgroup_unlock(&cpuacct_cg); xcgroup_destroy(&cpuacct_cg); return fstatus; }