/*****************************************************************************\ * builtin.c - Simple builtin (FIFO) scheduler plugin. * Periodically when pending jobs can start. * This is a minimal implementation of the logic found in * src/plugins/sched/backfill/backfill.c and disregards * how jobs are scheduled sequencially. ***************************************************************************** * Copyright (C) 2003-2007 The Regents of the University of California. * Copyright (C) 2008-2010 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette * CODE-OCEC-09-009. All rights reserved. * * This file is part of Slurm, a resource management program. * For details, see . * Please also read the included file: DISCLAIMER. * * Slurm is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * In addition, as a special exception, the copyright holders give permission * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than * OpenSSL. If you modify file(s) with this exception, you may extend this * exception to your version of the file(s), but you are not obligated to do * so. If you do not wish to do so, delete this exception statement from your * version. If you delete this exception statement from all source files in * the program, then also delete it here. * * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along * with Slurm; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ #include #include #include #include #include #include #include "slurm/slurm.h" #include "slurm/slurm_errno.h" #include "src/common/list.h" #include "src/common/macros.h" #include "src/common/node_select.h" #include "src/common/parse_time.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" #include "src/slurmctld/burst_buffer.h" #include "src/slurmctld/locks.h" #include "src/slurmctld/preempt.h" #include "src/slurmctld/reservation.h" #include "src/slurmctld/slurmctld.h" #include "src/plugins/sched/builtin/builtin.h" #ifndef BACKFILL_INTERVAL # define BACKFILL_INTERVAL 30 #endif /*********************** local variables *********************/ static bool stop_builtin = false; static pthread_mutex_t term_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t term_cond = PTHREAD_COND_INITIALIZER; static bool config_flag = false; static int builtin_interval = BACKFILL_INTERVAL; static int max_sched_job_cnt = 50; static int sched_timeout = 0; /*********************** local functions *********************/ static void _compute_start_times(void); static void _load_config(void); static void _my_sleep(int secs); /* Terminate builtin_agent */ extern void stop_builtin_agent(void) { slurm_mutex_lock(&term_lock); stop_builtin = true; slurm_cond_signal(&term_cond); slurm_mutex_unlock(&term_lock); } static void _my_sleep(int secs) { struct timespec ts = {0, 0}; struct timeval now; gettimeofday(&now, NULL); ts.tv_sec = now.tv_sec + secs; ts.tv_nsec = now.tv_usec * 1000; slurm_mutex_lock(&term_lock); if (!stop_builtin) slurm_cond_timedwait(&term_cond, &term_lock, &ts); slurm_mutex_unlock(&term_lock); } static void _load_config(void) { char *sched_params = slurm_get_sched_params(); char *tmp_ptr; sched_timeout = slurm_get_msg_timeout() / 2; sched_timeout = MAX(sched_timeout, 1); sched_timeout = MIN(sched_timeout, 10); if ((tmp_ptr = xstrcasestr(sched_params, "interval="))) builtin_interval = atoi(tmp_ptr + 9); if (builtin_interval < 1) { error("Invalid SchedulerParameters interval: %d", builtin_interval); builtin_interval = BACKFILL_INTERVAL; } if ((tmp_ptr = xstrcasestr(sched_params, "max_job_bf="))) max_sched_job_cnt = atoi(tmp_ptr + 11); if ((tmp_ptr = xstrcasestr(sched_params, "bf_max_job_test="))) max_sched_job_cnt = atoi(tmp_ptr + 16); if (max_sched_job_cnt < 1) { error("Invalid SchedulerParameters bf_max_job_test: %d", max_sched_job_cnt); max_sched_job_cnt = 50; } xfree(sched_params); } static void _compute_start_times(void) { int j, rc = SLURM_SUCCESS, job_cnt = 0; List job_queue; job_queue_rec_t *job_queue_rec; job_record_t *job_ptr; part_record_t *part_ptr; bitstr_t *alloc_bitmap = NULL, *avail_bitmap = NULL; bitstr_t *exc_core_bitmap = NULL; uint32_t max_nodes, min_nodes, req_nodes, time_limit; time_t now = time(NULL), sched_start, last_job_alloc; bool resv_overlap = false; sched_start = now; last_job_alloc = now - 1; alloc_bitmap = bit_alloc(node_record_count); job_queue = build_job_queue(true, false); sort_job_queue(job_queue); while ((job_queue_rec = (job_queue_rec_t *) list_pop(job_queue))) { job_ptr = job_queue_rec->job_ptr; part_ptr = job_queue_rec->part_ptr; xfree(job_queue_rec); if (part_ptr != job_ptr->part_ptr) continue; /* Only test one partition */ if (job_cnt++ > max_sched_job_cnt) { debug2("scheduling loop exiting after %d jobs", max_sched_job_cnt); break; } /* Determine minimum and maximum node counts */ /* On BlueGene systems don't adjust the min/max node limits here. We are working on midplane values. */ min_nodes = MAX(job_ptr->details->min_nodes, part_ptr->min_nodes); if (job_ptr->details->max_nodes == 0) max_nodes = part_ptr->max_nodes; else max_nodes = MIN(job_ptr->details->max_nodes, part_ptr->max_nodes); max_nodes = MIN(max_nodes, 500000); /* prevent overflows */ if (job_ptr->details->max_nodes) req_nodes = max_nodes; else req_nodes = min_nodes; if (min_nodes > max_nodes) { /* job's min_nodes exceeds partition's max_nodes */ continue; } j = job_test_resv(job_ptr, &now, true, &avail_bitmap, &exc_core_bitmap, &resv_overlap, false); if (j != SLURM_SUCCESS) { FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(exc_core_bitmap); continue; } rc = select_g_job_test(job_ptr, avail_bitmap, min_nodes, max_nodes, req_nodes, SELECT_MODE_WILL_RUN, NULL, NULL, exc_core_bitmap); if (rc == SLURM_SUCCESS) { last_job_update = now; if (job_ptr->time_limit == INFINITE) time_limit = 365 * 24 * 60 * 60; else if (job_ptr->time_limit != NO_VAL) time_limit = job_ptr->time_limit * 60; else if (job_ptr->part_ptr && (job_ptr->part_ptr->max_time != INFINITE)) time_limit = job_ptr->part_ptr->max_time * 60; else time_limit = 365 * 24 * 60 * 60; if (bit_overlap_any(alloc_bitmap, avail_bitmap) && (job_ptr->start_time <= last_job_alloc)) { job_ptr->start_time = last_job_alloc; } bit_or(alloc_bitmap, avail_bitmap); last_job_alloc = job_ptr->start_time + time_limit; } FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(exc_core_bitmap); if ((time(NULL) - sched_start) >= sched_timeout) { debug2("scheduling loop exiting after %d jobs", max_sched_job_cnt); break; } } FREE_NULL_LIST(job_queue); FREE_NULL_BITMAP(alloc_bitmap); } /* Note that slurm.conf has changed */ extern void builtin_reconfig(void) { config_flag = true; } /* builtin_agent - detached thread periodically when pending jobs can start */ extern void *builtin_agent(void *args) { time_t now; double wait_time; static time_t last_sched_time = 0; /* Read config, nodes and partitions; Write jobs */ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK, READ_LOCK }; _load_config(); last_sched_time = time(NULL); while (!stop_builtin) { _my_sleep(builtin_interval); if (stop_builtin) break; if (config_flag) { config_flag = false; _load_config(); } now = time(NULL); wait_time = difftime(now, last_sched_time); if ((wait_time < builtin_interval)) continue; lock_slurmctld(all_locks); _compute_start_times(); last_sched_time = time(NULL); (void) bb_g_job_try_stage_in(); unlock_slurmctld(all_locks); } return NULL; }