Add Slurm AI/HPC cluster platform project
This commit is contained in:
@@ -0,0 +1,112 @@
|
||||
---
# Grants the Slurm operator account passwordless sudo for a fixed list of
# Slurm-related commands on every host in `slurm_cluster`.
#
# Hosts in the `slurm_controller` inventory group get a drop-in covering both
# slurmctld and slurmd; every other host gets a drop-in covering slurmd only.
# Each drop-in is syntax-checked with `visudo -cf` before it is installed.
#
# The Cmnd_Alias names are role-specific (`*_CONTROLLER` / `*_COMPUTE`).
# `visudo -cf %s` only checks the file being written, so a name shared by both
# drop-ins would pass validation and then clash if a host ever ended up with
# both files under /etc/sudoers.d (for example after changing role).
#
# NOTE(review): srun, sbatch and salloc launch arbitrary commands, and every
# Slurm client below is allowed with any arguments as root. This is likely
# equivalent to unrestricted root for the operator account -- confirm that it
# is intended before widening the set of users.
# NOTE(review): `systemctl status` and `journalctl` may start a pager when run
# on a terminal; consider the sudoers NOEXEC tag for them -- TODO confirm
# against the sudo and systemd versions in use.
- name: Fix sudo permissions for slurmuser Slurm operations
  hosts: slurm_cluster
  become: true
  gather_facts: false

  vars:
    # Account that receives the sudo rules below.
    slurm_operator_user: slurmuser

  tasks:
    - name: Configure sudoers for slurmuser on controller
      ansible.builtin.copy:
        dest: /etc/sudoers.d/91-slurmuser-slurm-controller
        owner: root
        group: root
        mode: "0440"
        content: |
          # Managed by Ansible

          Cmnd_Alias SLURM_SYSTEMCTL_CONTROLLER = \
            /bin/systemctl status slurmctld, \
            /bin/systemctl status slurmctld *, \
            /bin/systemctl restart slurmctld, \
            /bin/systemctl reload slurmctld, \
            /bin/systemctl start slurmctld, \
            /bin/systemctl stop slurmctld, \
            /bin/systemctl status slurmd, \
            /bin/systemctl status slurmd *, \
            /bin/systemctl restart slurmd, \
            /bin/systemctl reload slurmd, \
            /bin/systemctl start slurmd, \
            /bin/systemctl stop slurmd, \
            /usr/bin/systemctl status slurmctld, \
            /usr/bin/systemctl status slurmctld *, \
            /usr/bin/systemctl restart slurmctld, \
            /usr/bin/systemctl reload slurmctld, \
            /usr/bin/systemctl start slurmctld, \
            /usr/bin/systemctl stop slurmctld, \
            /usr/bin/systemctl status slurmd, \
            /usr/bin/systemctl status slurmd *, \
            /usr/bin/systemctl restart slurmd, \
            /usr/bin/systemctl reload slurmd, \
            /usr/bin/systemctl start slurmd, \
            /usr/bin/systemctl stop slurmd

          Cmnd_Alias SLURM_JOURNAL_CONTROLLER = \
            /bin/journalctl -u slurmctld, \
            /bin/journalctl -u slurmctld *, \
            /bin/journalctl -u slurmd, \
            /bin/journalctl -u slurmd *, \
            /usr/bin/journalctl -u slurmctld, \
            /usr/bin/journalctl -u slurmctld *, \
            /usr/bin/journalctl -u slurmd, \
            /usr/bin/journalctl -u slurmd *

          Cmnd_Alias SLURM_COMMANDS_CONTROLLER = \
            /usr/bin/scontrol, /usr/bin/scontrol *, \
            /usr/bin/sinfo, /usr/bin/sinfo *, \
            /usr/bin/squeue, /usr/bin/squeue *, \
            /usr/bin/scancel, /usr/bin/scancel *, \
            /usr/bin/sacct, /usr/bin/sacct *, \
            /usr/bin/sacctmgr, /usr/bin/sacctmgr *, \
            /usr/bin/sbatch, /usr/bin/sbatch *, \
            /usr/bin/srun, /usr/bin/srun *, \
            /usr/bin/salloc, /usr/bin/salloc *

          {{ slurm_operator_user }} ALL=(root) NOPASSWD: \
            SLURM_SYSTEMCTL_CONTROLLER, \
            SLURM_JOURNAL_CONTROLLER, \
            SLURM_COMMANDS_CONTROLLER
        # Refuse to install a drop-in that sudo cannot parse.
        validate: "visudo -cf %s"
      # `group_names` is always defined, so this does not fail when the
      # inventory has no `slurm_controller` group at all.
      when: "'slurm_controller' in group_names"

    - name: Configure sudoers for slurmuser on compute and GPU nodes
      ansible.builtin.copy:
        dest: /etc/sudoers.d/91-slurmuser-slurm-compute
        owner: root
        group: root
        mode: "0440"
        content: |
          # Managed by Ansible

          Cmnd_Alias SLURM_SYSTEMCTL_COMPUTE = \
            /bin/systemctl status slurmd, \
            /bin/systemctl status slurmd *, \
            /bin/systemctl restart slurmd, \
            /bin/systemctl reload slurmd, \
            /bin/systemctl start slurmd, \
            /bin/systemctl stop slurmd, \
            /usr/bin/systemctl status slurmd, \
            /usr/bin/systemctl status slurmd *, \
            /usr/bin/systemctl restart slurmd, \
            /usr/bin/systemctl reload slurmd, \
            /usr/bin/systemctl start slurmd, \
            /usr/bin/systemctl stop slurmd

          Cmnd_Alias SLURM_JOURNAL_COMPUTE = \
            /bin/journalctl -u slurmd, \
            /bin/journalctl -u slurmd *, \
            /usr/bin/journalctl -u slurmd, \
            /usr/bin/journalctl -u slurmd *

          Cmnd_Alias SLURM_COMMANDS_COMPUTE = \
            /usr/bin/scontrol, /usr/bin/scontrol *, \
            /usr/bin/sinfo, /usr/bin/sinfo *, \
            /usr/bin/squeue, /usr/bin/squeue *, \
            /usr/bin/scancel, /usr/bin/scancel *, \
            /usr/bin/sacct, /usr/bin/sacct *, \
            /usr/bin/sbatch, /usr/bin/sbatch *, \
            /usr/bin/srun, /usr/bin/srun *, \
            /usr/bin/salloc, /usr/bin/salloc *

          {{ slurm_operator_user }} ALL=(root) NOPASSWD: \
            SLURM_SYSTEMCTL_COMPUTE, \
            SLURM_JOURNAL_COMPUTE, \
            SLURM_COMMANDS_COMPUTE
        # Refuse to install a drop-in that sudo cannot parse.
        validate: "visudo -cf %s"
      # Every cluster host that is not a controller gets the slurmd-only rules.
      when: "'slurm_controller' not in group_names"
|
||||
Reference in New Issue
Block a user