Add Slurm AI/HPC cluster platform project
This commit is contained in:
+126
@@ -0,0 +1,126 @@
|
||||
---
# Bootstrap the Slurm accounting database (via sacctmgr) on the controller:
# make sure the cluster, a default account, and the "slurmuser" user with an
# association to that account exist, printing the accounting state before and
# after the changes.
#
# Variables referenced by this play (must be defined by inventory/vars):
#   slurm_cluster_name          - cluster name registered in accounting
#   slurm_account_name          - default account to create/associate
#   slurm_account_description   - Description= value for the account
#   slurm_account_organization  - Organization= value for the account
#
# Conventions used by the "Ensure ..." tasks below:
#   * Existence checks use `sacctmgr -P` (parsable2, '|'-delimited) instead of
#     the default fixed-width table, because the table truncates long values
#     (shown as "name+"), which would make the comparison fail and cause the
#     add to be retried on every run.
#   * Matching is done entirely inside awk (which reads all input before
#     exiting) rather than `grep -q`, so an early reader exit cannot produce a
#     SIGPIPE failure that `set -o pipefail` would turn into a false "missing".
#   * Templated values are passed through the `quote` filter before reaching
#     the shell, and handed to awk with -v rather than spliced into awk source.
#   * The script prints the sentinel line ACCOUNTING_CHANGED only after a
#     mutating sacctmgr call succeeded; `changed_when` keys off that line so it
#     does not depend on the wording of sacctmgr's human-readable messages.
- name: Initialize Slurm accounting entities
  hosts: slurm_controller
  become: true
  gather_facts: false

  tasks:
    # sacctmgr exits non-zero until it can talk to the accounting daemon;
    # poll up to 20 times, 2 seconds apart.
    - name: Wait for sacctmgr connectivity
      ansible.builtin.command:
        cmd: sacctmgr -n list cluster
      register: sacctmgr_cluster_list
      retries: 20
      delay: 2
      until: sacctmgr_cluster_list.rc == 0
      changed_when: false

    # Read-only snapshot for the operator; never reports "changed".
    - name: Show current accounting state before changes
      ansible.builtin.shell: |
        set -euo pipefail

        echo "### clusters"
        sacctmgr list cluster format=Cluster,ControlHost,ControlPort,RPC

        echo
        echo "### accounts"
        sacctmgr list account format=Account,Descr,Org

        echo
        echo "### users"
        sacctmgr list user format=User,DefaultAccount,Admin

        echo
        echo "### associations"
        sacctmgr list assoc format=Cluster,Account,User,Partition,Share,QOS,DefaultQOS
      args:
        executable: /bin/bash
      register: accounting_state_before
      changed_when: false

    - name: Print current accounting state before changes
      ansible.builtin.debug:
        var: accounting_state_before.stdout_lines

    - name: Ensure Slurm cluster exists in accounting DB
      ansible.builtin.shell: |
        set -euo pipefail

        cluster={{ slurm_cluster_name | quote }}

        # awk exits 0 only if some row's Cluster column equals $cluster.
        if sacctmgr -n -P list cluster format=Cluster \
          | awk -F'|' -v cluster="$cluster" \
              '$1 == cluster { found = 1 } END { exit !found }'; then
          echo "Cluster ${cluster} already exists"
        else
          sacctmgr -i add cluster "$cluster"
          echo "ACCOUNTING_CHANGED"
        fi
      args:
        executable: /bin/bash
      register: ensure_cluster
      changed_when: "'ACCOUNTING_CHANGED' in ensure_cluster.stdout_lines"

    - name: Ensure default lab account exists for cluster
      ansible.builtin.shell: |
        set -euo pipefail

        cluster={{ slurm_cluster_name | quote }}
        account={{ slurm_account_name | quote }}

        # An account-level association is the row for this cluster/account
        # whose User column is empty.
        if sacctmgr -n -P list assoc format=Cluster,Account,User \
          | awk -F'|' -v cluster="$cluster" -v account="$account" \
              '$1 == cluster && $2 == account && $3 == "" { found = 1 } END { exit !found }'; then
          echo "Account ${account} already associated with cluster ${cluster}"
        else
          sacctmgr -i add account "$account" \
            Cluster="$cluster" \
            Description={{ slurm_account_description | quote }} \
            Organization={{ slurm_account_organization | quote }}
          echo "ACCOUNTING_CHANGED"
        fi
      args:
        executable: /bin/bash
      register: ensure_account
      changed_when: "'ACCOUNTING_CHANGED' in ensure_account.stdout_lines"

    - name: Ensure slurmuser exists with lab account association
      ansible.builtin.shell: |
        set -euo pipefail

        cluster={{ slurm_cluster_name | quote }}
        account={{ slurm_account_name | quote }}

        # Look for the user-level association row (User column == slurmuser).
        if sacctmgr -n -P list assoc format=Cluster,Account,User \
          | awk -F'|' -v cluster="$cluster" -v account="$account" \
              '$1 == cluster && $2 == account && $3 == "slurmuser" { found = 1 } END { exit !found }'; then
          echo "User slurmuser already associated with account ${account} on cluster ${cluster}"
        else
          sacctmgr -i add user slurmuser \
            Cluster="$cluster" \
            Account="$account" \
            DefaultAccount="$account"
          echo "ACCOUNTING_CHANGED"
        fi
      args:
        executable: /bin/bash
      register: ensure_user_assoc
      changed_when: "'ACCOUNTING_CHANGED' in ensure_user_assoc.stdout_lines"

    # Covers the case where slurmuser already existed with a different default
    # account. The current value is read first so that an already-correct
    # setup is a no-op instead of issuing a modify on every run.
    # NOTE(review): sacctmgr is believed to print "Nothing modified" and may
    # exit non-zero when a modify changes nothing - confirm for the deployed
    # Slurm version; the guard below avoids depending on that either way.
    - name: Ensure slurmuser has default account set
      ansible.builtin.shell: |
        set -euo pipefail

        account={{ slurm_account_name | quote }}
        current="$(sacctmgr -n -P list user format=DefaultAccount where name=slurmuser)"

        if [ "$current" = "$account" ]; then
          echo "Default account for slurmuser is already ${account}"
        else
          sacctmgr -i modify user where name=slurmuser set DefaultAccount="$account"
          echo "ACCOUNTING_CHANGED"
        fi
      args:
        executable: /bin/bash
      register: set_default_account
      changed_when: "'ACCOUNTING_CHANGED' in set_default_account.stdout_lines"

    # Same read-only snapshot as before the changes, for comparison.
    - name: Show final accounting state
      ansible.builtin.shell: |
        set -euo pipefail

        echo "### clusters"
        sacctmgr list cluster format=Cluster,ControlHost,ControlPort,RPC

        echo
        echo "### accounts"
        sacctmgr list account format=Account,Descr,Org

        echo
        echo "### users"
        sacctmgr list user format=User,DefaultAccount,Admin

        echo
        echo "### associations"
        sacctmgr list assoc format=Cluster,Account,User,Partition,Share,QOS,DefaultQOS
      args:
        executable: /bin/bash
      register: accounting_state_after
      changed_when: false

    - name: Print final accounting state
      ansible.builtin.debug:
        var: accounting_state_after.stdout_lines
|
||||
Reference in New Issue
Block a user