# Listing metadata: 84 lines, 2.6 KiB, YAML
---
# Back up Slurm and Munge configuration, state and diagnostics on every host
# in the `slurm_cluster` inventory group. Each host writes into its own
# timestamped directory below `backup_base_dir`; nothing is fetched back to
# the controller by this play.
- name: Backup Slurm and Munge state on all cluster nodes
  hosts: slurm_cluster
  become: true
  gather_facts: true

  vars:
    # Parent directory (on each node) that holds one sub-directory per run.
    backup_base_dir: /var/backups/slurm

  tasks:
    # The base directory is root-only (0700) because the trees copied into it
    # come from /etc and /var/spool.
    # NOTE(review): /etc/munge presumably holds the Munge key - confirm that
    # 0700 root:root matches your site policy for it.
    - name: Create backup base directory
      ansible.builtin.file:
        path: "{{ backup_base_dir }}"
        state: directory
        owner: root
        group: root
        mode: "0700"

    # The timestamp is taken on the node itself, at task run time, so every
    # host gets its own directory name. The created path is echoed on stdout
    # so that it can be captured in `backup_dir_result`.
    - name: Create timestamped backup directory
      ansible.builtin.shell: |
        set -euo pipefail
        ts="$(date +%F-%H%M%S)"
        dir="{{ backup_base_dir }}/$ts"
        mkdir -p "$dir"
        echo "$dir"
      args:
        # `set -o pipefail` needs bash rather than the default /bin/sh.
        executable: /bin/bash
      register: backup_dir_result
      changed_when: true

    # Keep the per-host path in a fact so the following tasks can reuse it.
    - name: Store backup directory fact
      ansible.builtin.set_fact:
        node_backup_dir: "{{ backup_dir_result.stdout }}"

    # Steps performed by the script, in order:
    #   1. Copy each listed config/state/log path that exists (`cp -a`);
    #      paths that are absent on this node are skipped.
    #   2. Save `systemctl status` and the last 200 journal lines for the
    #      munge, slurmctld and slurmd units.
    #   3. Save `sinfo` and `scontrol show ...` output when those commands
    #      are installed.
    #   4. Print the files and directories in the backup (two levels deep),
    #      which ends up in `backup_content`.
    # Steps 2 and 3 are best-effort: each command is followed by `|| true`,
    # so a missing unit or an unreachable controller does not fail the task.
    - name: Backup Slurm and Munge config/state if present
      ansible.builtin.shell: |
        set -euo pipefail

        backup_dir="{{ node_backup_dir }}"

        for p in \
          /etc/slurm \
          /etc/slurm-llnl \
          /etc/munge \
          /var/spool/slurmctld \
          /var/spool/slurmd \
          /var/log/slurm \
          /var/log/slurm-llnl
        do
          if [ -e "$p" ]; then
            cp -a "$p" "$backup_dir/"
          fi
        done

        systemctl status munge --no-pager > "$backup_dir/systemctl-munge.txt" 2>&1 || true
        systemctl status slurmctld --no-pager > "$backup_dir/systemctl-slurmctld.txt" 2>&1 || true
        systemctl status slurmd --no-pager > "$backup_dir/systemctl-slurmd.txt" 2>&1 || true

        journalctl -u munge -n 200 --no-pager > "$backup_dir/journal-munge.txt" 2>&1 || true
        journalctl -u slurmctld -n 200 --no-pager > "$backup_dir/journal-slurmctld.txt" 2>&1 || true
        journalctl -u slurmd -n 200 --no-pager > "$backup_dir/journal-slurmd.txt" 2>&1 || true

        if command -v sinfo >/dev/null 2>&1; then
          sinfo > "$backup_dir/sinfo.txt" 2>&1 || true
        fi

        if command -v scontrol >/dev/null 2>&1; then
          scontrol show config > "$backup_dir/scontrol-show-config.txt" 2>&1 || true
          scontrol show nodes > "$backup_dir/scontrol-show-nodes.txt" 2>&1 || true
          scontrol show partitions > "$backup_dir/scontrol-show-partitions.txt" 2>&1 || true
        fi

        find "$backup_dir" -maxdepth 2 \( -type f -o -type d \)
      args:
        # `set -o pipefail` needs bash rather than the default /bin/sh.
        executable: /bin/bash
      register: backup_content
      changed_when: true

    # Report where the backup for this host was written.
    - name: Show backup location on node
      ansible.builtin.debug:
        msg:
          - "Host: {{ inventory_hostname }}"
          - "Backup directory: {{ node_backup_dir }}"