# Ansible playbook (YAML). Original listing metadata: 90 lines, 2.6 KiB.
---
# Play: inspect the Slurm and Munge installation on each targeted host.
# Targets the "slurm_cluster" host pattern, escalates privileges for every
# task, and gathers facts before the task list runs.
- name: Inspect current Slurm and Munge state
  hosts: slurm_cluster
  gather_facts: true
  become: true

  tasks:
- name: Basic host info
|
||
|
|
ansible.builtin.shell: |
|
||
|
|
set -e
|
||
|
|
echo "HOST=$(hostname -f 2>/dev/null || hostname)"
|
||
|
|
echo "SHORT_HOST=$(hostname -s)"
|
||
|
|
echo "IP_ADDRESSES=$(hostname -I)"
|
||
|
|
echo "OS=$(lsb_release -ds 2>/dev/null || cat /etc/os-release | grep PRETTY_NAME || true)"
|
||
|
|
echo "KERNEL=$(uname -r)"
|
||
|
|
args:
|
||
|
|
executable: /bin/bash
|
||
|
|
register: host_info
|
||
|
|
changed_when: false
|
||
|
|
|
||
|
|
- name: Slurm package info
|
||
|
|
ansible.builtin.shell: |
|
||
|
|
dpkg -l | grep -Ei 'slurm|munge' || true
|
||
|
|
args:
|
||
|
|
executable: /bin/bash
|
||
|
|
register: package_info
|
||
|
|
changed_when: false
|
||
|
|
|
||
|
|
- name: Slurm config paths
|
||
|
|
ansible.builtin.shell: |
|
||
|
|
set -e
|
||
|
|
for p in /etc/slurm /etc/slurm-llnl /etc/munge; do
|
||
|
|
echo "### $p"
|
||
|
|
if [ -e "$p" ]; then
|
||
|
|
find "$p" -maxdepth 2 -type f -printf "%m %u %g %p\n" | sort
|
||
|
|
else
|
||
|
|
echo "MISSING"
|
||
|
|
fi
|
||
|
|
done
|
||
|
|
args:
|
||
|
|
executable: /bin/bash
|
||
|
|
register: config_paths
|
||
|
|
changed_when: false
|
||
|
|
|
||
|
|
- name: Service state
|
||
|
|
ansible.builtin.shell: |
|
||
|
|
for s in munge slurmctld slurmd; do
|
||
|
|
echo "### $s"
|
||
|
|
systemctl is-enabled "$s" 2>/dev/null || true
|
||
|
|
systemctl is-active "$s" 2>/dev/null || true
|
||
|
|
done
|
||
|
|
args:
|
||
|
|
executable: /bin/bash
|
||
|
|
register: service_state
|
||
|
|
changed_when: false
|
||
|
|
|
||
|
|
- name: Slurm commands
|
||
|
|
ansible.builtin.shell: |
|
||
|
|
echo "### which"
|
||
|
|
command -v sinfo || true
|
||
|
|
command -v scontrol || true
|
||
|
|
command -v sbatch || true
|
||
|
|
command -v srun || true
|
||
|
|
command -v munge || true
|
||
|
|
command -v unmunge || true
|
||
|
|
|
||
|
|
echo "### sinfo"
|
||
|
|
sinfo 2>&1 || true
|
||
|
|
|
||
|
|
echo "### scontrol ping"
|
||
|
|
scontrol ping 2>&1 || true
|
||
|
|
args:
|
||
|
|
executable: /bin/bash
|
||
|
|
register: slurm_commands
|
||
|
|
changed_when: false
|
||
|
|
|
||
|
|
- name: Show inspection report
|
||
|
|
ansible.builtin.debug:
|
||
|
|
msg:
|
||
|
|
- "===== {{ inventory_hostname }} :: host_info ====="
|
||
|
|
- "{{ host_info.stdout_lines }}"
|
||
|
|
- "===== {{ inventory_hostname }} :: packages ====="
|
||
|
|
- "{{ package_info.stdout_lines }}"
|
||
|
|
- "===== {{ inventory_hostname }} :: config_paths ====="
|
||
|
|
- "{{ config_paths.stdout_lines }}"
|
||
|
|
- "===== {{ inventory_hostname }} :: services ====="
|
||
|
|
- "{{ service_state.stdout_lines }}"
|
||
|
|
- "===== {{ inventory_hostname }} :: slurm_commands ====="
|
||
|
|
- "{{ slurm_commands.stdout_lines }}"
|