Files
portfolio/platform-projects/hpc-slurm-ai-cluster/playbooks/tests/test-sreport-usage.yml
T
2026-06-05 15:38:56 +00:00

61 lines
1.6 KiB
YAML

---
# Submits a short CPU-bound batch job from the Slurm controller, waits for it
# to leave the queue, and prints its accounting record and output file so that
# there is measurable usage to look at afterwards.
- name: Generate measurable Slurm usage for sreport
  hosts: slurm_controller
  become: true
  gather_facts: false
  tasks:
    # Submits the job as `slurmuser`, polls squeue until the job is gone, then
    # prints the sacct record and the job's output file. The task fails when
    # submission fails, when the job is still queued/running once the polling
    # budget is used up, or when the output file cannot be read.
    - name: Submit CPU usage job
      ansible.builtin.shell:
        cmd: |
          set -euo pipefail
          # The here-document is the batch script. It is quoted ('SBATCH') so
          # that nothing in it is expanded by this shell, and it sits at the
          # base indent so the shebang and terminator start at column 0.
          job_id="$(
          sudo -iu slurmuser sbatch --parsable <<'SBATCH'
          #!/bin/bash
          #SBATCH --job-name=sreport-usage
          #SBATCH --partition=debug
          #SBATCH --cpus-per-task=2
          #SBATCH --mem=512M
          #SBATCH --time=00:03:00
          #SBATCH --output=/shared/sreport-usage-%j.out
          echo "HOST=$(hostname)"
          echo "SLURM_JOB_ID=$SLURM_JOB_ID"
          echo "SLURM_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK:-}"
          echo "CPUS_ALLOWED=$(grep Cpus_allowed_list /proc/self/status)"
          echo "Burning CPU for 90 seconds"
          timeout 90 bash -c 'while true; do :; done' &
          timeout 90 bash -c 'while true; do :; done' &
          wait
          echo "Done"
          date
          SBATCH
          )"
          # `sbatch --parsable` prints "jobid;cluster" when a cluster name is
          # present; keep only the numeric job id.
          job_id="${job_id%%;*}"
          echo "JOB_ID=$job_id"

          # Poll until squeue no longer lists the job, for at most
          # max_polls * poll_interval seconds.
          max_polls=150
          poll_interval=2
          left_queue=false
          for _ in $(seq 1 "$max_polls"); do
            if squeue -h -j "$job_id" | grep -q .; then
              squeue -j "$job_id"
              sleep "$poll_interval"
            else
              left_queue=true
              break
            fi
          done

          echo "### sacct"
          sacct -j "$job_id" --format=JobID,JobName,User,Partition,State,ExitCode,Elapsed,AllocCPUS,ReqMem,NodeList

          # Fail loudly instead of reporting on a job that has not finished.
          if [ "$left_queue" != true ]; then
            echo "Job $job_id is still in the queue after $((max_polls * poll_interval)) seconds" >&2
            exit 1
          fi

          echo "### output"
          cat "/shared/sreport-usage-${job_id}.out"
        executable: /bin/bash
      register: sreport_usage_job
      # Each run submits a new job, so the task always counts as a change.
      changed_when: true

    # Prints everything the script above wrote to stdout, one line per entry.
    - name: Show usage job result
      ansible.builtin.debug:
        var: sreport_usage_job.stdout_lines