59 lines
1.6 KiB
YAML
59 lines
1.6 KiB
YAML
|
|
---
# Diagnostic playbook: submits a short batch job pinned to gpu01 WITHOUT
# requesting any GPU GRES, waits for it to leave the queue, and prints what the
# job could see (/dev/nvidia*, nvidia-smi, CUDA_VISIBLE_DEVICES, CPU list).
# The play only displays the job output; it makes no pass/fail judgement about
# GPU visibility itself.
- name: Test GPU access without GRES allocation
  hosts: slurm_controller
  become: true
  gather_facts: false

  tasks:
    # Runs under bash (see args.executable) because the script relies on
    # `set -o pipefail` and on a here-document inside a command substitution.
    - name: Submit job to gpu01 without requesting GPU
      ansible.builtin.shell: |
        set -euo pipefail

        # The here-document body and its terminator must stay at the left
        # margin of this script: sbatch needs "#!" in column 0 of the first
        # line and bash needs the SBATCH terminator unindented.
        job_id="$(
          sudo -iu slurmuser sbatch --parsable <<'SBATCH'
        #!/bin/bash
        #SBATCH --job-name=gpu-deny-test
        #SBATCH --partition=all
        #SBATCH --nodelist=gpu01
        #SBATCH --cpus-per-task=1
        #SBATCH --mem=1G
        #SBATCH --time=00:02:00
        #SBATCH --output=/shared/gpu-deny-test-%j.out

        echo "HOST=$(hostname)"
        echo "SLURM_JOB_ID=$SLURM_JOB_ID"
        echo "SLURM_JOB_NODELIST=$SLURM_JOB_NODELIST"
        echo "SLURM_JOB_GPUS=${SLURM_JOB_GPUS:-}"
        echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
        echo "CPUS_ALLOWED=$(grep Cpus_allowed_list /proc/self/status)"
        echo
        echo "### ls nvidia devices"
        ls -l /dev/nvidia* 2>&1 || true
        echo
        echo "### nvidia-smi without GRES"
        nvidia-smi 2>&1 || true
        SBATCH
        )"

        # `sbatch --parsable` prints "jobid;cluster" when a cluster name is
        # present; keep only the numeric job id so that squeue and the output
        # file name below both work.
        job_id="${job_id%%;*}"
        out_file="/shared/gpu-deny-test-${job_id}.out"

        echo "JOB_ID=$job_id"

        # Poll once per second, for at most 60 iterations, until the job is no
        # longer listed by squeue. The queue line is echoed on every iteration
        # so the wait is visible in the task output.
        for _ in {1..60}; do
          if sudo -iu slurmuser squeue -h -j "$job_id" | grep -q .; then
            sudo -iu slurmuser squeue -j "$job_id"
            sleep 1
          else
            break
          fi
        done

        # The polling loop can run out while the job is still pending or
        # running; in that case there may be no output file yet. Fail with an
        # explicit message instead of a bare "No such file" from cat.
        if [[ ! -e "$out_file" ]]; then
          echo "ERROR: no output file $out_file for job $job_id" >&2
          sudo -iu slurmuser squeue -j "$job_id" >&2 || true
          exit 1
        fi

        echo "### output"
        cat "$out_file"
      args:
        executable: /bin/bash
      register: gpu_deny_result
      # Submitting a job always alters cluster state, so always report changed.
      changed_when: true

    # Prints the captured stdout of the previous task line by line.
    - name: Show GPU deny test result
      ansible.builtin.debug:
        var: gpu_deny_result.stdout_lines