Add Slurm AI/HPC cluster platform project
This commit is contained in:
+98
@@ -0,0 +1,98 @@
|
||||
---
# Restore-check play: imports the newest SlurmDBD dump into a scratch
# database and reports what was restored.
- name: Restore-check latest SlurmDBD backup into test database
  hosts: slurm_controller
  gather_facts: false
  become: true

  vars:
    # Directory that is searched for "<slurmdbd_storage_loc>-*.sql.gz" dumps.
    slurmdbd_backup_dir: /var/backups/slurmdbd
    # Scratch database that the tasks drop, recreate and import into.
    # slurmdbd_storage_loc is not defined in this play; it has to be supplied
    # from elsewhere (inventory/group vars or extra vars).
    restore_check_db: "{{ slurmdbd_storage_loc }}_restorecheck"

  tasks:
|
||||
# Pre-flight check: `systemctl is-active` exits non-zero when the unit is not
# active, which makes this task (and the play) fail before anything is touched.
- name: Validate MariaDB is running
  ansible.builtin.command:
    argv:
      - systemctl
      - is-active
      - mariadb
  # Read-only query; never report a change.
  changed_when: false
|
||||
|
||||
# Locate the most recently modified dump named
# "<slurmdbd_storage_loc>-*.sql.gz" in slurmdbd_backup_dir and print its path.
# The path is exposed to later tasks as latest_backup.stdout.
#
# The selection is done with a glob loop and the bash `-nt` (newer-than) test
# instead of parsing `ls -t | head`, so it is not affected by SIGPIPE under
# `pipefail`, and the templated path components are shell-quoted.
# The script deliberately avoids bash "${...}" forms that contain Jinja
# delimiters, because the whole script is templated before it reaches bash.
- name: Find latest SlurmDBD backup
  ansible.builtin.shell:
    cmd: |
      set -euo pipefail
      latest=""
      for candidate in {{ slurmdbd_backup_dir | quote }}/{{ slurmdbd_storage_loc | quote }}-*.sql.gz; do
        # An unmatched glob is passed through as the literal pattern; skip it.
        [[ -f "$candidate" ]] || continue
        if [[ -z "$latest" || "$candidate" -nt "$latest" ]]; then
          latest="$candidate"
        fi
      done
      if [[ -z "$latest" ]]; then
        echo "No SlurmDBD backup found in" {{ slurmdbd_backup_dir | quote }} >&2
        exit 1
      fi
      printf '%s\n' "$latest"
    executable: /bin/bash
  register: latest_backup
  # Read-only lookup; never report a change.
  changed_when: false
|
||||
|
||||
# Collect file metadata for the selected dump. Only `exists` and `size` are
# consumed by the following task, so the checksum, MIME and attribute lookups
# that stat performs by default are switched off; hashing a large dump would
# otherwise add avoidable runtime.
- name: Validate latest backup exists
  ansible.builtin.stat:
    path: "{{ latest_backup.stdout }}"
    get_checksum: false
    get_mime: false
    get_attributes: false
  register: latest_backup_stat
|
||||
|
||||
# Abort unless the dump exists and is at least 1024 bytes. The size is only
# evaluated when the file exists, because `and` short-circuits.
- name: Fail if latest backup is missing or empty
  ansible.builtin.fail:
    msg: "Latest SlurmDBD backup is missing or empty: {{ latest_backup.stdout }}"
  when: >-
    not (latest_backup_stat.stat.exists
    and latest_backup_stat.stat.size | int >= 1024)
|
||||
|
||||
# Drop and recreate the scratch database so every run starts from empty.
#
# The identifier is backtick-quoted so names containing characters such as
# "-" are valid SQL. The heredoc delimiter is quoted ('SQL') so bash passes
# the body through verbatim; with an unquoted delimiter the backticks would be
# treated as command substitution. Jinja templating happens before bash sees
# the script, so {{ restore_check_db }} is still expanded.
- name: Recreate restore-check database
  ansible.builtin.shell:
    cmd: |
      set -euo pipefail
      mysql <<'SQL'
      DROP DATABASE IF EXISTS `{{ restore_check_db }}`;
      CREATE DATABASE `{{ restore_check_db }}`;
      SQL
    executable: /bin/bash
  # The database is always dropped and recreated, so always report a change.
  changed_when: true
|
||||
|
||||
# Stream the compressed dump into the scratch database.
#
# --one-database makes the mysql client ignore statements issued after a
# `USE <other database>` inside the dump. If the dump was taken in a form
# that switches to the production database, nothing is imported (and the
# table-count check that follows fails) instead of the dump being replayed
# over live accounting data. The filtering is based on USE statements only.
# NOTE(review): the dump format is not visible from this playbook; confirm
# how the backups are produced.
#
# The backup path and database name are shell-quoted because both come from
# templated values.
- name: Import backup into restore-check database
  ansible.builtin.shell:
    cmd: |
      set -euo pipefail
      zcat {{ latest_backup.stdout | quote }} | mysql --one-database {{ restore_check_db | quote }}
    executable: /bin/bash
  # An import always modifies the scratch database.
  changed_when: true
|
||||
|
||||
# Count the tables that ended up in the scratch database and fail when there
# are none. The bare number is available as restored_tables.stdout.
- name: Validate restored table count
  ansible.builtin.shell:
    cmd: |
      set -euo pipefail
      mysql --skip-column-names --batch \
        --execute "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='{{ restore_check_db }}';"
    executable: /bin/bash
  register: restored_tables
  # Empty output (for example when mysql itself errors) casts to 0 and
  # therefore also fails.
  failed_when: restored_tables.stdout | int < 1
  changed_when: false
|
||||
|
||||
# Build a plain-text report with three sections: the scratch database name,
# its table count, and the ten tables with the highest table_rows value.
# This task has no failure condition of its own beyond a non-zero exit from
# the script; its output is kept in restore_check_summary for display.
# NOTE(review): information_schema table_rows may be an estimate depending on
# the storage engine - treat the numbers as indicative.
- name: Validate restored row count sample
  ansible.builtin.shell:
    cmd: |
      set -euo pipefail

      printf '%s\n' "### restored database" "{{ restore_check_db }}" ""

      printf '%s\n' "### table count"
      mysql --skip-column-names --batch \
        --execute "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='{{ restore_check_db }}';"

      printf '\n%s\n' "### largest tables"
      mysql --skip-column-names --batch \
        --execute "SELECT table_name, table_rows FROM information_schema.tables WHERE table_schema='{{ restore_check_db }}' ORDER BY table_rows DESC LIMIT 10;"
    executable: /bin/bash
  register: restore_check_summary
  # Read-only queries; never report a change.
  changed_when: false
|
||||
|
||||
# Print the outcome: which dump was imported, the scratch database name, the
# restored table count, and the report lines gathered by the summary task.
- name: Show restore-check result
  ansible.builtin.debug:
    msg:
      - 'Imported backup: {{ latest_backup.stdout }}'
      - 'Restore-check DB: {{ restore_check_db }}'
      - 'Restored tables: {{ restored_tables.stdout }}'
      - 'Summary:'
      # The summary is inserted as a nested list of output lines.
      - '{{ restore_check_summary.stdout_lines }}'
|
||||
Reference in New Issue
Block a user