Add AI lab maintenance toolkit
lint / shell-yaml-ansible (push) Failing after 17s

This commit is contained in:
Mateusz Suski
2026-06-06 00:10:44 +00:00
parent 1843796e92
commit 8cb92de06f
21 changed files with 1031 additions and 0 deletions
+111
View File
@@ -0,0 +1,111 @@
#!/usr/bin/env bash
# Prints a sectioned host report: identity, memory, filesystems, journal
# usage, Docker, libvirt, NVIDIA, failed systemd units and SMART health.
#
# Strict mode: stop on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail
# Print a report heading: an empty line followed by the title wrapped in
# "==" markers.
# Arguments: $1 - heading text
# Outputs:   heading on stdout
section() {
  local title="$1"
  printf '\n== %s ==\n' "$title"
}
# Run a command whose failure must not abort the report.
# Arguments: $1  - human-readable description used in the warning
#            $2+ - command and its arguments
# Outputs:   the command's own output; on failure, a WARNING line on stdout
# Returns:   always 0, so callers are safe under errexit
run_optional() {
  local label="$1"
  shift
  # The left-hand side of || is exempt from errexit, so a failing command
  # only triggers the warning.
  "$@" || printf 'WARNING: %s failed\n' "$label"
  return 0
}
section "Host identity"
# Use hostnamectl when it is installed; otherwise fall back to hostname.
# The tool name doubles as the description passed to run_optional.
identity_tool=hostname
if command -v hostnamectl >/dev/null 2>&1; then
  identity_tool=hostnamectl
fi
run_optional "$identity_tool" "$identity_tool"
run_optional "kernel information" uname -a
run_optional "uptime" uptime
section "Memory"
# Report memory with free -h, or warn when free is not installed.
if ! command -v free >/dev/null 2>&1; then
  printf 'WARNING: free is not available\n'
else
  run_optional "memory report" free -h
fi
section "Filesystems"

# Succeed only when the given path is itself a mount target.
# findmnt --target would also match any path that merely lives inside a
# mounted filesystem (it walks up to the containing mount), which makes the
# check true for every existing directory; --mountpoint matches only paths
# that are mount targets themselves.
# Arguments: $1 - absolute path to test
# Returns:   0 if $1 is a mountpoint, non-zero otherwise (including when
#            findmnt is missing)
is_mountpoint() {
  findmnt -rn --mountpoint "$1" >/dev/null 2>&1
}

if command -v df >/dev/null 2>&1; then
  # Full table first, then a focused view of the key paths that are
  # separate mounts on this host.
  run_optional "filesystem report" df -hT
  printf '\nKey mountpoints present:\n'
  if command -v findmnt >/dev/null 2>&1; then
    for mountpoint in / /boot /var /srv /opt /home; do
      if is_mountpoint "$mountpoint"; then
        run_optional "filesystem report for $mountpoint" df -hT "$mountpoint"
      fi
    done
  else
    # Without findmnt the list above would be silently empty; say why.
    printf 'WARNING: findmnt is not available\n'
  fi
else
  printf 'WARNING: df is not available\n'
fi
section "Journal usage"
# Show how much disk space the journal occupies, or warn when journalctl
# is not installed.
if ! command -v journalctl >/dev/null 2>&1; then
  printf 'WARNING: journalctl is not available\n'
else
  run_optional "journal disk usage" journalctl --disk-usage
fi
section "Docker"
# Report the Docker service state (when systemctl exists), the container
# list and Docker disk usage; skipped with an INFO line when the docker
# CLI is not installed.
if ! command -v docker >/dev/null 2>&1; then
  printf 'INFO: Docker is not installed\n'
else
  if command -v systemctl >/dev/null 2>&1; then
    run_optional "Docker service state" systemctl is-active docker
  fi
  # Go-template table passed to docker ps; kept single-quoted so the
  # braces and \t sequences reach docker untouched.
  container_columns='table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}'
  run_optional "Docker container list" docker ps --format "$container_columns"
  run_optional "Docker disk usage" docker system df
fi
section "Libvirt"
# Report the libvirtd service state (when systemctl exists) and list all
# guests; skipped with an INFO line when virsh is not installed.
if ! command -v virsh >/dev/null 2>&1; then
  printf 'INFO: virsh is not installed\n'
else
  if command -v systemctl >/dev/null 2>&1; then
    run_optional "libvirtd service state" systemctl is-active libvirtd
  fi
  run_optional "libvirt guest list" virsh list --all
fi
section "NVIDIA"
# Run nvidia-smi when it is installed; otherwise note its absence.
if ! command -v nvidia-smi >/dev/null 2>&1; then
  printf 'INFO: nvidia-smi is not installed\n'
else
  run_optional "NVIDIA status" nvidia-smi
fi
section "Failed systemd units"
# List failed units without invoking a pager, or warn when systemctl is
# not installed.
if ! command -v systemctl >/dev/null 2>&1; then
  printf 'WARNING: systemctl is not available\n'
else
  run_optional "failed systemd unit report" systemctl --failed --no-pager
fi
section "SMART quick health"

# Succeed when the path names a whole SATA/SCSI disk (/dev/sdX, /dev/sdXY,
# ...) or an NVMe namespace (/dev/nvmeNnM) and not a partition or a bare
# NVMe controller node.
# Unlike the single-character globs /dev/sd? and /dev/nvme?n?, this also
# accepts multi-letter disk names (/dev/sdaa) and multi-digit controller or
# namespace numbers (/dev/nvme10n1).
# Arguments: $1 - device path to test
# Returns:   0 if $1 should be checked with smartctl, 1 otherwise
is_smart_candidate() {
  # Kept in a variable so the regex is not subject to shell quoting rules.
  local whole_disk_re='^/dev/(sd[a-z]+|nvme[0-9]+n[0-9]+)$'
  [[ "$1" =~ $whole_disk_re ]]
}

if command -v smartctl >/dev/null 2>&1; then
  devices=()
  # nullglob makes unmatched patterns expand to nothing instead of
  # themselves; it is switched off again right after enumeration.
  shopt -s nullglob
  for candidate in /dev/sd* /dev/nvme*n*; do
    if is_smart_candidate "$candidate"; then
      devices+=("$candidate")
    fi
  done
  shopt -u nullglob
  if ((${#devices[@]} == 0)); then
    printf 'INFO: no matching SATA/SCSI or NVMe devices found\n'
  else
    for device in "${devices[@]}"; do
      printf '\n-- %s --\n' "$device"
      run_optional "SMART health check for $device" smartctl -H "$device"
    done
  fi
else
  printf 'INFO: smartctl is not installed\n'
fi
# Finish with an explicit success status: failed checks are reported as
# WARNING lines by run_optional rather than through the exit code.
exit 0