Files
Mateusz Suski ca5a876d03
lint / shell-yaml-ansible (push) Failing after 21s
Improve infra-run portfolio credibility
2026-05-08 21:18:22 +00:00

88 lines
2.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# Post-check script: runs a series of status commands for the filesystem given
# with --fs and exits non-zero when any of them flags a problem.

# Strict mode: stop on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute directory of this script, so the shared environment file is located
# independently of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Helpers and settings used below (section, warning, critical, ok, LOG_FILE)
# are not defined in this file; presumably they come from 00_env.sh.
# shellcheck source=00_env.sh
source "$SCRIPT_DIR/00_env.sh"
#######################################
# Print the one-line command synopsis.
# Arguments: none
# Outputs:   usage line on stdout, prefixed with this script's base name
#######################################
usage() {
  local synopsis='--fs <filesystem>'
  printf 'Usage: %s %s\n' "$(basename "$0")" "$synopsis"
}
# Parse command-line options.
#   --fs <filesystem>  target filesystem, stored in FILESYSTEM
#   -h, --help         print usage and exit 0
# Anything else is reported and the script exits with status 2.
while (( $# > 0 )); do
  case "$1" in
    --fs)
      # `shift 2` returns non-zero when fewer than two arguments remain, and
      # under `set -e` that would end the script silently. Reject a dangling
      # --fs explicitly so the user gets a message and the usage text.
      if (( $# < 2 )); then
        critical "Option --fs requires a <filesystem> value"
        usage
        exit 2
      fi
      FILESYSTEM="$2"
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      critical "Unknown argument: $1"
      usage
      exit 2
      ;;
  esac
done
# --fs is mandatory. The ${FILESYSTEM:-} form keeps `set -u` from aborting
# with an "unbound variable" error when nothing has assigned FILESYSTEM, so
# the intended message and usage text are shown instead.
if [[ -z "${FILESYSTEM:-}" ]]; then
  critical "Missing required --fs <filesystem>"
  usage
  exit 2
fi
# Becomes 1 as soon as any check detects a problem; read at the end of the
# script to choose the exit status.
issues=0

#######################################
# Run one diagnostic command under its own section heading.
# Globals:
#   LOG_FILE (read)  - command output is appended to this file
#   issues (written) - set to 1 when the command pipeline fails
# Arguments:
#   $1   - human-readable description of the check
#   $2.. - command to execute, followed by its arguments
# Outputs:
#   The command's combined stdout/stderr, echoed and appended to LOG_FILE.
# Notes:
#   A command that is not installed is only warned about; it does not set
#   issues. Failure detection relies on `pipefail` being enabled by the caller.
#######################################
run_check() {
  local label="$1"
  shift
  section "$label"
  local tool="$1"

  # Nothing to run: report it and leave.
  if ! command -v "$tool" >/dev/null 2>&1; then
    warning "$tool command not available, skipping"
    return
  fi

  if ! "$@" 2>&1 | tee -a "$LOG_FILE"; then
    critical "$label failed"
    issues=1
  fi
}
# Status checks run through run_check: each one is skipped with a warning when
# its command is not installed and marks the run as failed when it errors.
run_check "GPFS daemon state" \
  mmgetstate -a

# The next two checks are scoped to the filesystem given with --fs.
run_check "Target filesystem mount state" \
  mmlsmount "$FILESYSTEM"
run_check "Target filesystem disks" \
  mmlsdisk "$FILESYSTEM"

run_check "NSD inventory" \
  mmlsnsd
# Capacity report: print df's header row plus every line that mentions the
# target filesystem or contains "gpfs" / "mmfs".
section "Filesystem capacity"
if command -v df >/dev/null 2>&1; then
  # index() matches FILESYSTEM as a literal substring; using it as a regex
  # would let characters such as "." or "+" in the name match unintended lines.
  #
  # This section is informational, so a non-zero exit from df (for example
  # when a mount cannot be queried) is reported as a warning. Without the
  # trailing `||`, `set -e` with `pipefail` would abort the whole post-check
  # here, before the final verdict is printed.
  df -h 2>&1 \
    | awk -v fs="$FILESYSTEM" 'NR == 1 || index($0, fs) > 0 || /gpfs|mmfs/' \
    | tee -a "$LOG_FILE" \
    || warning "df query failed"
else
  warning "df command not available, skipping"
fi
#######################################
# Decide whether `mmhealth cluster show` output reports a problem.
#
# IBM's command reference shows the summary as a table whose header row names
# the columns (Component, Total, Failed, Degraded, Healthy, Other) - confirm
# against the installed release. A plain case-insensitive keyword search
# matches that header on every run, so the table is interpreted instead:
#   - the header row is ignored;
#   - a data row counts as a problem only when its Failed or Degraded count
#     is greater than zero;
#   - any other line (for example an error message printed by the command)
#     is still checked against the keyword list.
# Arguments: $1 - captured command output
# Returns:   0 when a problem is indicated, 1 otherwise
#######################################
_mmhealth_output_has_issue() {
  # awk consumes the whole input, so the writer cannot be cut off early by a
  # closed pipe the way it can with `grep -q` under pipefail.
  printf '%s\n' "$1" | awk '
    BEGIN { found = 0 }
    # Column header row: never a finding.
    tolower($1) == "component" { next }
    # Data row: Component Total Failed Degraded Healthy Other
    NF >= 6 && $2 ~ /^[0-9]+$/ && $3 ~ /^[0-9]+$/ && $4 ~ /^[0-9]+$/ {
      if ($3 + 0 > 0 || $4 + 0 > 0) found = 1
      next
    }
    # Free-form text such as error messages.
    tolower($0) ~ /degraded|failed|down|error|unhealthy/ { found = 1 }
    END { exit (found ? 0 : 1) }
  '
}

section "Cluster health"
if command -v mmhealth >/dev/null 2>&1; then
  # Capture the exit status rather than discarding it, so a failing command is
  # visible in the report without aborting the script under `set -e`.
  health_rc=0
  health_output="$(mmhealth cluster show 2>&1)" || health_rc=$?
  printf '%s\n' "$health_output" | tee -a "$LOG_FILE"
  if (( health_rc != 0 )); then
    warning "mmhealth cluster show exited with status $health_rc"
  fi
  if _mmhealth_output_has_issue "$health_output"; then
    critical "Cluster health output indicates an issue"
    issues=1
  fi
else
  warning "mmhealth command not available, skipping"
fi
# Last 50 journal entries from units matching gpfs*, echoed and appended to the
# log. A failed query is only warned about; it does not mark the run as failed.
section "Recent GPFS journal entries"
if ! command -v journalctl >/dev/null 2>&1; then
  warning "journalctl command not available, skipping"
elif ! journalctl -u 'gpfs*' -n 50 --no-pager 2>&1 | tee -a "$LOG_FILE"; then
  warning "journalctl GPFS query failed"
fi
# Last 50 kernel ring-buffer lines with human-readable timestamps. dmesg's own
# stderr is discarded, so a failed read shows up only as the warning below.
section "Recent kernel messages"
if ! command -v dmesg >/dev/null 2>&1; then
  warning "dmesg command not available, skipping"
elif ! dmesg -T 2>/dev/null | tail -n 50 | tee -a "$LOG_FILE"; then
  warning "dmesg query failed"
fi
# Final verdict: exit 1 when any check set the issues flag, otherwise exit 0.
if (( issues != 0 )); then
  critical "Post-check detected one or more issues"
  exit 1
fi

ok "Post-check completed without detected operational failures"
exit 0