Add GPFS storage expansion toolkit
This commit is contained in:
+89
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env bash
#
# GPFS post-check: runs a series of status commands against one filesystem,
# appends their output to the log, and exits non-zero if any check flagged a
# problem.
#
# Usage: <script> --fs <filesystem>

# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail

# Absolute directory holding this script, so the shared environment file is
# found regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# NOTE(review): 00_env.sh presumably provides the logging helpers (section,
# ok, warning, critical) and LOG_FILE used below - confirm against that file.
# shellcheck source=00_env.sh
source "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
#######################################
# Print the one-line command synopsis.
# Arguments: none
# Outputs:   synopsis on stdout, prefixed with this script's base name
#######################################
usage() {
  printf '%s\n' "Usage: $(basename "$0") --fs <filesystem>"
}
|
||||
|
||||
# Parse command-line options.
#   --fs <filesystem>  target filesystem (stored in FILESYSTEM)
#   -h, --help         print usage and exit 0
# Anything else is reported as an error and exits with status 2.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs)
      # `shift 2` fails when --fs is the last argument, and under errexit
      # that would terminate the script silently with status 1. Report the
      # missing value explicitly instead.
      if [[ "$#" -lt 2 ]]; then
        critical "Option --fs requires a value"
        usage
        exit 2
      fi
      FILESYSTEM="$2"
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      critical "Unknown argument: $1"
      usage
      exit 2
      ;;
  esac
done
|
||||
|
||||
# A target filesystem is mandatory. The ${...:-} default keeps nounset from
# aborting with "unbound variable" when --fs was never given, so the user
# gets the intended message and usage text instead.
# NOTE(review): this assumes 00_env.sh does not already initialise
# FILESYSTEM; if it does, the default expansion is simply a no-op.
if [[ -z "${FILESYSTEM:-}" ]]; then
  critical "Missing required --fs <filesystem>"
  usage
  exit 2
fi
|
||||
|
||||
# Becomes 1 as soon as any check reports a problem; read at the end of the
# script to pick the exit status.
issues=0

#######################################
# Run one diagnostic command under a section heading.
# The command's stdout and stderr are shown and appended to the log. If the
# command is not installed the check is skipped with a warning.
# Globals:   LOG_FILE (read), issues (set to 1 on failure)
# Arguments: $1 - human-readable description of the check
#            $2... - command to run, followed by its arguments
# Outputs:   the command's combined output on stdout
#######################################
run_check() {
  local title="$1"
  shift

  section "$title"

  if ! command -v "$1" >/dev/null 2>&1; then
    warning "$1 command not available, skipping"
    return
  fi

  # The script enables pipefail, so a failing command is not hidden by the
  # exit status of tee.
  if ! "$@" 2>&1 | tee -a "$LOG_FILE"; then
    critical "$title failed"
    issues=1
  fi
}
|
||||
|
||||
# GPFS command checks. Each call prints a section heading, runs the command
# with its output appended to the log, and sets `issues` if the command
# fails; a command that is not installed is skipped with a warning.
run_check "GPFS daemon state" mmgetstate -a
run_check "Target filesystem mount state" mmlsmount "${FILESYSTEM}"
run_check "Target filesystem disks" mmlsdisk "${FILESYSTEM}"
run_check "NSD inventory" mmlsnsd
|
||||
|
||||
# Capacity overview: the df header line plus every line that mentions the
# target filesystem or looks GPFS-related.
section "Filesystem capacity"
if command -v df >/dev/null 2>&1; then
  # index() matches the filesystem name as a literal substring; using it as
  # a regex would let characters such as "." or "+" in the name match
  # unrelated lines.
  # df exits non-zero when it cannot query a mount; with errexit and
  # pipefail that would abort the whole post-check, so downgrade it to a
  # warning like the journalctl and dmesg sections do.
  df -h 2>&1 \
    | awk -v fs="$FILESYSTEM" 'NR == 1 || index($0, fs) || /gpfs|mmfs/' \
    | tee -a "$LOG_FILE" \
    || warning "df query failed"
else
  warning "df command not available, skipping"
fi
|
||||
|
||||
#######################################
# Decide whether `mmhealth cluster show` output indicates a problem.
# Rows of the summary table are judged by their Failed/Degraded counters;
# every other line falls back to a case-insensitive keyword match. The table
# header itself contains the words "Failed" and "Degraded", so it must not be
# fed to the keyword match or a healthy cluster would always be flagged.
# NOTE(review): the column layout is taken from IBM's documented sample
# output - confirm against the installed release. Output without such a
# header is handled purely by the keyword match.
# Arguments: none (reads the mmhealth output from stdin)
# Returns:   0 if a problem is indicated, 1 otherwise
#######################################
_cluster_health_has_issue() {
  awk '
    function is_count(v) { return v ~ /^([0-9]+|-)$/ }
    {
      # Header row: remember which columns hold the Failed/Degraded counters.
      f = 0
      d = 0
      for (i = 1; i <= NF; i++) {
        word = tolower($i)
        if (word == "failed") f = i
        else if (word == "degraded") d = i
      }
      if (f && d && tolower($1) == "component") {
        failed_col = f
        degraded_col = d
        next
      }

      # Table row: only a non-zero counter is a problem ("-" counts as zero).
      if (failed_col && NF >= failed_col && NF >= degraded_col &&
          is_count($failed_col) && is_count($degraded_col)) {
        if ($failed_col + 0 > 0 || $degraded_col + 0 > 0) found = 1
        next
      }

      # Free-form line (for example an error message): keyword heuristic.
      if (tolower($0) ~ /degraded|failed|down|error|unhealthy/) found = 1
    }
    END { exit (found ? 0 : 1) }
  '
}

section "Cluster health"
if command -v mmhealth >/dev/null 2>&1; then
  # Capture the exit status instead of discarding it, so a failing mmhealth
  # run is at least visible in the log.
  health_status=0
  health_output="$(mmhealth cluster show 2>&1)" || health_status=$?
  printf '%s\n' "$health_output" | tee -a "$LOG_FILE"
  if [[ "$health_status" -ne 0 ]]; then
    warning "mmhealth cluster show exited with status $health_status"
  fi
  # The helper reads its whole input, so pipefail cannot turn an early
  # reader exit into a missed detection.
  if _cluster_health_has_issue <<<"$health_output"; then
    critical "Cluster health output indicates an issue"
    issues=1
  fi
else
  warning "mmhealth command not available, skipping"
fi
|
||||
|
||||
# Last 50 journal entries from units matching gpfs*, shown and appended to
# the log. A failing query only produces a warning.
section "Recent GPFS journal entries"
if ! command -v journalctl >/dev/null 2>&1; then
  warning "journalctl command not available, skipping"
else
  journalctl --unit='gpfs*' --lines=50 --no-pager 2>&1 \
    | tee -a "$LOG_FILE" \
    || warning "journalctl GPFS query failed"
fi
|
||||
|
||||
# Last 50 kernel ring-buffer lines, shown and appended to the log. dmesg's
# own stderr is discarded; a failing pipeline only produces a warning.
section "Recent kernel messages"
if ! command -v dmesg >/dev/null 2>&1; then
  warning "dmesg command not available, skipping"
else
  dmesg -T 2>/dev/null \
    | tail -n 50 \
    | tee -a "$LOG_FILE" \
    || warning "dmesg query failed"
fi
|
||||
|
||||
# Final verdict: exit 1 if any check set the issues flag, otherwise exit 0.
if ((issues != 0)); then
  critical "Post-check detected one or more issues"
  exit 1
fi

ok "Post-check completed without detected operational failures"
exit 0
|
||||
Reference in New Issue
Block a user