Files
portfolio/infra-run/scripts/bash/gpfs/02_precheck_gpfs.sh
T

104 lines
2.6 KiB
Bash
Raw Normal View History

2026-05-05 21:40:46 +00:00
#!/usr/bin/env bash
#
# GPFS precheck for a single filesystem.
#
# Runs a series of query commands (mmgetstate, mmlsfs, mmlsmount, mmlsdisk,
# df, mmhealth when available, mmlsmgr) and appends their output to $LOG_FILE.
#
# Usage: <script> --fs <filesystem>
#
# Exit status:
#   0  precheck completed without validation failures
#   1  the filesystem could not be queried, or a validation step failed
#   2  usage error or a required command is missing
#
# NOTE(review): section/ok/warning/critical/require_cmd and LOG_FILE are not
# defined in this file; presumably they come from 00_env.sh - confirm.
set -euo pipefail

# Absolute directory of this script, so the shared environment file is found
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# shellcheck source=00_env.sh
source "${SCRIPT_DIR}/00_env.sh"
# Print a one-line usage summary, naming the script by its basename, to stdout.
usage() {
  printf '%s\n' "Usage: $(basename "$0") --fs <filesystem>"
}
# Parse command-line options.
#
# FILESYSTEM is first defaulted to its current value (or empty). With
# `set -o nounset` active, testing an unset FILESYSTEM further down would abort
# with "unbound variable" instead of printing the usage error when --fs was
# never given.
FILESYSTEM="${FILESYSTEM:-}"

while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs)
      # `shift 2` with a single remaining argument fails, and under errexit
      # that would end the script without any diagnostic. Report it instead.
      if [[ "$#" -lt 2 ]]; then
        critical "Option --fs requires a <filesystem> value"
        usage
        exit 2
      fi
      FILESYSTEM="$2"
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      critical "Unknown argument: $1"
      usage
      exit 2
      ;;
  esac
done

# An empty value (never passed, or passed as "") is a usage error.
if [[ -z "$FILESYSTEM" ]]; then
  critical "Missing required --fs <filesystem>"
  usage
  exit 2
fi
# Probe every external command the checks below rely on. All of them are
# probed before giving up, so one run covers the complete list; exit status 2
# signals that at least one probe failed.
missing=0
for cmd in mmgetstate mmlscluster mmlsfs mmlsdisk mmlsmount mmlsmgr df; do
  if ! require_cmd "$cmd"; then
    missing=1
  fi
done
[[ "$missing" -eq 0 ]] || exit 2
# Set to 1 by any check that counts as a validation failure.
issues=0

# Reads `mmgetstate` output on stdin. Succeeds (status 0) when at least one
# node row reports a state other than "active", or when the input contains no
# node rows at all (e.g. the command failed and printed only an error).
# Node rows are recognised by a numeric first column, so the blank, header and
# separator lines of the table are never mistaken for inactive nodes.
# Assumes the default three-column layout (node number, node name, GPFS state)
# of `mmgetstate -a` - TODO confirm against the installed release.
_gpfs_any_node_inactive() {
  awk '
    $1 ~ /^[0-9]+$/ {
      rows++
      if ($3 != "active") inactive = 1
    }
    END { exit ((inactive || rows == 0) ? 0 : 1) }
  '
}

section "GPFS daemon state"
# `|| true` keeps a failing mmgetstate from tripping errexit; whatever it
# printed is still logged and evaluated below.
state_output="$(mmgetstate -a 2>&1 || true)"
printf '%s\n' "$state_output" | tee -a "$LOG_FILE"
if printf '%s\n' "$state_output" | _gpfs_any_node_inactive; then
  warning "Not all GPFS nodes appear active"
fi
section "Target filesystem definition"
# Guard clause: every later check queries this filesystem, so stop right away
# when it cannot be queried. The command output is logged in both cases.
if ! mmlsfs "$FILESYSTEM" 2>&1 | tee -a "$LOG_FILE"; then
  critical "Filesystem does not exist or cannot be queried: $FILESYSTEM"
  exit 1
fi
ok "Filesystem exists: $FILESYSTEM"
# Succeeds (status 0) when the mmlsmount output passed as $1 contains one of
# the phrases treated as "not mounted / not found".
# The text is fed through a here-string rather than a pipe: with pipefail on,
# `printf ... | grep -q` can report failure when grep exits on an early match
# and the writer is killed by SIGPIPE, which would hide the warning.
_mount_output_suspect() {
  grep -Eiq 'not mounted|no file systems were found|not found' <<<"$1"
}

section "Target filesystem mount state"
# `|| true` keeps a failing mmlsmount from tripping errexit; its output is
# still logged and scanned below.
mount_output="$(mmlsmount "$FILESYSTEM" 2>&1 || true)"
printf '%s\n' "$mount_output" | tee -a "$LOG_FILE"
if _mount_output_suspect "$mount_output"; then
  warning "Filesystem may not be mounted anywhere: $FILESYSTEM"
fi
section "Existing disks"
# A failed disk listing is recorded as a validation failure, but the
# remaining checks still run.
mmlsdisk "$FILESYSTEM" 2>&1 | tee -a "$LOG_FILE" || {
  critical "Unable to list disks for filesystem: $FILESYSTEM"
  issues=1
}
# Filters `df` output on stdin: keeps the header line, every line containing
# the literal string $1, and every line mentioning gpfs or mmfs.
# The name is handed to awk through the environment and matched with index(),
# so characters such as "." are not interpreted as regex metacharacters.
_filter_capacity_lines() {
  FS_NAME="$1" awk '
    BEGIN { fs = ENVIRON["FS_NAME"] }
    NR == 1 || (fs != "" && index($0, fs) > 0) || /gpfs|mmfs/
  '
}

section "Filesystem capacity"
# Informational listing. df exits non-zero when any single mount cannot be
# examined; with pipefail and errexit that would end the whole precheck without
# a message, so a failure of this pipeline is downgraded to a warning.
if ! df -h 2>&1 | _filter_capacity_lines "$FILESYSTEM" | tee -a "$LOG_FILE"; then
  warning "df reported errors; capacity listing may be incomplete"
fi
# Reads `mmhealth cluster show` output on stdin and succeeds (status 0) when
# it indicates a problem.
# A plain case-insensitive keyword search also matches the "Failed" and
# "Degraded" column headers of the summary table, so it flags every run.
# Instead, the header row is used to locate those two columns and the rows
# below it are judged by their counters (a non-numeric placeholder such as "-"
# counts as zero). Text outside the table - for example an error message from
# the command itself - still goes through the keyword scan.
# Assumes a header row starting with "Component" - TODO confirm against the
# installed release; without it every line falls back to the keyword scan.
_mmhealth_reports_problem() {
  awk '
    # Header row: remember which columns hold the Failed / Degraded counters.
    !seen_header && tolower($1) == "component" {
      for (i = 2; i <= NF; i++) {
        col = tolower($i)
        if (col == "failed") failed_col = i
        else if (col == "degraded") degraded_col = i
      }
      if (failed_col || degraded_col) seen_header = 1
      next
    }
    # Summary row: component name followed by a numeric total.
    seen_header && $2 ~ /^[0-9]+$/ {
      if (failed_col && $failed_col + 0 > 0) problem = 1
      if (degraded_col && $degraded_col + 0 > 0) problem = 1
      next
    }
    # Anything else keeps the keyword scan.
    tolower($0) ~ /degraded|failed|down|error|unhealthy/ { problem = 1 }
    END { exit (problem ? 0 : 1) }
  '
}

section "Cluster health"
if command -v mmhealth >/dev/null 2>&1; then
  # `|| true` keeps a failing mmhealth from tripping errexit; its output is
  # still logged and evaluated below.
  health_output="$(mmhealth cluster show 2>&1 || true)"
  printf '%s\n' "$health_output" | tee -a "$LOG_FILE"
  if printf '%s\n' "$health_output" | _mmhealth_reports_problem; then
    warning "Cluster health output indicates a degraded condition"
  fi
else
  warning "mmhealth command not available, skipping health check"
fi
section "Managers and quorum"
# Log the manager listing; a failure is recorded as a validation failure
# without stopping the script here.
if ! mmlsmgr 2>&1 | tee -a "$LOG_FILE"; then
  critical "Unable to query GPFS manager/quorum information"
  issues=1
fi
# Final verdict: exit 1 when any check recorded a validation failure,
# otherwise report success and exit 0.
if [[ "$issues" -ne 0 ]]; then
  critical "Precheck found operational validation failures"
  exit 1
fi

ok "Precheck completed for filesystem: $FILESYSTEM"
exit 0