Add GPFS storage expansion toolkit
This commit is contained in:
Executable
+114
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
# Run-scoped defaults. Each value may be supplied through the environment;
# the ":=" expansion assigns the default only when the variable is unset or empty.
: "${TIMESTAMP:=$(date +%Y%m%d_%H%M%S)}"
: "${DRY_RUN:=true}"
: "${LOG_FILE:=/tmp/gpfs_extend_${TIMESTAMP}.log}"

# Target description. Empty values are filled in by the calling script's options.
: "${FILESYSTEM:=}"
: "${NSD_STANZA:=}"
: "${FAILURE_GROUP:=}"
: "${STORAGE_POOL:=system}"
: "${USAGE:=dataAndMetadata}"
|
||||
|
||||
# log LEVEL MESSAGE...
# Writes "LEVEL: MESSAGE" to stdout and appends the same line to $LOG_FILE.
# All arguments after LEVEL are joined with spaces into one message.
log() {
  local severity="$1"
  shift

  printf '%s: %s\n' "$severity" "$*" | tee -a "$LOG_FILE"
}
|
||||
|
||||
# ok MESSAGE... - emit MESSAGE through log with the "OK" level.
ok() { log "OK" "$@"; }
|
||||
|
||||
# warning MESSAGE... - emit MESSAGE through log with the "WARNING" level.
warning() { log "WARNING" "$@"; }
|
||||
|
||||
# critical MESSAGE... - emit MESSAGE through log with the "CRITICAL" level.
# Only reports; the caller decides whether to exit.
critical() { log "CRITICAL" "$@"; }
|
||||
|
||||
# require_cmd NAME
# Checks that NAME resolves via `command -v`.
# Returns 0 and logs an OK line when it does; logs a CRITICAL line and
# returns 1 when it does not.
require_cmd() {
  local tool="$1"

  if ! command -v "$tool" >/dev/null 2>&1; then
    critical "Required command not found: $tool"
    return 1
  fi

  ok "Command available: $tool"
  return 0
}
|
||||
|
||||
# validate_gpfs_command NAME
# Soft availability check: returns 0 silently when NAME resolves via
# `command -v`; otherwise logs a WARNING and returns 1.
validate_gpfs_command() {
  local tool="$1"

  command -v "$tool" >/dev/null 2>&1 && return 0

  warning "GPFS command not available, skipping: $tool"
  return 1
}
|
||||
|
||||
# run_cmd COMMAND [ARG...]
# Runs a state-changing command, honouring $DRY_RUN.
#   - With no arguments: logs CRITICAL and returns 2.
#   - Unless DRY_RUN is exactly "false": logs the command as "DRY-RUN" and
#     returns 0 without running it. Unexpected values such as "TRUE", "yes"
#     or "1" are therefore treated as dry-run instead of silently executing.
#   - DRY_RUN=false: logs "RUN", executes the command and tees its combined
#     stdout/stderr to the terminal and $LOG_FILE. The return status is that
#     of the pipeline, so the command's failure is visible only when the
#     caller has `pipefail` enabled.
run_cmd() {
  if [[ "$#" -eq 0 ]]; then
    critical "run_cmd called without a command"
    return 2
  fi

  # Fail safe: only an explicit "false" enables execution.
  if [[ "$DRY_RUN" != "false" ]]; then
    log "OK" "DRY-RUN: $*"
    return 0
  fi

  log "OK" "RUN: $*"
  "$@" 2>&1 | tee -a "$LOG_FILE"
}
|
||||
|
||||
# run_readonly COMMAND [ARG...]
# Always executes the command (no dry-run gate). Logs a "READ-ONLY" line
# first, then tees the command's combined stdout/stderr to the terminal and
# $LOG_FILE. Returns 2 when called without a command.
run_readonly() {
  (( $# > 0 )) || {
    critical "run_readonly called without a command"
    return 2
  }

  log "OK" "READ-ONLY: $*"
  "$@" 2>&1 | tee -a "$LOG_FILE"
}
|
||||
|
||||
# confirm_execute [DESCRIPTION]
# Interactive gate placed in front of state-changing steps.
#   - DRY_RUN=true: logs that no changes will be made and returns 0.
#   - Otherwise: logs two WARNING lines, prompts on stdout and reads one line
#     from stdin. Anything other than the exact word EXECUTE logs CRITICAL and
#     exits the script with status 1.
# DESCRIPTION defaults to "GPFS change" and is only used in the warning text.
confirm_execute() {
  local target="${1:-GPFS change}"
  # Keep the answer local so it does not leak into the sourcing script.
  local confirmation=""

  if [[ "$DRY_RUN" == "true" ]]; then
    ok "Dry-run mode enabled. No changes will be made."
    return 0
  fi

  warning "Execution mode requested for: $target"
  warning "Coordinate this change with storage, GPFS, application, and change-management teams."
  printf 'Type EXECUTE to continue: '

  # read returns non-zero at EOF, including when the final line has no
  # trailing newline. Under errexit that would terminate the script here with
  # no message, so tolerate it and let the comparison below decide.
  read -r confirmation || true

  if [[ "$confirmation" != "EXECUTE" ]]; then
    critical "Confirmation failed. Aborting."
    exit 1
  fi

  ok "Execution confirmed by operator."
}
|
||||
|
||||
# usage_value_valid VALUE
# Returns 0 when VALUE is exactly dataOnly, metadataOnly or dataAndMetadata
# (case-sensitive); returns 1 for anything else.
usage_value_valid() {
  local candidate="$1"

  [[ "$candidate" == "dataOnly" \
    || "$candidate" == "metadataOnly" \
    || "$candidate" == "dataAndMetadata" ]]
}
|
||||
|
||||
# section TITLE
# Prints a blank line followed by "== TITLE ==" to stdout and appends the
# same text to $LOG_FILE.
section() {
  local heading="$1"

  printf '\n== %s ==\n' "$heading" | tee -a "$LOG_FILE"
}
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# run_optional DESCRIPTION COMMAND [ARG...]
# Prints a section header for DESCRIPTION, then runs COMMAND through
# run_readonly when validate_gpfs_command accepts it. A missing command is
# skipped; a failing command is downgraded to a WARNING, so this helper does
# not abort the overview.
run_optional() {
  local label="$1"
  shift

  section "$label"

  # Nothing more to do when the command is unavailable.
  validate_gpfs_command "$1" || return 0

  run_readonly "$@" || warning "$label command failed"
}
|
||||
|
||||
section "GPFS / Spectrum Scale Cluster Overview"
ok "Log file: $LOG_FILE"

# Read-only inventory, run in this order. Each entry is
# "<section title>|<command line>"; the command line is split on whitespace.
overview_checks=(
  "GPFS daemon state on all nodes|mmgetstate -a"
  "Cluster definition|mmlscluster"
  "Cluster configuration|mmlsconfig"
  "Managers and quorum information|mmlsmgr"
  "NSD inventory|mmlsnsd"
  "Disk inventory for all filesystems|mmlsdisk all"
  "Filesystem definitions|mmlsfs all"
  "Mount state for all filesystems|mmlsmount all"
)

for overview_entry in "${overview_checks[@]}"; do
  read -ra overview_argv <<<"${overview_entry#*|}"
  run_optional "${overview_entry%%|*}" "${overview_argv[@]}"
done

section "Mounted GPFS filesystems from df"
if ! command -v df >/dev/null 2>&1; then
  warning "df command not available"
elif ! df -h -t gpfs 2>/dev/null | tee -a "$LOG_FILE"; then
  # The type-filtered listing failed: fall back to filtering the full
  # listing by name.
  df -h | awk 'NR == 1 || /gpfs|mmfs/' | tee -a "$LOG_FILE"
fi
|
||||
+103
@@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# Print the one-line command synopsis for this script to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")" || true

  printf 'Usage: %s --fs <filesystem>\n' "$script_name"
}
|
||||
|
||||
# require_value OPTION REMAINING_ARG_COUNT
# Aborts with usage and exit code 2 when a value-taking option is the last
# argument. Without this check `shift 2` fails and errexit terminates the
# script silently with status 1.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    critical "Option $1 requires a value"
    usage
    exit 2
  fi
}

# Command-line parsing: --fs <filesystem>, -h/--help.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs)
      require_value "$1" "$#"
      FILESYSTEM="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      critical "Unknown argument: $1"
      usage
      exit 2
      ;;
  esac
done
|
||||
|
||||
if [[ -z "$FILESYSTEM" ]]; then
  critical "Missing required --fs <filesystem>"
  usage
  exit 2
fi

# Check every required tool before reporting, so the operator sees the
# complete list of what is missing instead of only the first one.
missing=0
for cmd in mmgetstate mmlscluster mmlsfs mmlsdisk mmlsmount mmlsmgr df; do
  require_cmd "$cmd" || missing=1
done

if [[ "$missing" -ne 0 ]]; then
  exit 2
fi

# Set to 1 by checks whose failure should fail the precheck (exit 1).
issues=0

section "GPFS daemon state"
state_output="$(mmgetstate -a 2>&1 || true)"
printf '%s\n' "$state_output" | tee -a "$LOG_FILE"
# Inspect only node rows (first field is the numeric node number) and compare
# the state column exactly; header and separator lines are ignored. Also warn
# when no node row could be parsed, e.g. because the command failed.
# NOTE(review): assumes the default `mmgetstate -a` column layout
# (node number, node name, GPFS state) - confirm for the installed release.
if printf '%s\n' "$state_output" \
  | awk '$1 ~ /^[0-9]+$/ { rows++; if ($3 != "active") bad = 1 }
         END { exit (bad || rows == 0) ? 0 : 1 }'; then
  warning "Not all GPFS nodes appear active"
fi

section "Target filesystem definition"
if mmlsfs "$FILESYSTEM" 2>&1 | tee -a "$LOG_FILE"; then
  ok "Filesystem exists: $FILESYSTEM"
else
  critical "Filesystem does not exist or cannot be queried: $FILESYSTEM"
  exit 1
fi

section "Target filesystem mount state"
mount_output="$(mmlsmount "$FILESYSTEM" 2>&1 || true)"
printf '%s\n' "$mount_output" | tee -a "$LOG_FILE"
if printf '%s\n' "$mount_output" | grep -Eiq 'not mounted|no file systems were found|not found'; then
  warning "Filesystem may not be mounted anywhere: $FILESYSTEM"
fi

section "Existing disks"
if ! mmlsdisk "$FILESYSTEM" 2>&1 | tee -a "$LOG_FILE"; then
  critical "Unable to list disks for filesystem: $FILESYSTEM"
  issues=1
fi

section "Filesystem capacity"
# index() matches the filesystem name literally rather than as a regex.
# A non-zero df status (for example one unreadable mount) must not abort the
# precheck under errexit/pipefail, so it is downgraded to a warning.
df -h 2>&1 \
  | awk -v fs="$FILESYSTEM" 'NR == 1 || index($0, fs) || /gpfs|mmfs/' \
  | tee -a "$LOG_FILE" \
  || warning "df reported errors; capacity listing may be incomplete"

section "Cluster health"
if command -v mmhealth >/dev/null 2>&1; then
  health_output="$(mmhealth cluster show 2>&1 || true)"
  printf '%s\n' "$health_output" | tee -a "$LOG_FILE"
  if printf '%s\n' "$health_output" | grep -Eiq 'degraded|failed|down|error|unhealthy'; then
    warning "Cluster health output indicates a degraded condition"
  fi
else
  warning "mmhealth command not available, skipping health check"
fi

section "Managers and quorum"
mmlsmgr 2>&1 | tee -a "$LOG_FILE" || {
  critical "Unable to query GPFS manager/quorum information"
  issues=1
}

if [[ "$issues" -eq 0 ]]; then
  ok "Precheck completed for filesystem: $FILESYSTEM"
  exit 0
fi

critical "Precheck found operational validation failures"
exit 1
|
||||
+83
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# Filter switches; both are off unless requested on the command line.
EXCLUDE_MOUNTED=false
EXCLUDE_EXISTING_NSD=false

# Print the one-line command synopsis for this script to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")" || true

  printf 'Usage: %s [--exclude-mounted] [--exclude-existing-nsd]\n' "$script_name"
}

# Command-line parsing: every option is a bare flag.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --exclude-mounted) EXCLUDE_MOUNTED=true ;;
    --exclude-existing-nsd) EXCLUDE_EXISTING_NSD=true ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      critical "Unknown argument: $1"
      usage
      exit 2
      ;;
  esac
  shift
done
|
||||
|
||||
for cmd in lsblk findmnt; do
  require_cmd "$cmd" || exit 2
done

warning "Candidate devices are not automatically safe. Confirm every device with the storage and cluster teams before use."

# lsblk_field COLUMN DEVICE
# Prints one lsblk column for one device, trimmed, or nothing when the value
# is empty or lsblk fails. Columns are queried one at a time because a
# whitespace split of a multi-column row breaks as soon as a field is empty
# or contains spaces (MODEL commonly does).
lsblk_field() {
  lsblk -dno "$1" "$2" 2>/dev/null | awk 'NF { $1 = $1; print }' || true
}

existing_gpfs_devices=""
if [[ "$EXCLUDE_EXISTING_NSD" == "true" ]]; then
  if command -v mmlsnsd >/dev/null 2>&1; then
    # -m asks mmlsnsd to map each NSD to its device name; the default listing
    # carries no device paths, so it can never match a /dev name.
    # NOTE(review): confirm the flag against the installed Spectrum Scale release.
    existing_gpfs_devices="$(mmlsnsd -m 2>/dev/null || true)"
  elif command -v mmlsdisk >/dev/null 2>&1; then
    # NOTE(review): mmlsdisk output may list NSD names rather than device
    # paths, in which case this fallback excludes nothing.
    existing_gpfs_devices="$(mmlsdisk all 2>/dev/null || true)"
  else
    warning "mmlsnsd and mmlsdisk are unavailable; cannot exclude existing GPFS devices"
  fi
fi

section "Block device inventory"
lsblk -dpno NAME,TYPE,SIZE,MODEL,SERIAL,MOUNTPOINT 2>&1 | tee -a "$LOG_FILE"

section "Candidate devices"
found=0
# NAME and TYPE never contain whitespace, so this two-column read is safe.
while read -r name type; do
  [[ "$type" == "disk" ]] || continue

  size="$(lsblk_field SIZE "$name")"
  model="$(lsblk_field MODEL "$name")"
  serial="$(lsblk_field SERIAL "$name")"
  # Without -d, lsblk also lists the device's children, so a disk whose
  # partitions or LVs are mounted (or used as swap) is reported as in use.
  mountpoints="$(lsblk -nro MOUNTPOINT "$name" 2>/dev/null | awk 'NF' | paste -sd, - || true)"

  if [[ "$EXCLUDE_MOUNTED" == "true" ]]; then
    if [[ -n "$mountpoints" ]] || findmnt -rn --source "$name" >/dev/null 2>&1; then
      continue
    fi
  fi

  if [[ "$EXCLUDE_EXISTING_NSD" == "true" ]] && [[ -n "$existing_gpfs_devices" ]]; then
    # -w requires a whole-word match so /dev/sdb does not match /dev/sdba.
    if printf '%s\n' "$existing_gpfs_devices" | grep -Fqw -- "$name"; then
      continue
    fi
  fi

  printf 'OK: candidate=%s size=%s model=%s serial=%s mountpoint=%s\n' \
    "$name" "${size:-unknown}" "${model:-unknown}" "${serial:-unknown}" "${mountpoints:-none}" \
    | tee -a "$LOG_FILE"
  found=1
done < <(lsblk -dpno NAME,TYPE)

if [[ "$found" -eq 0 ]]; then
  warning "No candidate devices found with the selected filters"
fi
|
||||
+76
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# Inputs that have no default in 00_env.sh.
DEVICES=""
SERVERS=""
OUTPUT=""

# Print the one-line command synopsis for this script to stdout.
usage() {
  printf 'Usage: %s --fs <filesystem> --devices "/dev/sdb /dev/sdc" --servers "node1,node2" --failure-group <number> --pool <storage_pool> --usage <dataOnly|metadataOnly|dataAndMetadata> [--output <path>]\n' "$(basename "$0")"
}

# require_value OPTION REMAINING_ARG_COUNT
# Aborts with usage and exit code 2 when a value-taking option is the last
# argument. Without this check `shift 2` fails and errexit terminates the
# script silently with status 1.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    critical "Option $1 requires a value"
    usage
    exit 2
  fi
}

while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs) require_value "$1" "$#"; FILESYSTEM="$2"; shift 2 ;;
    --devices) require_value "$1" "$#"; DEVICES="$2"; shift 2 ;;
    --servers) require_value "$1" "$#"; SERVERS="$2"; shift 2 ;;
    --failure-group) require_value "$1" "$#"; FAILURE_GROUP="$2"; shift 2 ;;
    --pool) require_value "$1" "$#"; STORAGE_POOL="$2"; shift 2 ;;
    --usage) require_value "$1" "$#"; USAGE="$2"; shift 2 ;;
    --output) require_value "$1" "$#"; OUTPUT="$2"; shift 2 ;;
    -h|--help) usage; exit 0 ;;
    *) critical "Unknown argument: $1"; usage; exit 2 ;;
  esac
done
|
||||
|
||||
if [[ -z "$FILESYSTEM" || -z "$DEVICES" || -z "$SERVERS" || -z "$FAILURE_GROUP" || -z "$STORAGE_POOL" || -z "$USAGE" ]]; then
  critical "Missing required input"
  usage
  exit 2
fi

if ! [[ "$FAILURE_GROUP" =~ ^-?[0-9]+$ ]]; then
  critical "--failure-group must be an integer"
  exit 2
fi

if ! usage_value_valid "$USAGE"; then
  critical "--usage must be one of: dataOnly, metadataOnly, dataAndMetadata"
  exit 2
fi

# Split --devices on whitespace into an array. Unlike an unquoted expansion
# this performs no glob expansion, so "/dev/sd*" is rejected by the loop below
# only if it is not a /dev path, never silently expanded. read returns
# non-zero at end of input with -d '', hence the "|| true".
device_list=()
read -r -d '' -a device_list <<<"$DEVICES" || true

if [[ "${#device_list[@]}" -eq 0 ]]; then
  critical "Missing required input"
  usage
  exit 2
fi

# Validate every device before the output file is opened. Inside the
# redirected group below, stdout is the stanza file, so an error reported
# there would be written into the stanza and leave a partial file behind.
for device in "${device_list[@]}"; do
  if [[ "$device" != /dev/* ]]; then
    critical "Device must be an absolute /dev path: $device"
    exit 2
  fi
done

# Filesystem name reduced to [A-Za-z0-9_]; used for NSD names and for the
# default output path, so a name given as /dev/<fs> still yields a valid path.
safe_fs="${FILESYSTEM//[^[:alnum:]_]/_}"

if [[ -z "$OUTPUT" ]]; then
  OUTPUT="/tmp/gpfs_nsd_${safe_fs}_${TIMESTAMP}.stanza"
fi

{
  printf '# Generated GPFS NSD stanza for filesystem %s\n' "$FILESYSTEM"
  printf '# Review with storage and cluster teams before use.\n\n'
  for device in "${device_list[@]}"; do
    # Parameter expansion instead of `basename | tr -c`: the pipeline turned
    # basename's trailing newline into "_", appending a stray underscore to
    # every NSD name.
    device_base="${device##*/}"
    device_base="${device_base//[^[:alnum:]_]/_}"
    nsd_name="nsd_${safe_fs}_${device_base}"
    printf '%%nsd:\n'
    printf '  device=%s\n' "$device"
    printf '  nsd=%s\n' "$nsd_name"
    printf '  servers=%s\n' "$SERVERS"
    printf '  usage=%s\n' "$USAGE"
    printf '  failureGroup=%s\n' "$FAILURE_GROUP"
    printf '  pool=%s\n\n' "$STORAGE_POOL"
  done
} > "$OUTPUT"

ok "Generated NSD stanza: $OUTPUT"
warning "This script only writes a stanza file. It does not create NSDs or modify GPFS."
|
||||
+59
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# Print the one-line command synopsis for this script to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")" || true

  printf 'Usage: %s --fs <filesystem> --stanza <stanza_file> [--execute]\n' "$script_name"
}
|
||||
|
||||
# require_value OPTION REMAINING_ARG_COUNT
# Aborts with usage and exit code 2 when a value-taking option is the last
# argument. Without this check `shift 2` fails and errexit terminates the
# script silently with status 1.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    critical "Option $1 requires a value"
    usage
    exit 2
  fi
}

# Command-line parsing. --execute is the only way this script leaves dry-run.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs) require_value "$1" "$#"; FILESYSTEM="$2"; shift 2 ;;
    --stanza) require_value "$1" "$#"; NSD_STANZA="$2"; shift 2 ;;
    --execute) DRY_RUN=false; shift ;;
    -h|--help) usage; exit 0 ;;
    *) critical "Unknown argument: $1"; usage; exit 2 ;;
  esac
done
|
||||
|
||||
if [[ -z "$FILESYSTEM" || -z "$NSD_STANZA" ]]; then
  critical "Missing required --fs or --stanza"
  usage
  exit 2
fi

# -r alone also accepts a readable directory; require a regular file.
if [[ ! -f "$NSD_STANZA" || ! -r "$NSD_STANZA" ]]; then
  critical "Stanza file does not exist or is not readable: $NSD_STANZA"
  exit 2
fi

for cmd in mmlsfs mmcrnsd mmadddisk; do
  require_cmd "$cmd" || exit 2
done

if ! mmlsfs "$FILESYSTEM" >/dev/null 2>&1; then
  critical "Filesystem does not exist or cannot be queried: $FILESYSTEM"
  exit 1
fi

warning "Adding NSDs must be coordinated with storage, GPFS, application, and change-management teams."
section "Planned GPFS changes"
ok "DRY-RUN: mmcrnsd -F $NSD_STANZA"
ok "DRY-RUN: mmadddisk $FILESYSTEM -F $NSD_STANZA"

# Returns immediately in dry-run mode; otherwise the operator must type
# EXECUTE or the script exits with status 1.
confirm_execute "create NSDs and add disks to $FILESYSTEM"

if [[ "$DRY_RUN" == "false" ]]; then
  # The two steps are not atomic. Report each failure explicitly and exit
  # with 1 instead of letting errexit end the script without explanation.
  if ! run_cmd mmcrnsd -F "$NSD_STANZA"; then
    critical "mmcrnsd failed; no disks were added to $FILESYSTEM"
    exit 1
  fi

  if ! run_cmd mmadddisk "$FILESYSTEM" -F "$NSD_STANZA"; then
    critical "mmadddisk failed after mmcrnsd succeeded. NSDs from $NSD_STANZA may exist without being part of $FILESYSTEM; review the NSD inventory with the cluster team before retrying."
    exit 1
  fi

  section "Post-add NSD inventory"
  mmlsnsd 2>&1 | tee -a "$LOG_FILE" || warning "mmlsnsd command failed after execution"
  section "Post-add filesystem disks"
  mmlsdisk "$FILESYSTEM" 2>&1 | tee -a "$LOG_FILE" || warning "mmlsdisk command failed after execution"
fi
|
||||
+56
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# When true, the restripe is started as a background job.
BACKGROUND=false

# Print the one-line command synopsis for this script to stdout.
usage() {
  printf 'Usage: %s --fs <filesystem> [--execute] [--background]\n' "$(basename "$0")"
}

# require_value OPTION REMAINING_ARG_COUNT
# Aborts with usage and exit code 2 when a value-taking option is the last
# argument. Without this check `shift 2` fails and errexit terminates the
# script silently with status 1.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    critical "Option $1 requires a value"
    usage
    exit 2
  fi
}

while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs) require_value "$1" "$#"; FILESYSTEM="$2"; shift 2 ;;
    --execute) DRY_RUN=false; shift ;;
    --background) BACKGROUND=true; shift ;;
    -h|--help) usage; exit 0 ;;
    *) critical "Unknown argument: $1"; usage; exit 2 ;;
  esac
done
|
||||
|
||||
if [[ -z "$FILESYSTEM" ]]; then
  critical "Missing required --fs <filesystem>"
  usage
  exit 2
fi

for cmd in mmlsdisk mmrestripefs; do
  require_cmd "$cmd" || exit 2
done

warning "Restripe/rebalance can be I/O intensive. Run only in an approved change window."

section "Current disk balance"
mmlsdisk "$FILESYSTEM" 2>&1 | tee -a "$LOG_FILE" || warning "Unable to show current disk state"

section "Planned rebalance"
if [[ "$BACKGROUND" == "true" ]]; then
  # Fail safe: only an explicit DRY_RUN=false starts the restripe. Values
  # such as "TRUE" or "yes" are treated as dry-run rather than executed.
  if [[ "$DRY_RUN" != "false" ]]; then
    ok "DRY-RUN: mmrestripefs $FILESYSTEM -b &"
  else
    confirm_execute "background restripe for $FILESYSTEM"
    ok "RUN: mmrestripefs $FILESYSTEM -b &"
    # Detach the job from the terminal: output goes straight to the log file
    # instead of through tee, stdin is closed, and nohup (when installed)
    # shields it from hangup after this script and the session end.
    if command -v nohup >/dev/null 2>&1; then
      nohup mmrestripefs "$FILESYSTEM" -b >>"$LOG_FILE" 2>&1 </dev/null &
    else
      mmrestripefs "$FILESYSTEM" -b >>"$LOG_FILE" 2>&1 </dev/null &
    fi
    ok "Background restripe started with PID $!. Output is appended to $LOG_FILE"
  fi
else
  ok "DRY-RUN: mmrestripefs $FILESYSTEM -b"
  confirm_execute "restripe for $FILESYSTEM"
  if [[ "$DRY_RUN" == "false" ]]; then
    run_cmd mmrestripefs "$FILESYSTEM" -b
  fi
fi
|
||||
+89
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# Print the one-line command synopsis for this script to stdout.
usage() {
  local script_name
  script_name="$(basename "$0")" || true

  printf 'Usage: %s --fs <filesystem>\n' "$script_name"
}
|
||||
|
||||
# require_value OPTION REMAINING_ARG_COUNT
# Aborts with usage and exit code 2 when a value-taking option is the last
# argument. Without this check `shift 2` fails and errexit terminates the
# script silently with status 1.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    critical "Option $1 requires a value"
    usage
    exit 2
  fi
}

# Command-line parsing: --fs <filesystem>, -h/--help.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs) require_value "$1" "$#"; FILESYSTEM="$2"; shift 2 ;;
    -h|--help) usage; exit 0 ;;
    *) critical "Unknown argument: $1"; usage; exit 2 ;;
  esac
done
|
||||
|
||||
# A target filesystem is mandatory; without one, report usage and exit 2.
[[ -n "$FILESYSTEM" ]] || {
  critical "Missing required --fs <filesystem>"
  usage
  exit 2
}

# Raised to 1 by any failing check; decides the final exit status.
issues=0
|
||||
|
||||
# run_check DESCRIPTION COMMAND [ARG...]
# Prints a section header, then runs COMMAND with its combined output teed to
# $LOG_FILE. A missing command only produces a WARNING. A failing command
# logs CRITICAL and sets the global `issues` flag to 1.
run_check() {
  local label="$1"
  shift

  section "$label"

  if ! command -v "$1" >/dev/null 2>&1; then
    warning "$1 command not available, skipping"
    return
  fi

  if ! "$@" 2>&1 | tee -a "$LOG_FILE"; then
    critical "$label failed"
    issues=1
  fi
}
|
||||
|
||||
run_check "GPFS daemon state" mmgetstate -a
run_check "Target filesystem mount state" mmlsmount "$FILESYSTEM"
run_check "Target filesystem disks" mmlsdisk "$FILESYSTEM"
run_check "NSD inventory" mmlsnsd

section "Filesystem capacity"
if command -v df >/dev/null 2>&1; then
  # index() matches the filesystem name literally rather than as a regex.
  # A non-zero df status (for example one unreadable mount) must not end the
  # post-check under errexit/pipefail before the summary is printed.
  df -h 2>&1 \
    | awk -v fs="$FILESYSTEM" 'NR == 1 || index($0, fs) || /gpfs|mmfs/' \
    | tee -a "$LOG_FILE" \
    || warning "df reported errors; capacity listing may be incomplete"
else
  warning "df command not available, skipping"
fi

section "Cluster health"
if command -v mmhealth >/dev/null 2>&1; then
  health_output="$(mmhealth cluster show 2>&1 || true)"
  printf '%s\n' "$health_output" | tee -a "$LOG_FILE"
  # Keyword scan of the health report; any hit fails the post-check.
  if printf '%s\n' "$health_output" | grep -Eiq 'degraded|failed|down|error|unhealthy'; then
    critical "Cluster health output indicates an issue"
    issues=1
  fi
else
  warning "mmhealth command not available, skipping"
fi

section "Recent GPFS journal entries"
if command -v journalctl >/dev/null 2>&1; then
  journalctl -u 'gpfs*' -n 50 --no-pager 2>&1 | tee -a "$LOG_FILE" || warning "journalctl GPFS query failed"
else
  warning "journalctl command not available, skipping"
fi

section "Recent kernel messages"
if command -v dmesg >/dev/null 2>&1; then
  dmesg -T 2>/dev/null | tail -50 | tee -a "$LOG_FILE" || warning "dmesg query failed"
else
  warning "dmesg command not available, skipping"
fi

if [[ "$issues" -eq 0 ]]; then
  ok "Post-check completed without detected operational failures"
  exit 0
fi

critical "Post-check detected one or more issues"
exit 1
|
||||
+78
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# Path of the generated report; assigned once the filesystem is known.
REPORT_FILE=""

# Print the one-line command synopsis for this script to stdout.
usage() {
  printf 'Usage: %s --fs <filesystem>\n' "$(basename "$0")"
}

# require_value OPTION REMAINING_ARG_COUNT
# Aborts with usage and exit code 2 when a value-taking option is the last
# argument. Without this check `shift 2` fails and errexit terminates the
# script silently with status 1.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    critical "Option $1 requires a value"
    usage
    exit 2
  fi
}

while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs) require_value "$1" "$#"; FILESYSTEM="$2"; shift 2 ;;
    -h|--help) usage; exit 0 ;;
    *) critical "Unknown argument: $1"; usage; exit 2 ;;
  esac
done
|
||||
|
||||
if [[ -z "$FILESYSTEM" ]]; then
  critical "Missing required --fs <filesystem>"
  usage
  exit 2
fi

# Reduce the filesystem name to [A-Za-z0-9_] before using it in a file name,
# so a name containing "/" (for example one given as /dev/<fs>) does not
# point the report at a directory that does not exist.
report_fs_tag="${FILESYSTEM//[^[:alnum:]_]/_}"
REPORT_FILE="/tmp/gpfs_extend_report_${report_fs_tag}_${TIMESTAMP}.txt"
|
||||
|
||||
# append_section TITLE COMMAND [ARG...]
# Appends a "== TITLE ==" header and the combined output of COMMAND to
# $REPORT_FILE. A missing or failing command is recorded in the report as a
# WARNING line instead of failing the caller.
append_section() {
  local heading="$1"
  shift

  {
    printf '\n== %s ==\n' "$heading"
    if ! command -v "$1" >/dev/null 2>&1; then
      printf 'WARNING: command not available: %s\n' "$1"
    elif ! "$@" 2>&1; then
      printf 'WARNING: command failed: %s\n' "$*"
    fi
  } >> "$REPORT_FILE"
}
|
||||
|
||||
# Start the report: this redirection truncates any existing file at the path.
{
  printf 'GPFS / Spectrum Scale Filesystem Expansion Report\n'
  printf 'Hostname: %s\n' "$(hostname 2>/dev/null || printf 'unknown')"
  printf 'Date: %s\n' "$(date)"
  printf 'Target filesystem: %s\n' "$FILESYSTEM"
} > "$REPORT_FILE"

# Cluster and filesystem state, appended in report order.
append_section "GPFS daemon state" mmgetstate -a
append_section "Cluster definition" mmlscluster
append_section "Managers and quorum" mmlsmgr
append_section "Target filesystem mount state" mmlsmount "$FILESYSTEM"
append_section "Target filesystem disks" mmlsdisk "$FILESYSTEM"
append_section "NSD inventory" mmlsnsd
append_section "Filesystem capacity" df -h

if ! command -v mmhealth >/dev/null 2>&1; then
  printf '\n== Cluster health ==\nWARNING: mmhealth command not available\n' >> "$REPORT_FILE"
else
  append_section "Cluster health" mmhealth cluster show
fi

# The journal section is left out entirely when journalctl is absent.
if command -v journalctl >/dev/null 2>&1; then
  append_section "Recent GPFS journal entries" journalctl -u 'gpfs*' -n 50 --no-pager
fi

# Kernel messages are piped through tail, so they are written inline here.
if command -v dmesg >/dev/null 2>&1; then
  {
    printf '\n== Recent kernel messages ==\n'
    dmesg -T 2>/dev/null | tail -50 || printf 'WARNING: dmesg query failed\n'
  } >> "$REPORT_FILE"
fi

ok "Generated report: $REPORT_FILE"
|
||||
@@ -0,0 +1,136 @@
|
||||
# GPFS / IBM Spectrum Scale Filesystem Expansion Toolkit
|
||||
|
||||
Safe, sanitized Bash examples for planning and executing a GPFS / IBM Spectrum Scale filesystem expansion. The scripts are written as portfolio-grade operational tooling for a Linux Infrastructure Engineer: conservative defaults, clear validation, dry-run behavior, and explicit operator confirmation before changes.
|
||||
|
||||
These scripts are examples. Exact GPFS commands, flags, quorum practices, failure-group design, and storage naming standards vary by Spectrum Scale version and site policy.
|
||||
|
||||
## Concepts
|
||||
|
||||
- **Cluster** - the Spectrum Scale administrative domain containing the nodes, daemon configuration, quorum policy, filesystems, and NSDs.
|
||||
- **Node** - a server participating in the GPFS cluster. Nodes may be clients, NSD servers, quorum nodes, manager-capable nodes, or a mix of roles.
|
||||
- **Quorum** - the voting mechanism that protects the cluster from split-brain conditions. Expansion work should not proceed during quorum instability.
|
||||
- **Filesystem** - the GPFS namespace and data layout presented to clients, backed by one or more NSDs.
|
||||
- **NSD** - Network Shared Disk, the GPFS abstraction for a disk or LUN that is served to the cluster.
|
||||
- **Failure group** - a placement hint that tells GPFS which disks share a failure domain, such as an enclosure, rack, site, controller pair, or storage array.
|
||||
- **Storage pool** - a named pool of NSDs used for placement and lifecycle policy, commonly `system` plus optional data pools.
|
||||
- **Restripe/rebalance** - the operation that redistributes data after disks are added. It can be I/O intensive and should run only in an approved change window.
|
||||
|
||||
## Required Tools
|
||||
|
||||
Common GPFS / Spectrum Scale tools expected in production include:
|
||||
|
||||
- `mmgetstate`
|
||||
- `mmlscluster`
|
||||
- `mmlsfs`
|
||||
- `mmlsdisk`
|
||||
- `mmlsnsd`
|
||||
- `mmcrnsd`
|
||||
- `mmadddisk`
|
||||
- `mmrestripefs`
|
||||
|
||||
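The toolkit also uses common Linux tools such as `df`, `lsblk`, `findmnt`, `journalctl`, and `dmesg`, plus the GPFS commands `mmlsmount`, `mmlsmgr`, and `mmhealth`, where available. A command that a script cannot work without is reported as `CRITICAL` and the script exits with code `2`; missing optional commands are reported as `WARNING` and skipped.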
|
||||
|
||||
## Safety Model
|
||||
|
||||
- Default mode is dry-run.
|
||||
- Real GPFS modifications require `--execute`.
|
||||
- Destructive or high-impact steps also prompt for `EXECUTE`.
|
||||
- Disk detection is read-only and never partitions, formats, wipes, or modifies devices.
|
||||
- Device selection must always be confirmed with the storage team and cluster owners.
|
||||
- The scripts do not assume production disk names.
|
||||
|
||||
Output uses a consistent status format:
|
||||
|
||||
- `OK`
|
||||
- `WARNING`
|
||||
- `CRITICAL`
|
||||
|
||||
Exit codes:
|
||||
|
||||
- `0` - OK
|
||||
- `1` - operational validation failure
|
||||
- `2` - invalid input or missing requirement
|
||||
|
||||
## Scripts
|
||||
|
||||
- `00_env.sh` - shared configuration and helper functions.
|
||||
- `01_cluster_overview.sh` - read-only cluster overview.
|
||||
- `02_precheck_gpfs.sh` - pre-expansion validation for a target filesystem.
|
||||
- `03_detect_new_disks.sh` - read-only candidate block-device discovery.
|
||||
- `04_create_nsd_stanza.sh` - generate an NSD stanza file.
|
||||
- `05_add_nsd_to_filesystem.sh` - create NSDs and add disks to a filesystem, dry-run by default.
|
||||
- `06_rebalance_filesystem.sh` - optional restripe/rebalance, dry-run by default.
|
||||
- `07_postcheck_gpfs.sh` - post-change validation.
|
||||
- `08_generate_report.sh` - text report for the change record.
|
||||
- `gpfs_extend_runbook.sh` - guided order of operations plus safe read-only checks.
|
||||
|
||||
## Example Workflow
|
||||
|
||||
```bash
|
||||
cd infra-run/scripts/bash/gpfs
|
||||
|
||||
./01_cluster_overview.sh
|
||||
./02_precheck_gpfs.sh --fs gpfs01
|
||||
./03_detect_new_disks.sh --exclude-mounted --exclude-existing-nsd
|
||||
|
||||
./04_create_nsd_stanza.sh \
|
||||
--fs gpfs01 \
|
||||
--devices "/dev/sdb /dev/sdc" \
|
||||
--servers "gpfsnsd01,gpfsnsd02" \
|
||||
--failure-group 10 \
|
||||
--pool system \
|
||||
--usage dataAndMetadata
|
||||
```
|
||||
|
||||
Review the generated stanza with the storage and cluster teams. Confirm device identity, LUN masking, multipath naming, failure group placement, and site standards before continuing.
|
||||
|
||||
Dry-run the add step:
|
||||
|
||||
```bash
|
||||
./05_add_nsd_to_filesystem.sh \
|
||||
--fs gpfs01 \
|
||||
--stanza /tmp/gpfs_nsd_gpfs01_YYYYmmdd_HHMMSS.stanza
|
||||
```
|
||||
|
||||
Execute only in an approved change window:
|
||||
|
||||
```bash
|
||||
./05_add_nsd_to_filesystem.sh \
|
||||
--fs gpfs01 \
|
||||
--stanza /tmp/gpfs_nsd_gpfs01_YYYYmmdd_HHMMSS.stanza \
|
||||
--execute
|
||||
```
|
||||
|
||||
Optional rebalance:
|
||||
|
||||
```bash
|
||||
./06_rebalance_filesystem.sh --fs gpfs01
|
||||
./06_rebalance_filesystem.sh --fs gpfs01 --execute --background
|
||||
```
|
||||
|
||||
Post-check and report:
|
||||
|
||||
```bash
|
||||
./07_postcheck_gpfs.sh --fs gpfs01
|
||||
./08_generate_report.sh --fs gpfs01
|
||||
```
|
||||
|
||||
Runbook helper:
|
||||
|
||||
```bash
|
||||
./gpfs_extend_runbook.sh \
|
||||
--fs gpfs01 \
|
||||
--devices "/dev/sdb /dev/sdc" \
|
||||
--servers "gpfsnsd01,gpfsnsd02" \
|
||||
--failure-group 10 \
|
||||
--pool system \
|
||||
--usage dataAndMetadata
|
||||
```
|
||||
|
||||
## Operational Notes
|
||||
|
||||
- Do not run these scripts blindly on production clusters.
|
||||
- Confirm disk and multipath identity with the storage team before creating NSDs.
|
||||
- Validate quorum and manager health before expansion.
|
||||
- Confirm application I/O risk and rollback procedures before `mmadddisk` or `mmrestripefs`.
|
||||
- Confirm the Spectrum Scale version and local standards for stanza fields before executing changes.
|
||||
+94
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
# shellcheck source=00_env.sh
|
||||
. "$SCRIPT_DIR/00_env.sh"
|
||||
|
||||
# Inputs that have no default in 00_env.sh.
DEVICES=""
SERVERS=""
EXECUTE=false

# Print the one-line command synopsis for this script to stdout.
usage() {
  printf 'Usage: %s --fs <filesystem> --devices "/dev/sdb /dev/sdc" --servers "node1,node2" --failure-group <number> --pool <storage_pool> --usage <dataOnly|metadataOnly|dataAndMetadata> [--execute]\n' "$(basename "$0")"
}

# require_value OPTION REMAINING_ARG_COUNT
# Aborts with usage and exit code 2 when a value-taking option is the last
# argument. Without this check `shift 2` fails and errexit terminates the
# script silently with status 1.
require_value() {
  if [[ "$2" -lt 2 ]]; then
    critical "Option $1 requires a value"
    usage
    exit 2
  fi
}

while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --fs) require_value "$1" "$#"; FILESYSTEM="$2"; shift 2 ;;
    --devices) require_value "$1" "$#"; DEVICES="$2"; shift 2 ;;
    --servers) require_value "$1" "$#"; SERVERS="$2"; shift 2 ;;
    --failure-group) require_value "$1" "$#"; FAILURE_GROUP="$2"; shift 2 ;;
    --pool) require_value "$1" "$#"; STORAGE_POOL="$2"; shift 2 ;;
    --usage) require_value "$1" "$#"; USAGE="$2"; shift 2 ;;
    --execute) EXECUTE=true; DRY_RUN=false; shift ;;
    -h|--help) usage; exit 0 ;;
    *) critical "Unknown argument: $1"; usage; exit 2 ;;
  esac
done
|
||||
|
||||
section "Recommended GPFS Expansion Flow"
# Printed to stdout only; $SCRIPT_DIR is expanded, the <placeholders> are literal.
cat <<FLOW
Step 1: Cluster overview
$SCRIPT_DIR/01_cluster_overview.sh

Step 2: GPFS precheck
$SCRIPT_DIR/02_precheck_gpfs.sh --fs <filesystem>

Step 3: Detect candidate disks
$SCRIPT_DIR/03_detect_new_disks.sh --exclude-mounted --exclude-existing-nsd

Step 4: Generate NSD stanza
$SCRIPT_DIR/04_create_nsd_stanza.sh --fs <filesystem> --devices "/dev/sdb /dev/sdc" --servers "node1,node2" --failure-group <number> --pool <storage_pool> --usage <usage>

Step 5: Create NSDs and add disks to filesystem
$SCRIPT_DIR/05_add_nsd_to_filesystem.sh --fs <filesystem> --stanza <stanza_file> [--execute]

Step 6: Optional restripe/rebalance
$SCRIPT_DIR/06_rebalance_filesystem.sh --fs <filesystem> [--execute] [--background]

Step 7: Post-check
$SCRIPT_DIR/07_postcheck_gpfs.sh --fs <filesystem>

Step 8: Generate report
$SCRIPT_DIR/08_generate_report.sh --fs <filesystem>
FLOW

# Without a target filesystem the runbook is informational only.
if [[ -z "$FILESYSTEM" ]]; then
  warning "No --fs supplied. Printed runbook only."
  exit 0
fi

if [[ "$EXECUTE" != "true" ]]; then
  DRY_RUN=true
else
  warning "--execute was supplied. Destructive steps still require the individual script confirmation prompt."
fi

section "Running Safe Read-Only Steps"
# Each read-only step may fail without stopping the runbook.
"$SCRIPT_DIR/01_cluster_overview.sh" || warning "Cluster overview reported warnings or failures"
"$SCRIPT_DIR/02_precheck_gpfs.sh" --fs "$FILESYSTEM" || warning "Precheck reported warnings or failures"
"$SCRIPT_DIR/03_detect_new_disks.sh" --exclude-mounted --exclude-existing-nsd || warning "Disk detection reported warnings or failures"

# Stanza generation is attempted only when all three of --devices, --servers
# and --failure-group are present; supplying just some of them is reported.
if [[ -z "$DEVICES" && -z "$SERVERS" && -z "$FAILURE_GROUP" ]]; then
  :
elif [[ -z "$DEVICES" || -z "$SERVERS" || -z "$FAILURE_GROUP" ]]; then
  warning "NSD stanza generation requires --devices, --servers, --failure-group, --pool, and --usage"
else
  "$SCRIPT_DIR/04_create_nsd_stanza.sh" \
    --fs "$FILESYSTEM" \
    --devices "$DEVICES" \
    --servers "$SERVERS" \
    --failure-group "$FAILURE_GROUP" \
    --pool "$STORAGE_POOL" \
    --usage "$USAGE"
fi

section "Next Manual Step"
if [[ "$EXECUTE" != "true" ]]; then
  ok "Review outputs and generated stanza. Add disks only through 05_add_nsd_to_filesystem.sh with --execute."
else
  warning "Run 05_add_nsd_to_filesystem.sh manually with --execute after reviewing the generated stanza."
fi
|
||||
Reference in New Issue
Block a user