#!/usr/bin/env bash
#
# check_inode_usage.sh - report inode usage per filesystem using `df -Pi`.
#
# Usage:
#   check_inode_usage.sh [--warning PERCENT] [--critical PERCENT] [--top N] [--help]
#
# Output (stdout): a one-line status, the filesystems at or above the warning
# threshold, the raw `df -Pi` table, the top N mount points by inode usage,
# the thresholds in effect, and a list of recommended next steps.
#
# Exit codes:
#   0  highest inode usage is below the warning threshold
#   1  highest inode usage is at or above the warning threshold
#   3  highest inode usage is at or above the critical threshold
#   2  invalid arguments, df missing, or df produced no filesystem rows
#
# NOTE(review): status CRITICAL exits with 3 while argument errors (also
# labelled "CRITICAL") exit with 2. Kept as-is because callers may depend on
# these codes; confirm against the monitoring system's expectations.

set -o errexit
set -o nounset
set -o pipefail

warning_threshold=80
critical_threshold=90
top_count=10

# Print the usage text to stdout.
usage() {
  cat <<'USAGE'
Usage: check_inode_usage.sh [--warning PERCENT] [--critical PERCENT] [--top N] [--help]

Detect inode exhaustion using df -i.
USAGE
}

# Succeed when $1 consists solely of decimal digits.
is_number() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

# Print "CRITICAL: <message>" to stdout and exit with status 2.
# stdout is used deliberately so the message lands in the same stream as the
# rest of the report.
fail() {
  printf 'CRITICAL: %s\n' "$*"
  exit 2
}

while (($# > 0)); do
  case "$1" in
    --warning)
      (($# >= 2)) || fail '--warning requires a value'
      warning_threshold="$2"
      shift 2
      ;;
    --critical)
      (($# >= 2)) || fail '--critical requires a value'
      critical_threshold="$2"
      shift 2
      ;;
    --top)
      (($# >= 2)) || fail '--top requires a value'
      top_count="$2"
      shift 2
      ;;
    --help | -h)
      usage
      exit 0
      ;;
    *)
      printf 'CRITICAL: unknown option: %s\n' "$1"
      usage
      exit 2
      ;;
  esac
done

for value in "$warning_threshold" "$critical_threshold" "$top_count"; do
  is_number "$value" || fail "numeric option expected, got: $value"
done

# Force base 10 so that values with leading zeros (e.g. "08") are not parsed
# as invalid octal literals by the arithmetic contexts below.
warning_threshold=$((10#$warning_threshold))
critical_threshold=$((10#$critical_threshold))
top_count=$((10#$top_count))

if ((warning_threshold >= critical_threshold)); then
  fail '--warning must be lower than --critical'
fi

if ! command -v df >/dev/null 2>&1; then
  fail 'required command not found: df'
fi

# A single private directory holds both scratch files, so one trap installed
# right after creation cleans up everything on every exit path.
tmp_dir="$(mktemp -d)"
trap 'rm -rf -- "$tmp_dir"' EXIT
tmp_df="$tmp_dir/df"
tmp_alerts="$tmp_dir/alerts"

# df may exit non-zero while still printing rows for the filesystems it could
# read. Do not let errexit abort silently with df's status (which could be
# mistaken for WARNING); keep whatever rows were produced instead.
if ! df -Pi >"$tmp_df"; then
  printf 'WARNING: df -Pi exited non-zero; inode table may be incomplete\n' >&2
fi
if ! awk 'NR > 1 { rows++ } END { exit (rows > 0 ? 0 : 1) }' "$tmp_df"; then
  fail 'df -Pi produced no filesystem rows'
fi

# Rows whose inode-usage column ($5, e.g. "42%") is at or above the warning
# threshold. The "+ 0" forces a numeric comparison: after gsub the value is a
# string, and comparing it directly would be lexical ("9" >= "80").
awk -v warn="$warning_threshold" '
  NR > 1 {
    pct = $5
    gsub(/%/, "", pct)
    if (pct + 0 >= warn + 0) {
      print
    }
  }
' "$tmp_df" >"$tmp_alerts"

# Highest inode-usage percentage across all rows; non-numeric values such as
# "-" count as 0.
max_pct="$(awk '
  BEGIN { max = 0 }
  NR > 1 {
    pct = $5
    gsub(/%/, "", pct)
    pct += 0
    if (pct > max) {
      max = pct
    }
  }
  END { printf "%d", max }
' "$tmp_df")"

status="OK"
exit_code=0
if ((max_pct >= critical_threshold)); then
  status="CRITICAL"
  exit_code=3
elif ((max_pct >= warning_threshold)); then
  status="WARNING"
  exit_code=1
fi

printf '%s: Highest inode usage is %s%%\n\n' "$status" "$max_pct"

printf 'Filesystems above threshold:\n'
if [[ -s "$tmp_alerts" ]]; then
  cat "$tmp_alerts"
else
  printf 'OK: no filesystems above warning threshold\n'
fi
printf '\n'

printf 'Inode usage table:\n'
cat "$tmp_df"
printf '\n'

printf 'Top affected mount points:\n'
# The final awk applies the --top limit itself and keeps reading to end of
# input, so sort never receives SIGPIPE (which pipefail + errexit would turn
# into an abort of the whole report).
awk 'NR > 1 { pct = $5; gsub(/%/, "", pct); print pct, $6, $1, $2, $3, $4 }' "$tmp_df" \
  | sort -rn \
  | awk -v limit="$top_count" '
      NR <= limit + 0 {
        printf "%s%% %s %s inodes=%s used=%s free=%s\n", $1, $2, $3, $4, $5, $6
      }
    '
printf '\n'

printf 'Evidence:\n'
printf 'Thresholds: warning=%s%% critical=%s%%\n\n' "$warning_threshold" "$critical_threshold"

printf 'Recommended next steps:\n'
printf -- '- Find directories with many small files under affected mount points\n'
printf -- '- Check logs, cache, spool, session, and temporary directories\n'
printf -- '- Avoid deleting blindly; confirm ownership and application impact first\n'
printf -- '- Confirm whether inode exhaustion is causing write or deploy failures\n'
printf -- '- Attach this output to incident ticket\n'

exit "$exit_code"