#!/usr/bin/env bash
#
# check_inode_usage.sh - report per-filesystem inode usage taken from `df -Pi`
# and exit with a status that reflects the highest usage found.
# Run with --help for the supported options.

# Strict mode: abort on unhandled command failures, on expansion of unset
# variables, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Default settings; each one can be overridden on the command line
# (--warning, --critical and --top respectively).
warning_threshold=80
critical_threshold=90
top_count=10

#######################################
# Print the command-line synopsis.
# Arguments: none
# Outputs:   the help text on stdout
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'USAGE'
Usage: check_inode_usage.sh [--warning PERCENT] [--critical PERCENT] [--top N] [--help]

Detect inode exhaustion using df -i.
USAGE
}

#######################################
# Test whether a string is a non-negative decimal integer.
# Arguments: $1 - the string to test (a missing argument counts as empty)
# Returns:   0 if $1 consists solely of one or more digits, 1 otherwise
#######################################
is_number() {
  # ${1-} keeps a call without arguments from being fatal under `nounset`.
  [[ "${1-}" =~ ^[0-9]+$ ]]
}

#######################################
# Abort unless an option is followed by a value.
# Arguments: $1 - option name as typed by the user
#            $2 - number of arguments left, counting the option itself
# Outputs:   an error line on stdout when the value is missing
# Returns:   0 when a value is present; otherwise exits the script with 2
#######################################
require_option_value() {
  if (($2 < 2)); then
    printf 'CRITICAL: %s requires a value\n' "$1"
    exit 2
  fi
}

#######################################
# Parse the command line into the global settings.
# Globals:   warning_threshold, critical_threshold, top_count (written)
# Arguments: the script's command-line arguments
# Outputs:   help text or an error line on stdout
# Returns:   0 on success; exits 0 for --help and 2 for invalid usage
#######################################
parse_args() {
  while (($# > 0)); do
    case "$1" in
      --warning)
        require_option_value "$1" "$#"
        warning_threshold="$2"
        shift 2
        ;;
      --critical)
        require_option_value "$1" "$#"
        critical_threshold="$2"
        shift 2
        ;;
      --top)
        require_option_value "$1" "$#"
        top_count="$2"
        shift 2
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        printf 'CRITICAL: unknown option: %s\n' "$1"
        usage
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# Validate the three numeric settings in the order warning, critical, top.
for option_name in warning_threshold critical_threshold top_count; do
  value="${!option_name}"
  if ! is_number "$value"; then
    printf 'CRITICAL: numeric option expected, got: %s\n' "$value"
    exit 2
  fi
  # Bash arithmetic reads a leading 0 as octal: "08" is an error and "010"
  # means 8. Forcing base 10 makes later (( )) comparisons see the number the
  # user typed.
  printf -v "$option_name" '%d' "$((10#$value))"
done

# The warning level has to trigger strictly before the critical one.
if ((warning_threshold >= critical_threshold)); then
  printf 'CRITICAL: --warning must be lower than --critical\n'
  exit 2
fi

# Everything below depends on df, so fail early when it is not installed.
if ! command -v df >/dev/null 2>&1; then
  printf 'CRITICAL: required command not found: df\n'
  exit 2
fi

# Scratch files: tmp_df holds the raw `df -Pi` table, tmp_alerts the rows that
# reach the warning threshold. The names start empty and the cleanup trap is
# installed before anything is created, so the first file is still removed if
# creating the second one fails.
tmp_df=""
tmp_alerts=""
trap 'rm -f -- "$tmp_df" "$tmp_alerts"' EXIT
tmp_df="$(mktemp)"
tmp_alerts="$(mktemp)"

# -P selects the POSIX one-line-per-filesystem layout, -i reports inodes.
# Handle a df failure explicitly: left to errexit alone the script would die
# without a message and with df's own exit status.
if ! df -Pi > "$tmp_df"; then
  printf 'CRITICAL: df -Pi failed; cannot determine inode usage\n'
  exit 2
fi

#######################################
# Print the rows of a `df -Pi` table whose inode usage reaches a threshold.
# Arguments: $1 - threshold percentage
#            $2 - file containing the `df -Pi` output (header on line 1)
# Outputs:   matching rows, unmodified, on stdout
#######################################
inode_rows_at_or_above() {
  # "+ 0" forces numeric comparison. Comparing the text left after stripping
  # "%" is a string comparison in awk, which ranks "9" above "80" and "100"
  # below it. Rows whose usage column is not a number (df prints "-") are
  # skipped.
  awk -v warn="$1" '
    NR > 1 && $5 ~ /^[0-9]+%?$/ && ($5 + 0) >= (warn + 0)
  ' "$2"
}

#######################################
# Print the highest inode usage percentage found in a `df -Pi` table.
# Arguments: $1 - file containing the `df -Pi` output (header on line 1)
# Outputs:   the maximum as an integer without "%"; 0 when no row has a number
#######################################
highest_inode_pct() {
  awk '
    NR > 1 && $5 ~ /^[0-9]+%?$/ {
      pct = $5 + 0
      if (pct > max) max = pct
    }
    END { printf "%d", max }
  ' "$1"
}

inode_rows_at_or_above "$warning_threshold" "$tmp_df" > "$tmp_alerts"
max_pct="$(highest_inode_pct "$tmp_df")"

#######################################
# Choose the report status and exit code for a usage percentage.
# Globals:   status, exit_code (written)
# Arguments: $1 - highest inode usage percentage
#            $2 - warning threshold
#            $3 - critical threshold
#######################################
set_inode_status() {
  local pct="$1" warn="$2" crit="$3"

  status="OK"
  exit_code=0
  # Test the critical level first so it wins when both thresholds are reached.
  if ((pct >= crit)); then
    status="CRITICAL"
    # NOTE(review): Nagios-style plugins use 2 for CRITICAL and 3 for UNKNOWN,
    # while this script exits 2 for its own fatal errors and 3 here. Confirm
    # what the consumers expect before changing it.
    exit_code=3
  elif ((pct >= warn)); then
    status="WARNING"
    exit_code=1
  fi
}

set_inode_status "$max_pct" "$warning_threshold" "$critical_threshold"

#######################################
# List the filesystems with the highest inode usage, busiest first.
# Arguments: $1 - file containing the `df -Pi` output (header on line 1)
#            $2 - maximum number of rows to print
# Outputs:   lines of the form
#            "PCT% MOUNT FILESYSTEM inodes=N used=N free=N" on stdout
#######################################
print_top_mounts() {
  # Stage 1 prefixes each row with a numeric sort key and a tab, and rebuilds
  # the mount point from field 6 onwards so paths containing spaces survive.
  # Stage 3 strips the key and applies the row limit; unlike head it reads all
  # of its input, so sort cannot be killed by SIGPIPE and trip pipefail.
  awk '
    NR > 1 {
      mount = $6
      for (i = 7; i <= NF; i++) mount = mount " " $i
      shown = $5
      sub(/%$/, "", shown)
      printf "%d\t%s%% %s %s inodes=%s used=%s free=%s\n", \
        $5 + 0, shown, mount, $1, $2, $3, $4
    }
  ' "$1" \
    | sort -rn \
    | awk -v limit="$2" 'NR <= limit + 0 { print substr($0, index($0, "\t") + 1) }'
}

# Headline: overall status and the highest usage seen.
printf '%s: Highest inode usage is %s%%\n\n' "$status" "$max_pct"

printf 'Filesystems above threshold:\n'
if [[ -s "$tmp_alerts" ]]; then
  cat "$tmp_alerts"
else
  printf 'OK: no filesystems above warning threshold\n'
fi
printf '\n'

printf 'Inode usage table:\n'
cat "$tmp_df"
printf '\n'

printf 'Top affected mount points:\n'
print_top_mounts "$tmp_df" "$top_count"
printf '\n'

printf 'Evidence:\n'
printf 'Thresholds: warning=%s%% critical=%s%%\n\n' "$warning_threshold" "$critical_threshold"

# The leading "--" stops printf from reading the "- " bullets as options.
printf 'Recommended next steps:\n'
printf -- '- Find directories with many small files under affected mount points\n'
printf -- '- Check logs, cache, spool, session, and temporary directories\n'
printf -- '- Avoid deleting blindly; confirm ownership and application impact first\n'
printf -- '- Confirm whether inode exhaustion is causing write or deploy failures\n'
printf -- '- Attach this output to incident ticket\n'

# End the script with the exit code selected alongside the status above.
exit "$exit_code"
|