Files
Mateusz Suski e851568c8c
lint / shell-yaml-ansible (push) Failing after 16s
Add standalone Bash incident check scripts
2026-05-11 18:49:00 +00:00

135 lines
5.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# check_jvm_threads_heap.sh - lightweight, read-only JVM process diagnostics.

# Strict mode: abort on unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Option defaults; the command-line parser overwrites these.
target_pid=''   # value given to --pid (empty when not supplied)
match_string='' # value given to --match (empty when not supplied)
top_count=10    # value given to --top; caps list lengths in the report
#######################################
# Print the command-line synopsis and a one-line description.
# Arguments: none
# Outputs:   two lines of help text on stdout
#######################################
usage() {
  printf '%s\n' \
    'Usage: check_jvm_threads_heap.sh [--pid PID | --match STRING] [--top N] [--help]' \
    'Provide lightweight JVM process diagnostics. Does not create heap dumps or modify processes.'
}
#######################################
# Test whether a string is a non-empty run of decimal digits.
# Arguments: $1 - string to test
# Returns:   0 when $1 consists only of the characters 0-9 (at least one),
#            1 otherwise (empty, signed, fractional, or containing any
#            other character)
#######################################
is_number() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}
# Consume command-line options left to right. Options that take a value
# need a following argument; a missing value or an unrecognised option
# prints a CRITICAL line on stdout and exits with status 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --pid)
      if [[ $# -lt 2 ]]; then
        printf 'CRITICAL: --pid requires a value\n'
        exit 2
      fi
      target_pid="$2"
      shift 2
      ;;
    --match)
      if [[ $# -lt 2 ]]; then
        printf 'CRITICAL: --match requires a value\n'
        exit 2
      fi
      match_string="$2"
      shift 2
      ;;
    --top)
      if [[ $# -lt 2 ]]; then
        printf 'CRITICAL: --top requires a value\n'
        exit 2
      fi
      top_count="$2"
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      printf 'CRITICAL: unknown option: %s\n' "$1"
      usage
      exit 2
      ;;
  esac
done
# Print an already-formatted status line on stdout and stop with status 2.
abort_critical() {
  printf '%s\n' "$1"
  exit 2
}

# --pid and --match are mutually exclusive ways of selecting the process.
[[ -z "$target_pid" || -z "$match_string" ]] \
  || abort_critical 'CRITICAL: use either --pid or --match, not both'

# A PID, when supplied, must be all digits.
[[ -z "$target_pid" ]] || is_number "$target_pid" \
  || abort_critical 'CRITICAL: --pid must be numeric'

# --top is always checked, including its default value.
is_number "$top_count" \
  || abort_critical 'CRITICAL: --top must be numeric'

# Everything that follows relies on ps.
command -v ps >/dev/null 2>&1 \
  || abort_critical 'CRITICAL: required command not found: ps'
#######################################
# Keep only Java-looking rows of `ps` output.
# Reads rows on stdin and copies to stdout every row that contains "java"
# (case-insensitive, anywhere in the row) and whose first field is not the
# excluded PID.
# Arguments: $1 - PID to exclude (the caller passes this script's own PID,
#                 whose command line can contain "java" via --match)
#######################################
filter_java_processes() {
  awk -v self_pid="$1" \
    'tolower($0) ~ /java/ && $1 != "" && $1 != self_pid { print }'
}

tmp_java="$(mktemp)"
trap 'rm -f "$tmp_java"' EXIT

# Take the process snapshot BEFORE starting the filter. Piping ps straight
# into awk runs both at once, so the awk process (whose own arguments contain
# the word "java") can show up in the snapshot and be reported as a JVM.
ps -eo pid=,user=,rss=,pcpu=,comm=,args= > "$tmp_java"
java_rows="$(filter_java_processes "$$" < "$tmp_java")"
if [[ -n "$java_rows" ]]; then
  printf '%s\n' "$java_rows" > "$tmp_java"
else
  # Truncate rather than write a blank line, so line counts stay at zero.
  : > "$tmp_java"
fi

# Resolve --match to the PID of the first Java row containing the string.
# `--` keeps a match string that starts with "-" from being read as a grep
# option; `|| true` keeps "no match" from aborting under errexit/pipefail.
if [[ -z "$target_pid" && -n "$match_string" ]]; then
  target_pid="$(grep -F -- "$match_string" "$tmp_java" | awk 'NR == 1 { print $1 }' || true)"
fi
# No target process selected (no --pid, and --match absent or unmatched):
# list the detected Java processes, suggest next steps, and exit with 1.
if [[ -z "$target_pid" ]]; then
  detected_count="$(wc -l < "$tmp_java" | awk '{print $1}')"
  if [[ -n "$match_string" ]]; then
    # --match was given but selected nothing; say so instead of falling
    # through to the generic summary, which would hide the failed match.
    printf 'WARNING: No Java process matched "%s" (%s Java process(es) detected)\n\n' \
      "$match_string" "$detected_count"
  elif ((detected_count == 0)); then
    printf 'WARNING: No Java processes detected\n\n'
  else
    # NOTE(review): this status line says OK but the branch exits with 1;
    # confirm which of the two the callers rely on before aligning them.
    printf 'OK: Detected %s Java process(es); rerun with --pid PID for heap detail\n\n' "$detected_count"
  fi
  printf 'Detected JVM processes:\n'
  printf 'PID USER RSS_MB CPU COMMAND\n'
  # The row limit is applied inside awk rather than by piping into head:
  # with pipefail and errexit active, head closing the pipe early can kill
  # awk with SIGPIPE and abort the script before the advice below prints.
  # ps reports rss in KiB, hence the division by 1024 for the RSS_MB column.
  awk -v limit="$top_count" '
    NR > limit + 0 { exit }
    {
      pid = $1; user = $2; rss = int($3 / 1024); cpu = $4
      $1 = $2 = $3 = $4 = ""
      sub(/^ +/, "")
      printf "%s %s %s %s %s\n", pid, user, rss, cpu, $0
    }
  ' "$tmp_java"
  printf '\nRecommended next steps:\n'
  printf -- '- Select a JVM process with --pid for focused diagnostics\n'
  printf -- '- Review GC logs and application logs for the selected process\n'
  printf -- '- Check heap sizing and thread count trend\n'
  printf -- '- Capture jstack only if approved by operational process\n'
  exit 1
fi
# Look the target up with a single ps call. Checking existence and fetching
# the row in two separate calls leaves a window in which the process can
# exit, making the second call fail and errexit abort without any message.
if ! proc_line="$(ps -p "$target_pid" -o pid=,user=,rss=,pcpu=,comm=,args= 2>/dev/null)" \
  || [[ -z "$proc_line" ]]; then
  printf 'CRITICAL: process does not exist or is not visible: %s\n' "$target_pid"
  exit 2
fi

# A row that does not mention "java" (case-insensitive) downgrades the
# overall result to WARNING / exit status 1; diagnostics are still collected.
if ! grep -qi 'java' <<< "$proc_line"; then
  printf 'WARNING: PID %s does not appear to be a Java process from ps output\n\n' "$target_pid"
  status="WARNING"
  exit_code=1
else
  status="OK"
  exit_code=0
fi

# Thread count comes from the "Threads:" line of /proc/<pid>/status when that
# file is readable. A failed or empty read (for example the process exiting
# in between) falls back to "unavailable" instead of aborting the script.
thread_count="unavailable"
if [[ -r "/proc/${target_pid}/status" ]]; then
  thread_count="$(awk '/^Threads:/ { print $2 }' "/proc/${target_pid}/status" 2>/dev/null || true)"
  [[ -n "$thread_count" ]] || thread_count="unavailable"
fi

printf '%s: JVM diagnostics collected for PID %s\n\n' "$status" "$target_pid"
printf 'Detected JVM process:\n'
printf 'PID USER RSS_MB CPU COMMAND\n'
# ps reports rss in KiB, hence the division by 1024 for the RSS_MB column.
awk '{
  pid = $1; user = $2; rss = int($3 / 1024); cpu = $4
  $1 = $2 = $3 = $4 = ""
  sub(/^ +/, "")
  printf "%s %s %s %s %s\n", pid, user, rss, cpu, $0
}' <<< "$proc_line"
printf 'Thread count: %s\n\n' "$thread_count"
# Heap and thread evidence. jcmd is preferred; jstat is consulted only when
# jcmd is not installed. A failing tool produces a WARNING line and the
# report continues.
printf 'Heap and JVM evidence:\n'
if command -v jcmd >/dev/null 2>&1; then
  printf '\n[jcmd VM.flags]\n'
  if ! jcmd "$target_pid" VM.flags 2>/dev/null; then
    printf 'WARNING: jcmd VM.flags failed; permissions may be limited\n'
  fi

  printf '\n[jcmd GC.heap_info]\n'
  if ! jcmd "$target_pid" GC.heap_info 2>/dev/null; then
    printf 'WARNING: jcmd GC.heap_info failed; permissions may be limited\n'
  fi

  # Count identical "java.lang.Thread.State" lines of the thread dump and
  # show the most frequent ones, capped at --top entries. pipefail makes a
  # jcmd failure fail the whole pipeline, which triggers the warning.
  printf '\n[jcmd Thread.print summary]\n'
  if ! jcmd "$target_pid" Thread.print 2>/dev/null \
    | awk '/java.lang.Thread.State/ { state[$0]++ } END { for (item in state) print state[item], item }' \
    | sort -rn \
    | head -n "$top_count"; then
    printf 'WARNING: jcmd Thread.print failed; permissions may be limited\n'
  fi
elif command -v jstat >/dev/null 2>&1; then
  printf '\n[jstat -gc]\n'
  if ! jstat -gc "$target_pid" 1 1 2>/dev/null; then
    printf 'WARNING: jstat failed; permissions may be limited\n'
  fi
else
  printf 'WARNING: jcmd and jstat are unavailable; heap details skipped\n'
fi

printf '\n'
printf 'Evidence:\n'
printf 'PID=%s thread_count=%s top=%s\n' "$target_pid" "$thread_count" "$top_count"

# Effective UID: bash's EUID when set and non-empty, otherwise `id -u`,
# otherwise the placeholder 1 (treated as non-root).
effective_uid="${EUID:-}"
if [[ -z "$effective_uid" ]]; then
  effective_uid="$(id -u 2>/dev/null || printf '1')"
fi
if [[ "$effective_uid" != "0" ]]; then
  printf 'WARNING: running without root; JVM attach and /proc details may be limited by process ownership\n'
fi

printf '\n'
printf 'Recommended next steps:\n'
printf '%s\n' \
  '- Review GC logs and recent application errors' \
  '- Check JVM heap sizing against container or host memory limits' \
  '- Check thread count trend in monitoring before concluding a leak' \
  '- Capture jstack only if approved by operational process' \
  '- Attach this output to incident ticket'

# Exit status chosen when the target row was checked for "java".
exit "$exit_code"