135 lines
5.0 KiB
Bash
135 lines
5.0 KiB
Bash
|
|
#!/usr/bin/env bash
# Lightweight JVM process diagnostics: lists detected Java processes, or
# reports on a single process chosen by PID or by a command-line substring.

# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Option defaults; the argument parser overwrites them.
target_pid=''   # PID selected with --pid (empty means "not selected")
match_string='' # substring selected with --match (empty means "not selected")
top_count=10    # row limit selected with --top
|
||
|
|
|
||
|
|
# Print the command synopsis, a blank line, and a one-line description to
# stdout. Takes no arguments.
usage() {
  printf '%s\n' \
    'Usage: check_jvm_threads_heap.sh [--pid PID | --match STRING] [--top N] [--help]' \
    '' \
    'Provide lightweight JVM process diagnostics. Does not create heap dumps or modify processes.'
}
|
||
|
|
|
||
|
|
# Succeed (status 0) when $1 is a non-empty string consisting only of the
# digits 0-9; fail (status 1) otherwise.
is_number() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}
|
||
|
|
|
||
|
|
# Consume command-line options from left to right.
#   --pid / --match / --top each take the following argument as their value;
#   a missing value prints a CRITICAL line and exits 2.
#   --help / -h prints usage and exits 0.
#   Anything else prints a CRITICAL line plus usage and exits 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --pid)
      if [[ $# -lt 2 ]]; then
        printf 'CRITICAL: --pid requires a value\n'
        exit 2
      fi
      target_pid="$2"
      shift 2
      ;;
    --match)
      if [[ $# -lt 2 ]]; then
        printf 'CRITICAL: --match requires a value\n'
        exit 2
      fi
      match_string="$2"
      shift 2
      ;;
    --top)
      if [[ $# -lt 2 ]]; then
        printf 'CRITICAL: --top requires a value\n'
        exit 2
      fi
      top_count="$2"
      shift 2
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      printf 'CRITICAL: unknown option: %s\n' "$1"
      usage
      exit 2
      ;;
  esac
done
|
||
|
|
|
||
|
|
# Validate the parsed options before doing any work. Each failed check prints
# one CRITICAL line to stdout and exits with status 2. Checks run in a fixed
# order, so only the first problem is reported.

# --pid and --match are mutually exclusive.
if [[ -n "$target_pid" ]] && [[ -n "$match_string" ]]; then
  printf 'CRITICAL: use either --pid or --match, not both\n'
  exit 2
fi

# A PID, when supplied, must be all digits.
[[ -z "$target_pid" ]] || is_number "$target_pid" || {
  printf 'CRITICAL: --pid must be numeric\n'
  exit 2
}

# The row limit must be all digits.
is_number "$top_count" || {
  printf 'CRITICAL: --top must be numeric\n'
  exit 2
}

# Everything below relies on ps being available.
command -v ps >/dev/null 2>&1 || {
  printf 'CRITICAL: required command not found: ps\n'
  exit 2
}
|
||
|
|
|
||
|
|
# Print one ps row (pid, user, rss, pcpu, comm, args) per process whose row
# contains "java" in any letter case.
# Outputs: matching rows on stdout, unmodified.
# Returns: the status of ps when the snapshot fails, otherwise awk's status.
list_java_processes() {
  local ps_rows
  # Take the snapshot first and filter afterwards: the awk program text itself
  # contains "java", so running awk concurrently with ps (ps | awk) can make
  # the filter show up as a Java process.
  ps_rows="$(ps -eo pid=,user=,rss=,pcpu=,comm=,args=)" || return
  # Skip this script's own row ($$): its command line can contain "java"
  # (for example through the --match value) but it is not a JVM.
  awk -v self="$$" 'tolower($0) ~ /java/ && $1 != "" && $1 != self { print }' <<<"$ps_rows"
}

# Print the PID (first column) of the first row in file $2 that contains the
# literal string $1. Prints nothing when no row matches.
# Arguments: $1 - literal substring to look for
#            $2 - file holding ps rows
find_pid_by_match() {
  # "--" stops option parsing so a needle such as "-Dapp.name=x" is treated as
  # a pattern. grep exits 1 when nothing matches; that is an expected outcome
  # here, so it is not allowed to fail the pipeline.
  { grep -F -- "$1" "$2" || true; } | awk 'NR == 1 { print $1 }'
}

# Snapshot of Java process rows, removed automatically when the script exits.
tmp_java="$(mktemp)"
trap 'rm -f "$tmp_java"' EXIT

list_java_processes > "$tmp_java"

# Resolve --match to the PID of the first matching row (only when --pid was
# not given). target_pid stays empty when nothing matches.
if [[ -z "$target_pid" && -n "$match_string" ]]; then
  target_pid="$(find_pid_by_match "$match_string" "$tmp_java")"
fi
|
||
|
|
|
||
|
|
# No single target was resolved (no --pid, and --match absent or unmatched):
# print a summary line, a table of detected Java processes limited to
# top_count rows, generic next steps, and exit with status 1.
# NOTE(review): this branch exits 1 even after printing an "OK:" line; confirm
# that callers expect that pairing before changing either.
if [[ -z "$target_pid" ]]; then
  # Number of rows in the Java process snapshot.
  detected_count="$(wc -l < "$tmp_java" | awk '{ print $1 }')"
  if ((detected_count == 0)); then
    printf 'WARNING: No Java processes detected\n\n'
  elif [[ -n "$match_string" ]]; then
    # --match was supplied but selected nothing: say so instead of reporting OK.
    printf 'WARNING: no Java process matched: %s\n\n' "$match_string"
  else
    printf 'OK: Detected %s Java process(es); rerun with --pid PID for heap detail\n\n' "$detected_count"
  fi
  printf 'Detected JVM processes:\n'
  printf 'PID USER RSS_MB CPU COMMAND\n'
  # The row limit is applied inside awk instead of piping into head: with
  # pipefail and errexit active, head closing the pipe early can kill awk with
  # SIGPIPE on large output and abort the whole script.
  awk -v limit="$top_count" '
    NR > limit { exit }
    {
      pid = $1; user = $2; rss = int($3 / 1024); cpu = $4
      # Blank the first four fields so $0 is left holding the command columns.
      $1 = $2 = $3 = $4 = ""
      sub(/^ +/, "")
      printf "%s %s %s %s %s\n", pid, user, rss, cpu, $0
    }' "$tmp_java"
  printf '\nRecommended next steps:\n'
  printf -- '- Select a JVM process with --pid for focused diagnostics\n'
  printf -- '- Review GC logs and application logs for the selected process\n'
  printf -- '- Check heap sizing and thread count trend\n'
  printf -- '- Capture jstack only if approved by operational process\n'
  exit 1
fi
|
||
|
|
|
||
|
|
# Fetch the ps row for the target process and classify it.
# Sets: proc_line  - ps row (pid, user, rss, pcpu, comm, args) for target_pid
#       status     - "OK" when the row mentions java, otherwise "WARNING"
#       exit_code  - 0 for OK, 1 for WARNING
# Exits 2 with a CRITICAL line when ps cannot report the process.
#
# The row is fetched with a single ps call that doubles as the existence
# check. Probing first and fetching second leaves a window in which the
# process can exit, and the failed second call would abort the script under
# errexit without any message.
if ! proc_line="$(ps -p "$target_pid" -o pid=,user=,rss=,pcpu=,comm=,args= 2>/dev/null)" \
  || [[ -z "$proc_line" ]]; then
  printf 'CRITICAL: process does not exist or is not visible: %s\n' "$target_pid"
  exit 2
fi

# A here-string is used instead of "printf | grep -q" so that grep -q stopping
# at the first match cannot fail the pipeline (pipefail) on a very long row.
if grep -qi 'java' <<<"$proc_line"; then
  status="OK"
  exit_code=0
else
  printf 'WARNING: PID %s does not appear to be a Java process from ps output\n\n' "$target_pid"
  status="WARNING"
  exit_code=1
fi
|
||
|
|
|
||
|
|
# Print the value of the "Threads:" line found in the status file $1, or the
# word "unavailable" when the file is not readable, cannot be read, or has no
# such line.
# Arguments: $1 - path to a /proc/<pid>/status style file
# Outputs:   one line on stdout
read_thread_count() {
  local status_file="$1" count=''
  if [[ -r "$status_file" ]]; then
    # The process can exit between the readability test and the read. Treat
    # that as "unavailable" instead of letting errexit end the script silently.
    count="$(awk '/^Threads:/ { print $2; exit }' "$status_file" 2>/dev/null)" || count=''
  fi
  printf '%s\n' "${count:-unavailable}"
}

thread_count="$(read_thread_count "/proc/${target_pid}/status")"
|
||
|
|
|
||
|
|
# Report header: the overall status line, then a one-row table for the target
# process and its thread count.
printf '%s: JVM diagnostics collected for PID %s\n\n' "$status" "$target_pid"

printf 'Detected JVM process:\n'
printf 'PID USER RSS_MB CPU COMMAND\n'
# Columns 1-4 of the ps row are pid, user, rss and cpu; rss is divided by 1024
# and truncated for the RSS_MB column. Every remaining column is joined with
# single spaces to form COMMAND.
awk '{
  rest = ""
  for (i = 5; i <= NF; i++) {
    rest = rest (i > 5 ? " " : "") $i
  }
  printf "%s %s %s %s %s\n", $1, $2, int($3 / 1024), $4, rest
}' <<<"$proc_line"
printf 'Thread count: %s\n\n' "$thread_count"
|
||
|
|
|
||
|
|
# Read thread-dump text on stdin and print up to $1 lines of the form
# "<count> <state line>", highest count first. Only lines containing
# "java.lang.Thread.State" are counted.
# Arguments: $1 - maximum number of lines to print
summarize_thread_states() {
  local limit="$1"
  # The limit is applied by a final awk that reads all of its input. Using
  # head here could close the pipe early and, with pipefail active, make a
  # successful run look like a failure.
  awk '/java.lang.Thread.State/ { state[$0]++ } END { for (item in state) print state[item], item }' \
    | sort -rn \
    | awk -v limit="$limit" 'NR <= limit'
}

# Heap and thread evidence. jcmd is preferred; jstat is used only when jcmd is
# not installed; with neither, the section is reduced to a warning.
printf 'Heap and JVM evidence:\n'
if command -v jcmd >/dev/null 2>&1; then
  printf '\n[jcmd VM.flags]\n'
  jcmd "$target_pid" VM.flags 2>/dev/null || printf 'WARNING: jcmd VM.flags failed; permissions may be limited\n'
  printf '\n[jcmd GC.heap_info]\n'
  jcmd "$target_pid" GC.heap_info 2>/dev/null || printf 'WARNING: jcmd GC.heap_info failed; permissions may be limited\n'
  printf '\n[jcmd Thread.print summary]\n'
  # NOTE(review): Thread.print presumably asks the JVM for a full thread dump,
  # while the recommendations printed by this script gate jstack behind
  # approval - confirm that running it here is intended.
  # The dump is captured first so that only a jcmd failure produces the
  # warning, never a failure in the summarising pipeline.
  if thread_dump="$(jcmd "$target_pid" Thread.print 2>/dev/null)"; then
    summarize_thread_states "$top_count" <<<"$thread_dump"
  else
    printf 'WARNING: jcmd Thread.print failed; permissions may be limited\n'
  fi
elif command -v jstat >/dev/null 2>&1; then
  printf '\n[jstat -gc]\n'
  jstat -gc "$target_pid" 1 1 2>/dev/null || printf 'WARNING: jstat failed; permissions may be limited\n'
else
  printf 'WARNING: jcmd and jstat are unavailable; heap details skipped\n'
fi
printf '\n'
|
||
|
|
|
||
|
|
# Evidence footer: the key values used for this report, plus a caveat when the
# script is not running as uid 0.
printf 'Evidence:\n'
printf 'PID=%s thread_count=%s top=%s\n' "$target_pid" "$thread_count" "$top_count"

# Prefer the shell's EUID; fall back to `id -u`, and to 1 (treated as
# non-root) when that also fails.
effective_uid="${EUID:-}"
if [[ -z "$effective_uid" ]]; then
  effective_uid="$(id -u 2>/dev/null || printf '1')"
fi
[[ "$effective_uid" == "0" ]] \
  || printf 'WARNING: running without root; JVM attach and /proc details may be limited by process ownership\n'
printf '\n'
|
||
|
|
|
||
|
|
# Closing section: fixed follow-up advice, then exit with the status chosen
# when the target process was classified.
cat <<'STEPS'
Recommended next steps:
- Review GC logs and recent application errors
- Check JVM heap sizing against container or host memory limits
- Check thread count trend in monitoring before concluding a leak
- Capture jstack only if approved by operational process
- Attach this output to incident ticket
STEPS

exit "$exit_code"
|