Update docs for os-healthcheck toolkit layout
This commit is contained in:
+29
@@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
# Disk usage check.
#
# Usage:   <script> [threshold]
#   threshold  critical usage percentage, integer from 1 to 100 (default: 80)
#
# Prints one OK/WARNING/CRITICAL line per filesystem listed by df (tmpfs and
# devtmpfs excluded). WARNING starts 5 points below the threshold.
#
# Exit status: 0 nothing critical
#              1 at least one filesystem at or above the threshold
#              2 invalid threshold, or df failed without listing anything
requested_threshold="${1:-80}"

# Accept at most three significant digits and force base 10: a bare "08" is an
# invalid octal literal in bash arithmetic and "070" would silently mean 56.
threshold=0
if [[ "$requested_threshold" =~ ^0*([0-9]{1,3})$ ]]; then
  threshold=$(( 10#${BASH_REMATCH[1]} ))
fi

if (( threshold < 1 || threshold > 100 )); then
  printf 'CRITICAL: invalid threshold "%s"; provide an integer from 1 to 100\n' "$requested_threshold" >&2
  exit 2
fi

status=0
warning_threshold=$(( threshold > 5 ? threshold - 5 : threshold ))

# Capture df once so its exit status is visible; a process substitution would
# hide it from errexit and pipefail. Output is still used after a non-zero
# status because df may list some filesystems even when it reports an error.
df_status=0
df_output="$(df -P -x tmpfs -x devtmpfs)" || df_status=$?

if (( df_status != 0 )); then
  if [[ "$df_output" != *$'\n'* ]]; then
    printf 'CRITICAL: df failed with status %s and listed no filesystems\n' "$df_status" >&2
    exit 2
  fi
  printf 'WARNING: df exited with status %s; results may be incomplete\n' "$df_status" >&2
fi

# The last read variable absorbs the rest of the line, so mount points that
# contain spaces stay intact.
while read -r filesystem _ _ avail use_percent mountpoint; do
  [[ -n "$filesystem" ]] || continue

  usage="${use_percent%\%}"

  # A non-numeric usage column (for example "-") would make the arithmetic
  # below fail and abort the script, so report the entry and move on.
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    printf 'WARNING: %s mounted on %s has no usage percentage (%s); skipped\n' "$filesystem" "$mountpoint" "$use_percent"
    continue
  fi

  if (( 10#$usage >= threshold )); then
    printf 'CRITICAL: %s mounted on %s is %s used; threshold is %s%% (%s free)\n' "$filesystem" "$mountpoint" "$use_percent" "$threshold" "$avail"
    status=1
  elif (( 10#$usage >= warning_threshold )); then
    printf 'WARNING: %s mounted on %s is %s used; threshold is %s%%\n' "$filesystem" "$mountpoint" "$use_percent" "$threshold"
  else
    printf 'OK: %s mounted on %s is %s used\n' "$filesystem" "$mountpoint" "$use_percent"
  fi
done < <(awk 'NR > 1' <<<"$df_output")

exit "$status"
|
||||
+68
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
# Print a section heading: a blank line followed by "== <title> ==".
# Arguments: $1 - section title
section() {
  local title="$1"

  printf '\n== %s ==\n' "$title"
}
|
||||
|
||||
# Run a command if it is installed; print a WARNING line instead of failing.
# Arguments: $1  - human-readable description used in the failure message
#            $2… - command and its arguments
# Outputs:   the command's own output, or one WARNING line on stdout
run_or_warn() {
  local label="$1"
  shift

  # Missing tool: name the command itself, not the description.
  if ! command -v "$1" >/dev/null 2>&1; then
    printf 'WARNING: %s command not available\n' "$1"
    return
  fi

  if ! "$@"; then
    printf 'WARNING: %s command failed\n' "$label"
  fi
}
|
||||
|
||||
# Print the ps header plus the first ten processes for a given sort order.
# Arguments: $1 - value passed to ps --sort, e.g. "-%cpu" or "-%mem"
# Outputs:   up to 11 lines on stdout, or one WARNING line when ps is missing
#            or fails
# Returns:   0 in both cases, so a failing ps cannot abort an errexit caller
top_processes() {
  local sort_key="$1"

  if ! command -v ps >/dev/null 2>&1; then
    printf 'WARNING: ps command not available\n'
    return 0
  fi

  # awk reads ps's output to the end. `head -n 11` closes the pipe early, ps can
  # then be killed by SIGPIPE, and with pipefail + errexit that ends the script.
  ps -eo pid,ppid,comm,%cpu,%mem --sort="$sort_key" | awk 'NR <= 11' \
    || printf 'WARNING: ps command failed\n'
}
|
||||
|
||||
# Report body. Every probe is guarded so that one missing or failing tool is
# reported as a WARNING instead of aborting the remaining sections under errexit.
section "Host"
run_or_warn "hostname" hostname
run_or_warn "uptime" uptime

section "OS"
if [[ -r /etc/os-release ]]; then
  # Source inside a subshell so the file's variables cannot leak into, or
  # overwrite, variables of this script.
  (
    . /etc/os-release
    printf '%s\n' "${PRETTY_NAME:-Unknown Linux}"
  ) || printf 'WARNING: /etc/os-release could not be parsed\n'
else
  printf 'WARNING: /etc/os-release not readable\n'
fi
run_or_warn "kernel release" uname -r

section "CPU Load"
if [[ -r /proc/loadavg ]]; then
  run_or_warn "load average" awk '{print "1m="$1, "5m="$2, "15m="$3}' /proc/loadavg
else
  run_or_warn "uptime" uptime
fi

section "Memory"
run_or_warn "memory usage" free -h

section "Disk"
run_or_warn "disk usage" df -h -x tmpfs -x devtmpfs

section "Failed systemd Services"
if command -v systemctl >/dev/null 2>&1; then
  # Best effort: a non-zero status here must not end the report.
  systemctl --failed --no-pager || true
else
  printf 'WARNING: systemctl command not available\n'
fi

section "Top CPU Processes"
top_processes "-%cpu"

section "Top Memory Processes"
top_processes "-%mem"
|
||||
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
# Optional first argument: host used by the DNS and connectivity checks.
target="${1:-}"

# Exit status of the script; critical() sets it to 1.
status=0

# Messages recorded by warn() and critical(); the summary prints their counts.
declare -a warnings=()
declare -a criticals=()
|
||||
|
||||
# Print a section heading: a blank line followed by "[<title>]".
# Arguments: $1 - section title
section() {
  local title="$1"

  printf '\n[%s]\n' "$title"
}
|
||||
|
||||
# Record a warning and print it on stdout.
# Globals:   warnings (appended)
# Arguments: $1 - warning text
warn() {
  local message="$1"

  warnings+=("$message")
  printf 'WARNING: %s\n' "$message"
}
|
||||
|
||||
# Record a critical finding, mark the run as failed, and print it on stdout.
# Globals:   criticals (appended), status (set to 1)
# Arguments: $1 - finding text
critical() {
  local message="$1"

  status=1
  criticals+=("$message")
  printf 'CRITICAL: %s\n' "$message"
}
|
||||
|
||||
# Succeed when the named command can be found by the shell.
# Arguments: $1 - command name
# Returns:   status of `command -v` for that name; prints nothing
have() {
  local tool="$1"

  command -v "$tool" >/dev/null 2>&1
}
|
||||
|
||||
# Run a command when its tool is installed; otherwise, or on failure, record a
# warning through warn().
# Arguments: $1  - tool name to look up and to mention in warnings
#            $2… - command line to execute
run_if_available() {
  local tool="$1"
  shift

  if ! have "$tool"; then
    warn "$tool command not available"
    return
  fi

  if ! "$@"; then
    warn "$tool command failed"
  fi
}
|
||||
|
||||
section "LOCAL NETWORK"
if have ip; then
  ip addr || warn "ip addr command failed"
  printf '\nRouting table:\n'
  ip route || warn "ip route command failed"
  printf '\nDefault gateway:\n'
  # Query the default route once and inspect the captured text. Piping ip into
  # `grep -q` under pipefail can report failure (grep exits early, ip may get
  # SIGPIPE) even though a default route exists.
  if default_routes="$(ip route show default)"; then
    if [[ -n "$default_routes" ]]; then
      printf '%s\n' "$default_routes"
    fi
    if ! grep -q '^default ' <<<"$default_routes"; then
      critical "default gateway not configured"
    fi
  else
    # The query itself failed.
    critical "default gateway not found"
  fi
else
  warn "ip command not available"
fi
|
||||
|
||||
section "INTERFACES"
active_interfaces=0
if have ip; then
  # Run the query once: print it and count from the same snapshot. A failed
  # query becomes a warning; a second, unguarded run inside a command
  # substitution would abort the script under errexit + pipefail.
  if link_table="$(ip -br link)"; then
    if [[ -n "$link_table" ]]; then
      printf '%s\n' "$link_table"
    fi
    # Count rows whose state column is exactly UP, ignoring loopback.
    active_interfaces="$(awk '$2 == "UP" && $1 != "lo" {count++} END {print count+0}' <<<"$link_table")"
    if (( active_interfaces == 0 )); then
      critical "no active non-loopback interface detected"
    else
      printf 'OK: %s active non-loopback interface(s) detected\n' "$active_interfaces"
    fi
  else
    warn "interface state query failed"
  fi
else
  warn "cannot inspect interface state without ip command"
fi
|
||||
|
||||
section "DNS"
if [[ -r /etc/resolv.conf ]]; then
  cat /etc/resolv.conf
else
  warn "/etc/resolv.conf not readable"
fi

# Without a target the lookup falls back to "localhost".
dns_target="${target:-localhost}"
if have getent; then
  # Resolve once and reuse the answer: the OK line and the printed records then
  # come from the same lookup, and no unguarded second call can abort the
  # script under errexit.
  if dns_records="$(getent hosts "$dns_target" 2>/dev/null)"; then
    printf 'OK: DNS resolution succeeded for %s\n' "$dns_target"
    printf '%s\n' "$dns_records"
  else
    critical "DNS resolution failed for ${dns_target}"
  fi
elif have nslookup; then
  if nslookup "$dns_target"; then
    printf 'OK: DNS resolution succeeded for %s\n' "$dns_target"
  else
    critical "DNS resolution failed for ${dns_target}"
  fi
else
  warn "no DNS lookup tool available"
fi
|
||||
|
||||
section "CONNECTIVITY"
if [[ -z "$target" ]]; then
  printf 'OK: no target provided; skipped remote connectivity checks\n'
else
  # ICMP: three probes, two-second wait each.
  if ! have ping; then
    warn "ping command not available"
  elif ping -c 3 -W 2 "$target"; then
    printf 'OK: ping succeeded for %s\n' "$target"
  else
    critical "ping failed for ${target}"
  fi

  # Path trace; a missing or failing traceroute is only a warning.
  run_if_available traceroute traceroute "$target"

  # TCP 443: nc is tried first, curl (HTTPS HEAD request) is the fallback.
  if have nc; then
    if nc -vz -w 3 "$target" 443; then
      printf 'OK: TCP 443 reachable on %s\n' "$target"
    else
      critical "TCP 443 connectivity failed for ${target}"
    fi
  elif have curl; then
    if curl --head --silent --show-error --connect-timeout 5 "https://${target}" >/dev/null; then
      printf 'OK: HTTPS connectivity succeeded for %s\n' "$target"
    else
      critical "HTTPS connectivity failed for ${target}"
    fi
  else
    warn "no TCP connectivity test tool available (nc or curl)"
  fi
fi
|
||||
|
||||
section "PORTS"
# Numeric listing of listening TCP/UDP sockets. run_if_available emits the
# same "ss command failed" / "ss command not available" warnings.
run_if_available ss ss -tuln
|
||||
|
||||
section "SUMMARY"
# Totals come from the arrays filled by critical() and warn().
critical_count="${#criticals[@]}"
warning_count="${#warnings[@]}"

if [[ "$critical_count" -gt 0 ]]; then
  printf 'CRITICAL: %s issue(s) detected\n' "$critical_count"
fi

if [[ "$warning_count" -gt 0 ]]; then
  printf 'WARNING: %s warning(s) detected\n' "$warning_count"
fi

# The OK line depends only on status, so warnings alone do not suppress it.
if [[ "$status" -eq 0 ]]; then
  printf 'OK: no obvious DNS or connectivity problems detected\n'
fi

exit "$status"
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
# Services to check, taken from the command line; filled with defaults later
# when no argument was given.
declare -a services=("$@")
|
||||
|
||||
# Check whether systemd lists a unit file for a service.
# Arguments: $1 - service name, with or without the ".service" suffix
# Returns:   0 when list-unit-files reports exactly that unit, non-zero otherwise
service_exists() {
  # Normalise so "sshd" and "sshd.service" both look up "sshd.service".
  local unit="${1%.service}.service"

  # Compare the first column literally inside awk: `grep -x` would treat the
  # dots of the unit name as regex wildcards. The name travels through the
  # environment so awk does not interpret backslashes in it, and awk reads all
  # input, so the producer is never cut off by an early-exiting consumer.
  systemctl list-unit-files "$unit" --no-legend 2>/dev/null \
    | unit="$unit" awk '$1 == ENVIRON["unit"] { found = 1 } END { exit !found }'
}
|
||||
|
||||
# Print the name of the cron service to check by default.
# Outputs: the first of "cron", "crond" that service_exists accepts;
#          "cron" when neither exists (no trailing newline)
pick_default_scheduler() {
  local candidate

  for candidate in cron crond; do
    if service_exists "$candidate"; then
      printf '%s' "$candidate"
      return
    fi
  done

  printf 'cron'
}
|
||||
|
||||
# Print the name of the SSH service to check by default.
# Outputs: the first of "sshd", "ssh" that service_exists accepts;
#          "sshd" when neither exists (no trailing newline)
pick_default_ssh() {
  local candidate

  for candidate in sshd ssh; do
    if service_exists "$candidate"; then
      printf '%s' "$candidate"
      return
    fi
  done

  printf 'sshd'
}
|
||||
|
||||
# Service check: prints OK/CRITICAL per service and exits 1 when any service is
# missing or not active.
if ! command -v systemctl >/dev/null 2>&1; then
  printf 'CRITICAL: systemctl command not available; cannot check services\n' >&2
  exit 1
fi

# No services named on the command line: check the SSH and cron defaults.
if (( ${#services[@]} == 0 )); then
  services=("$(pick_default_ssh)" "$(pick_default_scheduler)")
fi

status=0

for service in "${services[@]}"; do
  if ! service_exists "$service"; then
    printf 'CRITICAL: %s service not found\n' "$service"
    status=1
    continue
  fi

  # A single is-active call supplies both the verdict (exit status) and the
  # state text, so the printed state can never contradict the verdict the way
  # two separate calls could if the unit changed state in between.
  if state="$(systemctl is-active "$service" 2>/dev/null)"; then
    printf 'OK: %s is active\n' "$service"
  else
    printf 'CRITICAL: %s is %s\n' "$service" "${state:-unknown}"
    status=1
  fi
done

exit "$status"
|
||||
+81
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env bash
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
host="$(hostname)"
timestamp="$(date '+%Y-%m-%d_%H%M%S')"

# Create the report file with mktemp instead of writing to a guessable /tmp
# path: the name gets a random component and the file is created exclusively,
# so another local user cannot pre-plant a file or symlink at the path.
# The first template keeps the .txt extension; the fallback is for mktemp
# implementations that require the X's at the end of the template.
report_stem="/tmp/system_report_${host}_${timestamp}"
report="$(mktemp "${report_stem}_XXXXXX.txt" 2>/dev/null)" \
  || report="$(mktemp "${report_stem}.txt.XXXXXX")"
|
||||
|
||||
# Print a report heading: an empty line, then the title wrapped in "== … ==".
# Arguments: $1 - heading text
section() {
  local -r heading="$1"

  printf '\n== %s ==\n' "$heading"
}
|
||||
|
||||
# Execute a command when its executable can be found; otherwise, or when it
# fails, print a WARNING line on stdout instead of propagating the failure.
# Arguments: $1  - description used in the "command failed" message
#            $2… - command and its arguments
run_or_warn() {
  local description="$1"
  shift
  local tool="$1"

  if command -v "$tool" >/dev/null 2>&1; then
    if ! "$@"; then
      printf 'WARNING: %s command failed\n' "$description"
    fi
  else
    # The "not available" message names the command, not the description.
    printf 'WARNING: %s command not available\n' "$tool"
  fi
}
|
||||
|
||||
# Collect every section into the report file. Each probe is guarded so that a
# missing or failing tool is recorded as a WARNING inside the report instead of
# aborting under errexit and leaving a truncated file behind.
{
  section "Host"
  run_or_warn "hostname" hostname

  section "Date"
  run_or_warn "date" date

  section "Uptime"
  run_or_warn "uptime" uptime

  section "OS"
  if [[ -r /etc/os-release ]]; then
    # Source inside a subshell so the file's variables cannot leak into, or
    # overwrite, variables of this script.
    (
      . /etc/os-release
      printf '%s\n' "${PRETTY_NAME:-Unknown Linux}"
    ) || printf 'WARNING: /etc/os-release could not be parsed\n'
  else
    printf 'WARNING: /etc/os-release not readable\n'
  fi

  section "Kernel"
  run_or_warn "kernel release" uname -r

  section "CPU Load"
  if [[ -r /proc/loadavg ]]; then
    run_or_warn "load average" awk '{print "1m="$1, "5m="$2, "15m="$3}' /proc/loadavg
  else
    run_or_warn "uptime" uptime
  fi

  section "Memory"
  run_or_warn "memory usage" free -h

  section "Disk"
  run_or_warn "disk usage" df -h -x tmpfs -x devtmpfs

  section "Failed systemd Services"
  if command -v systemctl >/dev/null 2>&1; then
    # Best effort: a non-zero status here must not end the report.
    systemctl --failed --no-pager || true
  else
    printf 'WARNING: systemctl command not available\n'
  fi

  section "Listening Ports"
  # Emits "ss command failed" / "ss command not available" as warnings.
  run_or_warn "ss" ss -tuln

  section "Recent Kernel Messages"
  if command -v journalctl >/dev/null 2>&1; then
    journalctl -k -n 50 --no-pager || printf 'WARNING: journalctl kernel log query failed\n'
  else
    printf 'WARNING: journalctl command not available\n'
  fi
} > "$report"

printf 'System report written to: %s\n' "$report"
|
||||
Reference in New Issue
Block a user