Update docs for os-healthcheck toolkit layout

This commit is contained in:
Mateusz Suski
2026-05-05 21:50:20 +00:00
parent 65c7c82f0f
commit c88428d092
8 changed files with 16 additions and 10 deletions
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Disk usage check for mounted filesystems (tmpfs and devtmpfs are excluded).
#
# Usage: <script> [threshold]
#   threshold  CRITICAL percentage, an integer from 1 to 100 (default: 80).
#              A WARNING is printed from five points below the threshold
#              when the threshold is greater than 5.
#
# Exit status:
#   0  no filesystem reached the threshold
#   1  at least one filesystem is CRITICAL, or df returned nothing to check
#   2  the threshold argument is invalid
set -o errexit
set -o nounset
set -o pipefail

requested_threshold="${1:-80}"

# Leading zeros are accepted but the value is always evaluated in base 10;
# otherwise bash arithmetic would treat "08"/"09" as invalid octal numbers.
if [[ "$requested_threshold" =~ ^0*([0-9]{1,3})$ ]]; then
  threshold=$(( 10#${BASH_REMATCH[1]} ))
else
  threshold=0
fi
if (( threshold < 1 || threshold > 100 )); then
  printf 'CRITICAL: invalid threshold "%s"; provide an integer from 1 to 100\n' "$requested_threshold" >&2
  exit 2
fi

status=0
checked=0
warning_threshold=$(( threshold > 5 ? threshold - 5 : threshold ))

# The last variable of `read` receives the remainder of the line, so mount
# points that contain spaces are kept intact.
while read -r filesystem _blocks _used avail use_percent mountpoint; do
  usage="${use_percent%\%}"
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    # A non-numeric capacity column (for example "-") cannot be compared.
    printf 'WARNING: %s mounted on %s reports no usage percentage (%s); skipped\n' "$filesystem" "$mountpoint" "${use_percent:--}"
    continue
  fi
  checked=$(( checked + 1 ))
  if (( 10#$usage >= threshold )); then
    printf 'CRITICAL: %s mounted on %s is %s used; threshold is %s%% (%s free)\n' "$filesystem" "$mountpoint" "$use_percent" "$threshold" "$avail"
    status=1
  elif (( 10#$usage >= warning_threshold )); then
    printf 'WARNING: %s mounted on %s is %s used; threshold is %s%%\n' "$filesystem" "$mountpoint" "$use_percent" "$threshold"
  else
    printf 'OK: %s mounted on %s is %s used\n' "$filesystem" "$mountpoint" "$use_percent"
  fi
done < <(df -P -x tmpfs -x devtmpfs | awk 'NR > 1')

# The exit status of df is not visible through the process substitution, so
# an empty result is the only signal that nothing could be checked.
if (( checked == 0 )); then
  printf 'CRITICAL: df reported no usable filesystems; disk usage could not be checked\n' >&2
  status=1
fi

exit "$status"
+68
View File
@@ -0,0 +1,68 @@
#!/usr/bin/env bash
# Strict mode: exit on unhandled errors (-e), reject unset variables (-u),
# and let a pipeline fail when any of its stages fails (pipefail).
set -euo pipefail
# Print a section heading: a blank line followed by "== <title> ==".
# Arguments: $1 - heading text
section() {
  local title="$1"
  printf '\n== %s ==\n' "$title"
}
# Run a command if it is installed; print a WARNING line instead of failing.
# Arguments: $1     - human-readable description used in the failure warning
#            $2...  - command to run, followed by its arguments
# Outputs:   the command's own output, or a WARNING line on stdout
run_or_warn() {
  local label="$1"
  shift
  local tool="$1"

  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'WARNING: %s command not available\n' "$tool"
    return
  fi

  if ! "$@"; then
    printf 'WARNING: %s command failed\n' "$label"
  fi
}
# Print the ps header plus the first ten processes for a given sort order.
# Arguments: $1 - sort key passed to `ps --sort` (for example "-%cpu")
# Outputs:   up to 11 lines of ps output, or a WARNING line on stdout
# Returns:   0 even when ps is missing or fails, so the caller's errexit
#            does not abort the rest of the summary
top_processes() {
  local sort_key="$1"
  local listing

  if ! command -v ps >/dev/null 2>&1; then
    printf 'WARNING: ps command not available\n'
    return 0
  fi

  # Capture the full listing first instead of piping ps into head: with
  # pipefail, head closing the pipe early can kill ps with SIGPIPE and make
  # the whole pipeline (and, under errexit, the script) fail.
  if listing="$(ps -eo pid,ppid,comm,%cpu,%mem --sort="$sort_key")"; then
    head -n 11 <<<"$listing"
  else
    printf 'WARNING: ps command failed\n'
  fi
}
# Report body: each section prints a heading followed by best-effort output.
# External tools go through run_or_warn so that a missing or failing binary
# yields a WARNING line instead of aborting the whole summary under errexit.

section "Host"
run_or_warn "hostname" hostname
run_or_warn "uptime" uptime

section "OS"
if [[ -r /etc/os-release ]]; then
  # Source inside a subshell so the variables defined by os-release do not
  # leak into, or overwrite anything in, this script.
  (
    # shellcheck disable=SC1091
    . /etc/os-release
    printf '%s\n' "${PRETTY_NAME:-Unknown Linux}"
  ) || printf 'WARNING: /etc/os-release could not be parsed\n'
else
  printf 'WARNING: /etc/os-release not readable\n'
fi
run_or_warn "kernel release" uname -r

section "CPU Load"
if [[ -r /proc/loadavg ]]; then
  awk '{print "1m="$1, "5m="$2, "15m="$3}' /proc/loadavg
else
  # Fall back to uptime, which also reports the load averages.
  run_or_warn "uptime" uptime
fi

section "Memory"
run_or_warn "memory usage" free -h

section "Disk"
run_or_warn "disk usage" df -h -x tmpfs -x devtmpfs

section "Failed systemd Services"
if command -v systemctl >/dev/null 2>&1; then
  # Best effort: a non-zero exit from systemctl must not abort the summary.
  systemctl --failed --no-pager || true
else
  printf 'WARNING: systemctl command not available\n'
fi

section "Top CPU Processes"
top_processes "-%cpu"

section "Top Memory Processes"
top_processes "-%mem"
@@ -0,0 +1,148 @@
#!/usr/bin/env bash
# Network diagnostics: local addressing, interfaces, DNS, remote
# connectivity and listening ports.
#
# Usage: <script> [target]
#   target  optional host used for the DNS and connectivity checks
#
# Strict mode: exit on unhandled errors, reject unset variables, and let a
# pipeline fail when any of its stages fails.
set -euo pipefail

# First positional argument; empty when no target was given.
target="${1:-}"

# Overall exit status; switched to 1 by critical().
status=0

# Messages collected by warn() and critical() for the final summary.
declare -a warnings=()
declare -a criticals=()
# Print a section heading: a blank line followed by "[<title>]".
# Arguments: $1 - heading text
section() {
  local title="$1"
  printf '\n[%s]\n' "$title"
}
# Record a warning and print it.
# Globals:   warnings (appended to)
# Arguments: $1 - warning message
# Outputs:   "WARNING: <message>" on stdout
warn() {
  local message="$1"
  warnings+=("$message")
  printf 'WARNING: %s\n' "$message"
}
# Record a critical finding, mark the run as failed, and print it.
# Globals:   criticals (appended to), status (set to 1)
# Arguments: $1 - description of the problem
# Outputs:   "CRITICAL: <message>" on stdout
critical() {
  local message="$1"
  criticals+=("$message")
  status=1
  printf 'CRITICAL: %s\n' "$message"
}
# Succeed when the named command can be resolved by the shell.
# Arguments: $1 - command name
# Outputs:   nothing (both stdout and stderr are discarded)
have() {
  local tool="$1"
  command -v "$tool" &>/dev/null
}
# Run a command when its binary is present; otherwise, or when it fails,
# record a warning instead of aborting.
# Arguments: $1     - command name to look up and to mention in warnings
#            $2...  - the command line to execute
run_if_available() {
  local tool="$1"
  shift

  if ! have "$tool"; then
    warn "$tool command not available"
    return
  fi

  if ! "$@"; then
    warn "$tool command failed"
  fi
}
# Main diagnostic flow. Every external query is run once and its output is
# reused for both display and evaluation; a failing query is reported via
# warn/critical instead of aborting the script under errexit/pipefail.

section "LOCAL NETWORK"
if have ip; then
  ip addr || warn "ip addr command failed"
  printf '\nRouting table:\n'
  ip route || warn "ip route command failed"
  printf '\nDefault gateway:\n'
  if default_routes="$(ip route show default)"; then
    if [[ -n "$default_routes" ]]; then
      printf '%s\n' "$default_routes"
    fi
    # The query itself succeeds without a default route; look at the output.
    if ! grep -q '^default ' <<<"$default_routes"; then
      critical "default gateway not configured"
    fi
  else
    critical "default gateway not found"
  fi
else
  warn "ip command not available"
fi

section "INTERFACES"
active_interfaces=0
if have ip; then
  if link_table="$(ip -br link)"; then
    if [[ -n "$link_table" ]]; then
      printf '%s\n' "$link_table"
    fi
    # Count interfaces reported as UP, ignoring the loopback device.
    active_interfaces="$(awk '$2 == "UP" && $1 != "lo" {count++} END {print count+0}' <<<"$link_table")"
    if (( active_interfaces == 0 )); then
      critical "no active non-loopback interface detected"
    else
      printf 'OK: %s active non-loopback interface(s) detected\n' "$active_interfaces"
    fi
  else
    # Without a usable listing nothing can be said about interface state,
    # so only the query failure is reported.
    warn "interface state query failed"
  fi
else
  warn "cannot inspect interface state without ip command"
fi

section "DNS"
if [[ -r /etc/resolv.conf ]]; then
  cat /etc/resolv.conf
else
  warn "/etc/resolv.conf not readable"
fi

# With no target given, resolve localhost as a basic resolver sanity check.
dns_target="${target:-localhost}"
if have getent; then
  if dns_answer="$(getent hosts "$dns_target" 2>/dev/null)"; then
    printf 'OK: DNS resolution succeeded for %s\n' "$dns_target"
    printf '%s\n' "$dns_answer"
  else
    critical "DNS resolution failed for ${dns_target}"
  fi
elif have nslookup; then
  if nslookup "$dns_target"; then
    printf 'OK: DNS resolution succeeded for %s\n' "$dns_target"
  else
    critical "DNS resolution failed for ${dns_target}"
  fi
else
  warn "no DNS lookup tool available"
fi

section "CONNECTIVITY"
if [[ -n "$target" ]]; then
  if have ping; then
    if ping -c 3 -W 2 "$target"; then
      printf 'OK: ping succeeded for %s\n' "$target"
    else
      critical "ping failed for ${target}"
    fi
  else
    warn "ping command not available"
  fi

  run_if_available traceroute traceroute "$target"

  # Prefer nc for a plain TCP probe of port 443; fall back to an HTTPS
  # request with curl when nc is not installed.
  if have nc; then
    if nc -vz -w 3 "$target" 443; then
      printf 'OK: TCP 443 reachable on %s\n' "$target"
    else
      critical "TCP 443 connectivity failed for ${target}"
    fi
  elif have curl; then
    if curl --head --silent --show-error --connect-timeout 5 "https://${target}" >/dev/null; then
      printf 'OK: HTTPS connectivity succeeded for %s\n' "$target"
    else
      critical "HTTPS connectivity failed for ${target}"
    fi
  else
    warn "no TCP connectivity test tool available (nc or curl)"
  fi
else
  printf 'OK: no target provided; skipped remote connectivity checks\n'
fi

section "PORTS"
if have ss; then
  ss -tuln || warn "ss command failed"
else
  warn "ss command not available"
fi

section "SUMMARY"
if (( ${#criticals[@]} > 0 )); then
  printf 'CRITICAL: %s issue(s) detected\n' "${#criticals[@]}"
fi
if (( ${#warnings[@]} > 0 )); then
  printf 'WARNING: %s warning(s) detected\n' "${#warnings[@]}"
fi
if (( status == 0 )); then
  printf 'OK: no obvious DNS or connectivity problems detected\n'
fi

# Exit 1 when any critical finding was recorded, 0 otherwise.
exit "$status"
+60
View File
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
# Check that systemd services are active.
#
# Usage: <script> [service...]
#   With no arguments, a default SSH service and a default scheduler
#   service are checked.
#
# Strict mode: exit on unhandled errors, reject unset variables, and let a
# pipeline fail when any of its stages fails.
set -euo pipefail

# Services named on the command line; may be empty.
declare -a services=("$@")
# Succeed when systemd lists a unit file for the given service.
# Arguments: $1 - service name, with or without the ".service" suffix
# Returns:   0 if "<name>.service" appears in `systemctl list-unit-files`,
#            1 otherwise (including when systemctl fails)
service_exists() {
  # Strip an explicit suffix so "nginx.service" is not looked up as
  # "nginx.service.service".
  local service="${1%.service}"
  local unit="${service}.service"
  local listed_unit _state

  # Compare names literally; the quoted right-hand side is not treated as a
  # pattern, so "." and other special characters only match themselves.
  while read -r listed_unit _state; do
    if [[ "$listed_unit" == "$unit" ]]; then
      return 0
    fi
  done < <(systemctl list-unit-files "$unit" --no-legend 2>/dev/null)

  return 1
}
# Print the name of the scheduler service to check by default.
# Tries "cron" and then "crond"; prints "cron" when neither is found.
# Outputs: the chosen service name on stdout, without a trailing newline
pick_default_scheduler() {
  local candidate
  for candidate in cron crond; do
    if service_exists "$candidate"; then
      printf '%s' "$candidate"
      return 0
    fi
  done
  printf 'cron'
}
# Print the name of the SSH service to check by default.
# Tries "sshd" and then "ssh"; prints "sshd" when neither is found.
# Outputs: the chosen service name on stdout, without a trailing newline
pick_default_ssh() {
  local candidate
  for candidate in sshd ssh; do
    if service_exists "$candidate"; then
      printf '%s' "$candidate"
      return 0
    fi
  done
  printf 'sshd'
}
# Nothing can be checked without systemctl.
command -v systemctl >/dev/null 2>&1 || {
  printf 'CRITICAL: systemctl command not available; cannot check services\n' >&2
  exit 1
}

# No services named on the command line: check the default SSH and
# scheduler services.
if [[ ${#services[@]} -eq 0 ]]; then
  services=("$(pick_default_ssh)" "$(pick_default_scheduler)")
fi

# Stays 0 only when every requested service exists and is active.
status=0
for service in "${services[@]}"; do
  if service_exists "$service"; then
    if systemctl is-active --quiet "$service"; then
      printf 'OK: %s is active\n' "$service"
    else
      # Ask again without --quiet to obtain the state name for the message.
      state="$(systemctl is-active "$service" 2>/dev/null || true)"
      printf 'CRITICAL: %s is %s\n' "$service" "${state:-unknown}"
      status=1
    fi
  else
    printf 'CRITICAL: %s service not found\n' "$service"
    status=1
  fi
done

exit "$status"
+81
View File
@@ -0,0 +1,81 @@
#!/usr/bin/env bash
# Write a point-in-time system report to a file under /tmp.
set -o errexit
set -o nounset
set -o pipefail

# Host name used in the report file name. Fall back to `uname -n` so the
# script still works on systems that do not ship a `hostname` binary.
if command -v hostname >/dev/null 2>&1; then
  host="$(hostname)"
else
  host="$(uname -n)"
fi
timestamp="$(date '+%Y-%m-%d_%H%M%S')"

# Create the report file with mktemp rather than writing to a predictable
# path in /tmp: the file is created exclusively with a random component, so
# a pre-existing file or symlink with a guessed name cannot be overwritten.
# The second form is a fallback for mktemp implementations that require the
# X placeholders at the end of the template.
report="$(mktemp "/tmp/system_report_${host}_${timestamp}_XXXXXX.txt" 2>/dev/null)" \
  || report="$(mktemp "/tmp/system_report_${host}_${timestamp}.XXXXXX")"
# Print a report heading: a blank line followed by "== <title> ==".
# Arguments: $1 - heading text
section() {
  local heading="$1"
  printf '\n== %s ==\n' "$heading"
}
# Execute a command when it is installed, degrading to a WARNING line when
# the command is missing or exits non-zero.
# Arguments: $1     - description used in the "command failed" warning
#            $2...  - command and arguments to execute
# Outputs:   the command's output, or a WARNING line on stdout
run_or_warn() {
  local what="$1"
  shift
  local binary="$1"

  if command -v "$binary" >/dev/null 2>&1; then
    if ! "$@"; then
      printf 'WARNING: %s command failed\n' "$what"
    fi
    return 0
  fi

  printf 'WARNING: %s command not available\n' "$binary"
}
# Generate the report. All sections run inside one group so their combined
# stdout goes to the report file. External tools go through run_or_warn (or
# an explicit availability check) so that a missing or failing binary adds a
# WARNING line to the report instead of aborting and leaving it truncated.
{
  section "Host"
  run_or_warn "hostname" hostname

  section "Date"
  date

  section "Uptime"
  run_or_warn "uptime" uptime

  section "OS"
  if [[ -r /etc/os-release ]]; then
    # Source inside a subshell so the variables defined by os-release do not
    # leak into, or overwrite anything in, this script.
    (
      # shellcheck disable=SC1091
      . /etc/os-release
      printf '%s\n' "${PRETTY_NAME:-Unknown Linux}"
    ) || printf 'WARNING: /etc/os-release could not be parsed\n'
  else
    printf 'WARNING: /etc/os-release not readable\n'
  fi

  section "Kernel"
  run_or_warn "kernel release" uname -r

  section "CPU Load"
  if [[ -r /proc/loadavg ]]; then
    awk '{print "1m="$1, "5m="$2, "15m="$3}' /proc/loadavg
  else
    # Fall back to uptime, which also reports the load averages.
    run_or_warn "uptime" uptime
  fi

  section "Memory"
  run_or_warn "memory usage" free -h

  section "Disk"
  run_or_warn "disk usage" df -h -x tmpfs -x devtmpfs

  section "Failed systemd Services"
  if command -v systemctl >/dev/null 2>&1; then
    # Best effort: a non-zero exit from systemctl must not abort the report.
    systemctl --failed --no-pager || true
  else
    printf 'WARNING: systemctl command not available\n'
  fi

  section "Listening Ports"
  if command -v ss >/dev/null 2>&1; then
    ss -tuln || printf 'WARNING: ss command failed\n'
  else
    printf 'WARNING: ss command not available\n'
  fi

  section "Recent Kernel Messages"
  if command -v journalctl >/dev/null 2>&1; then
    journalctl -k -n 50 --no-pager || printf 'WARNING: journalctl kernel log query failed\n'
  else
    printf 'WARNING: journalctl command not available\n'
  fi
} > "$report"

printf 'System report written to: %s\n' "$report"