Add initial Linux operations Bash toolkit with network diagnostics
This commit is contained in:
@@ -0,0 +1,51 @@
|
|||||||
|
# Linux Operations Bash Toolkit
|
||||||
|
|
||||||
|
Small, practical Bash scripts for Linux operations checks and incident triage. The scripts are sanitized examples inspired by production Linux operations work and avoid destructive actions or root-only assumptions.
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
- `healthcheck.sh` - general host health overview.
|
||||||
|
- `disk_check.sh` - filesystem usage threshold check.
|
||||||
|
- `service_check.sh` - critical service status check.
|
||||||
|
- `system_report.sh` - writes a timestamped system report to `/tmp`.
|
||||||
|
- `network_troubleshoot.sh` - local and optional remote network diagnostics.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./healthcheck.sh
|
||||||
|
./disk_check.sh
|
||||||
|
./disk_check.sh 90
|
||||||
|
./service_check.sh
|
||||||
|
./service_check.sh sshd nginx zabbix-agent
|
||||||
|
./system_report.sh
|
||||||
|
./network_troubleshoot.sh
|
||||||
|
./network_troubleshoot.sh google.com
|
||||||
|
```
|
||||||
|
|
||||||
|
## Exit Codes
|
||||||
|
|
||||||
|
`disk_check.sh`:
|
||||||
|
|
||||||
|
- `0` - all filesystems are below the threshold.
|
||||||
|
- `1` - one or more filesystems are at or above the threshold.
|
||||||
|
- `2` - invalid threshold input.
|
||||||
|
|
||||||
|
`service_check.sh`:
|
||||||
|
|
||||||
|
- `0` - all checked services are active.
|
||||||
|
- `1` - at least one service is inactive, failed, missing, or cannot be checked.
|
||||||
|
|
||||||
|
`network_troubleshoot.sh`:
|
||||||
|
|
||||||
|
- `0` - no obvious local, DNS, or connectivity issue detected.
|
||||||
|
- `1` - DNS, interface, gateway, or target connectivity problems detected.
|
||||||
|
|
||||||
|
`healthcheck.sh` and `system_report.sh` are informational. They print warnings for missing tools where possible.
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- Requires Bash.
|
||||||
|
- Designed for RHEL, Oracle Linux, and Ubuntu-style systems.
|
||||||
|
- Handles missing tools such as `ss`, `traceroute`, `nc`, and `journalctl` gracefully.
|
||||||
|
- Does not require root and does not make system changes.
|
||||||
Executable
+29
@@ -0,0 +1,29 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -o errexit
|
||||||
|
set -o nounset
|
||||||
|
set -o pipefail
|
||||||
|
|
||||||
|
# Usage: disk_check.sh [threshold]
#   threshold - percent used (1-100, default 80) at or above which a
#               filesystem is reported as CRITICAL.
# Exit codes: 0 all below threshold, 1 at/above threshold or nothing could be
#             checked, 2 invalid threshold.
raw_threshold="${1:-80}"
threshold=0

# Strip leading zeros and force base 10: bash arithmetic would otherwise read
# inputs such as "08" or "09" as invalid octal constants and abort the script
# with a shell error instead of the documented exit code 2.
if [[ "$raw_threshold" =~ ^0*([0-9]{1,3})$ ]]; then
  threshold=$((10#${BASH_REMATCH[1]}))
fi

if (( threshold < 1 || threshold > 100 )); then
  printf 'CRITICAL: invalid threshold "%s"; provide an integer from 1 to 100\n' "$raw_threshold" >&2
  exit 2
fi

status=0
checked=0
# Warn within 5 points of the threshold; for thresholds of 5 or less the
# warning band collapses onto the threshold itself.
warning_threshold=$(( threshold > 5 ? threshold - 5 : threshold ))

# df exits non-zero when any single filesystem is unreadable while still
# listing the others, so keep whatever output it produced.
df_output="$(df -P -x tmpfs -x devtmpfs)" \
  || printf 'WARNING: df reported errors; results may be incomplete\n' >&2

# read assigns the rest of the line to the last variable, so mount points
# containing spaces stay intact.
while read -r filesystem _size _used avail use_percent mountpoint; do
  if [[ ! "$use_percent" =~ ^([0-9]+)%$ ]]; then
    # Entries such as "-" (or shifted columns) cannot be compared numerically.
    printf 'WARNING: skipping unparsable df entry for %s\n' "$filesystem" >&2
    continue
  fi
  usage=$((10#${BASH_REMATCH[1]}))
  checked=$(( checked + 1 ))

  if (( usage >= threshold )); then
    printf 'CRITICAL: %s mounted on %s is %s used; threshold is %s%% (%s free)\n' "$filesystem" "$mountpoint" "$use_percent" "$threshold" "$avail"
    status=1
  elif (( usage >= warning_threshold )); then
    printf 'WARNING: %s mounted on %s is %s used; threshold is %s%%\n' "$filesystem" "$mountpoint" "$use_percent" "$threshold"
  else
    printf 'OK: %s mounted on %s is %s used\n' "$filesystem" "$mountpoint" "$use_percent"
  fi
done < <(tail -n +2 <<<"$df_output")

# An empty result must not be reported as a silent success.
if (( checked == 0 )); then
  printf 'CRITICAL: no filesystems could be checked\n' >&2
  status=1
fi

exit "$status"
|
||||||
Executable
+68
@@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -o errexit
|
||||||
|
set -o nounset
|
||||||
|
set -o pipefail
|
||||||
|
|
||||||
|
# Print a blank line followed by a "== <title> ==" heading.
# Arguments: $1 - heading text
section() {
  local title="$1"

  printf '\n== %s ==\n' "$title"
}
|
||||||
|
|
||||||
|
# Run a command if it is installed, printing a warning instead of failing.
# Arguments: $1  - human-readable description used in the failure warning
#            $2… - command and its arguments
# Outputs:   the command's own output, or a WARNING line on stdout
run_or_warn() {
  local label="$1"
  shift

  # Guard clause: nothing to run when the tool is missing.
  if ! command -v "$1" >/dev/null 2>&1; then
    printf 'WARNING: %s command not available\n' "$1"
    return
  fi

  if ! "$@"; then
    printf 'WARNING: %s command failed\n' "$label"
  fi
}
|
||||||
|
|
||||||
|
# Print the ps header plus the ten highest-ranked processes.
# Arguments: $1 - ps sort key, e.g. "-%cpu" or "-%mem"
# Outputs:   up to 11 lines of ps output, or a WARNING line on stdout
# Returns:   0 unless printing the warning itself fails
top_processes() {
  local sort_key="$1"

  if ! command -v ps >/dev/null 2>&1; then
    printf 'WARNING: ps command not available\n'
    return 0
  fi

  # awk reads ps output to EOF, unlike head, which closes the pipe after 11
  # lines. With pipefail + errexit an early close can kill ps with SIGPIPE on
  # hosts with many processes and abort the whole script. Any remaining ps
  # failure is downgraded to a warning, like the other helpers in this file.
  ps -eo pid,ppid,comm,%cpu,%mem --sort="$sort_key" | awk 'NR <= 11' \
    || printf 'WARNING: ps command failed\n'
}
|
||||||
|
|
||||||
|
# Main report flow. Every external tool goes through run_or_warn (or an
# explicit guard) so that a missing or failing tool produces a WARNING line
# instead of aborting this informational script under errexit.
section "Host"
run_or_warn "hostname" hostname
run_or_warn "uptime" uptime

section "OS"
if [[ -r /etc/os-release ]]; then
  # Source in a subshell so the variables defined by os-release do not leak
  # into (or overwrite) this script's own variables.
  ( . /etc/os-release && printf '%s\n' "${PRETTY_NAME:-Unknown Linux}" ) \
    || printf 'WARNING: /etc/os-release could not be parsed\n'
else
  printf 'WARNING: /etc/os-release not readable\n'
fi
run_or_warn "kernel release" uname -r

section "CPU Load"
if [[ -r /proc/loadavg ]]; then
  awk '{print "1m="$1, "5m="$2, "15m="$3}' /proc/loadavg
else
  # Fall back to uptime when /proc/loadavg is not readable.
  run_or_warn "uptime" uptime
fi

section "Memory"
run_or_warn "memory usage" free -h

section "Disk"
run_or_warn "disk usage" df -h -x tmpfs -x devtmpfs

section "Failed systemd Services"
if command -v systemctl >/dev/null 2>&1; then
  # Best effort: a non-zero status must not stop the report.
  systemctl --failed --no-pager || true
else
  printf 'WARNING: systemctl command not available\n'
fi

section "Top CPU Processes"
top_processes "-%cpu"

section "Top Memory Processes"
top_processes "-%mem"
|
||||||
+148
@@ -0,0 +1,148 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -o errexit
|
||||||
|
set -o nounset
|
||||||
|
set -o pipefail
|
||||||
|
|
||||||
|
# Optional remote host to test; empty means local checks only.
target="${1:-}"

# The target is passed as an argument to ping, traceroute, nc and getent.
# A value starting with "-" would be parsed by those tools as an option
# instead of a host name, so reject it up front.
if [[ "$target" == -* ]]; then
  printf 'CRITICAL: invalid target "%s"; provide a host name or IP address\n' "$target" >&2
  exit 1
fi

# Exit status of the script; set to 1 by the first critical finding.
status=0
# Messages collected for the summary section.
warnings=()
criticals=()
|
||||||
|
|
||||||
|
# Print a blank line followed by a "[<title>]" heading.
# Arguments: $1 - heading text
section() {
  local title="$1"

  printf '\n[%s]\n' "$title"
}
|
||||||
|
|
||||||
|
# Record a warning for the summary and print it.
# Globals:   warnings (appended)
# Arguments: $1 - warning message
warn() {
  local message="$1"

  warnings+=("$message")
  printf 'WARNING: %s\n' "$message"
}
|
||||||
|
|
||||||
|
# Record a critical finding, mark the run as failed, and print it.
# Globals:   criticals (appended), status (set to 1)
# Arguments: $1 - finding message
critical() {
  local message="$1"

  criticals+=("$message")
  status=1
  printf 'CRITICAL: %s\n' "$message"
}
|
||||||
|
|
||||||
|
# Succeed when the named command can be resolved by the shell.
# Arguments: $1 - command name
# Returns:   0 if found, non-zero otherwise; prints nothing
have() {
  local tool="$1"

  command -v "$tool" &>/dev/null
}
|
||||||
|
|
||||||
|
# Run a command when its tool is installed; otherwise record a warning.
# A failing command is recorded as a warning as well.
# Arguments: $1  - tool name to look up and to use in warning text
#            $2… - command line to execute
run_if_available() {
  local tool="$1"
  shift

  # Guard clause: missing tool.
  if ! have "$tool"; then
    warn "$tool command not available"
    return
  fi

  if ! "$@"; then
    warn "$tool command failed"
  fi
}
|
||||||
|
|
||||||
|
# Main diagnostic flow. Findings are recorded through warn/critical; the
# script exits with $status (1 once any critical finding was recorded).

section "LOCAL NETWORK"
if have ip; then
  ip addr || warn "ip addr command failed"
  printf '\nRouting table:\n'
  ip route || warn "ip route command failed"
  printf '\nDefault gateway:\n'
  # Query the default route once, so the output shown and the output
  # evaluated are the same snapshot.
  if default_route="$(ip route show default)"; then
    if [[ -n "$default_route" ]]; then
      printf '%s\n' "$default_route"
    fi
    if ! grep -q '^default ' <<<"$default_route"; then
      critical "default gateway not configured"
    fi
  else
    critical "default gateway not found"
  fi
else
  warn "ip command not available"
fi

section "INTERFACES"
active_interfaces=0
if have ip; then
  # Capture the listing once. If the query itself fails, only warn: counting
  # zero interfaces from a failed query would raise a false CRITICAL.
  if link_state="$(ip -br link)"; then
    printf '%s\n' "$link_state"
    active_interfaces="$(awk '$2 == "UP" && $1 != "lo" {count++} END {print count+0}' <<<"$link_state")"
    if (( active_interfaces == 0 )); then
      critical "no active non-loopback interface detected"
    else
      printf 'OK: %s active non-loopback interface(s) detected\n' "$active_interfaces"
    fi
  else
    warn "interface state query failed"
  fi
else
  warn "cannot inspect interface state without ip command"
fi

section "DNS"
if [[ -r /etc/resolv.conf ]]; then
  cat /etc/resolv.conf
else
  warn "/etc/resolv.conf not readable"
fi

# Without a target, fall back to resolving localhost.
dns_target="${target:-localhost}"
if have getent; then
  # Single lookup: the result printed is the result that was evaluated.
  if dns_result="$(getent hosts "$dns_target" 2>/dev/null)"; then
    printf 'OK: DNS resolution succeeded for %s\n' "$dns_target"
    printf '%s\n' "$dns_result"
  else
    critical "DNS resolution failed for ${dns_target}"
  fi
elif have nslookup; then
  if nslookup "$dns_target"; then
    printf 'OK: DNS resolution succeeded for %s\n' "$dns_target"
  else
    critical "DNS resolution failed for ${dns_target}"
  fi
else
  warn "no DNS lookup tool available"
fi

section "CONNECTIVITY"
if [[ -n "$target" ]]; then
  if have ping; then
    if ping -c 3 -W 2 "$target"; then
      printf 'OK: ping succeeded for %s\n' "$target"
    else
      critical "ping failed for ${target}"
    fi
  else
    warn "ping command not available"
  fi

  run_if_available traceroute traceroute "$target"

  # Prefer nc for a plain TCP check; fall back to an HTTPS HEAD via curl.
  if have nc; then
    if nc -vz -w 3 "$target" 443; then
      printf 'OK: TCP 443 reachable on %s\n' "$target"
    else
      critical "TCP 443 connectivity failed for ${target}"
    fi
  elif have curl; then
    if curl --head --silent --show-error --connect-timeout 5 "https://${target}" >/dev/null; then
      printf 'OK: HTTPS connectivity succeeded for %s\n' "$target"
    else
      critical "HTTPS connectivity failed for ${target}"
    fi
  else
    warn "no TCP connectivity test tool available (nc or curl)"
  fi
else
  printf 'OK: no target provided; skipped remote connectivity checks\n'
fi

section "PORTS"
run_if_available ss ss -tuln

section "SUMMARY"
if (( ${#criticals[@]} > 0 )); then
  printf 'CRITICAL: %s issue(s) detected\n' "${#criticals[@]}"
fi

if (( ${#warnings[@]} > 0 )); then
  printf 'WARNING: %s warning(s) detected\n' "${#warnings[@]}"
fi

if (( status == 0 )); then
  printf 'OK: no obvious DNS or connectivity problems detected\n'
fi

exit "$status"
|
||||||
Executable
+60
@@ -0,0 +1,60 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -o errexit
|
||||||
|
set -o nounset
|
||||||
|
set -o pipefail
|
||||||
|
|
||||||
|
services=("$@")
|
||||||
|
|
||||||
|
# Check whether systemd knows a unit file for the given service.
# Arguments: $1 - service name, with or without a trailing ".service"
# Returns:   0 if "<name>.service" is listed by list-unit-files, else non-zero
service_exists() {
  # Accept "nginx" and "nginx.service" alike; without this the lookup would
  # be for "nginx.service.service" and always fail.
  local service="${1%.service}"
  local unit="${service}.service"

  # awk compares the first column as a literal string (no regex
  # metacharacters) and reads all input, so no earlier pipeline stage is cut
  # off under pipefail.
  systemctl list-unit-files "$unit" --no-legend 2>/dev/null \
    | awk -v unit="$unit" '$1 == unit { found = 1 } END { exit !found }'
}
|
||||||
|
|
||||||
|
# Print the name of the cron service to check by default.
# Tries "cron" first, then "crond"; prints "cron" when neither unit exists.
# Outputs: service name on stdout, without a trailing newline
pick_default_scheduler() {
  local candidate

  for candidate in cron crond; do
    if service_exists "$candidate"; then
      printf '%s' "$candidate"
      return
    fi
  done

  printf 'cron'
}
|
||||||
|
|
||||||
|
# Print the name of the SSH service to check by default.
# Tries "sshd" first, then "ssh"; prints "sshd" when neither unit exists.
# Outputs: service name on stdout, without a trailing newline
pick_default_ssh() {
  local candidate

  for candidate in sshd ssh; do
    if service_exists "$candidate"; then
      printf '%s' "$candidate"
      return
    fi
  done

  printf 'sshd'
}
|
||||||
|
|
||||||
|
# Main flow: check every requested service (or the default SSH and cron
# services when none were given) and exit 1 if any is missing or not active.
if ! command -v systemctl >/dev/null 2>&1; then
  printf 'CRITICAL: systemctl command not available; cannot check services\n' >&2
  exit 1
fi

if (( ${#services[@]} == 0 )); then
  services=("$(pick_default_ssh)" "$(pick_default_scheduler)")
fi

status=0

for service in "${services[@]}"; do
  if ! service_exists "$service"; then
    printf 'CRITICAL: %s service not found\n' "$service"
    status=1
    continue
  fi

  # One is-active call supplies both the verdict (exit status) and the state
  # text. Asking twice could print a state that no longer matches the
  # verdict if the unit changed in between.
  if state="$(systemctl is-active "$service" 2>/dev/null)"; then
    printf 'OK: %s is active\n' "$service"
  else
    printf 'CRITICAL: %s is %s\n' "$service" "${state:-unknown}"
    status=1
  fi
done

exit "$status"
|
||||||
Executable
+81
@@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -o errexit
|
||||||
|
set -o nounset
|
||||||
|
set -o pipefail
|
||||||
|
|
||||||
|
# Host name used in the report file name; fall back to uname when the
# hostname tool is missing, and keep only characters safe in a file name.
host="$(hostname 2>/dev/null || uname -n)"
host="${host//[^A-Za-z0-9._-]/_}"
timestamp="$(date '+%Y-%m-%d_%H%M%S')"

# Create the report with mktemp rather than writing to a predictable name:
# a guessable path in world-writable /tmp could be pre-created (or symlinked)
# by another user. mktemp also creates the file readable by its owner only.
report="$(mktemp "/tmp/system_report_${host}_${timestamp}_XXXXXX.txt")" || {
  printf 'CRITICAL: could not create report file in /tmp\n' >&2
  exit 1
}
|
||||||
|
|
||||||
|
# Print a blank line followed by a "== <title> ==" heading.
# Arguments: $1 - heading text
section() {
  local title="$1"

  printf '\n== %s ==\n' "$title"
}
|
||||||
|
|
||||||
|
# Run a command if it is installed, printing a warning instead of failing.
# Arguments: $1  - human-readable description used in the failure warning
#            $2… - command and its arguments
# Outputs:   the command's own output, or a WARNING line on stdout
run_or_warn() {
  local label="$1"
  shift

  # Guard clause: nothing to run when the tool is missing.
  if ! command -v "$1" >/dev/null 2>&1; then
    printf 'WARNING: %s command not available\n' "$1"
    return
  fi

  if ! "$@"; then
    printf 'WARNING: %s command failed\n' "$label"
  fi
}
|
||||||
|
|
||||||
|
# Write the whole report to "$report". Every external tool is guarded so that
# a missing or failing tool becomes a WARNING line in the report, instead of
# aborting under errexit with a truncated file and no path message.
{
  section "Host"
  run_or_warn "hostname" hostname

  section "Date"
  run_or_warn "date" date

  section "Uptime"
  run_or_warn "uptime" uptime

  section "OS"
  if [[ -r /etc/os-release ]]; then
    # Source in a subshell so the variables defined by os-release cannot
    # overwrite this script's own variables.
    ( . /etc/os-release && printf '%s\n' "${PRETTY_NAME:-Unknown Linux}" ) \
      || printf 'WARNING: /etc/os-release could not be parsed\n'
  else
    printf 'WARNING: /etc/os-release not readable\n'
  fi

  section "Kernel"
  run_or_warn "kernel release" uname -r

  section "CPU Load"
  if [[ -r /proc/loadavg ]]; then
    awk '{print "1m="$1, "5m="$2, "15m="$3}' /proc/loadavg
  else
    # Fall back to uptime when /proc/loadavg is not readable.
    run_or_warn "uptime" uptime
  fi

  section "Memory"
  run_or_warn "memory usage" free -h

  section "Disk"
  run_or_warn "disk usage" df -h -x tmpfs -x devtmpfs

  section "Failed systemd Services"
  if command -v systemctl >/dev/null 2>&1; then
    # Best effort: a non-zero status must not stop the report.
    systemctl --failed --no-pager || true
  else
    printf 'WARNING: systemctl command not available\n'
  fi

  section "Listening Ports"
  run_or_warn "ss" ss -tuln

  section "Recent Kernel Messages"
  if command -v journalctl >/dev/null 2>&1; then
    journalctl -k -n 50 --no-pager || printf 'WARNING: journalctl kernel log query failed\n'
  else
    printf 'WARNING: journalctl command not available\n'
  fi
} > "$report"

printf 'System report written to: %s\n' "$report"
|
||||||
Reference in New Issue
Block a user