Add initial Linux operations Bash toolkit with network diagnostics

This commit is contained in:
Mateusz Suski
2026-05-05 21:26:02 +00:00
parent 0a242e82b7
commit 9fb291f834
6 changed files with 437 additions and 0 deletions
+51
View File
@@ -0,0 +1,51 @@
# Linux Operations Bash Toolkit
Small, practical Bash scripts for routine Linux operational checks and incident triage. The scripts are sanitized examples inspired by production operations work; they avoid destructive actions and do not assume root privileges.
## Scripts
- `healthcheck.sh` - general host health overview.
- `disk_check.sh` - filesystem usage threshold check.
- `service_check.sh` - critical service status check.
- `system_report.sh` - writes a timestamped system report to `/tmp`.
- `network_troubleshoot.sh` - local and optional remote network diagnostics.
## Usage
```bash
./healthcheck.sh
./disk_check.sh
./disk_check.sh 90
./service_check.sh
./service_check.sh sshd nginx zabbix-agent
./system_report.sh
./network_troubleshoot.sh
./network_troubleshoot.sh google.com
```
## Exit Codes
`disk_check.sh`:
- `0` - all filesystems are below the threshold.
- `1` - one or more filesystems are at or above the threshold.
- `2` - invalid threshold input.
`service_check.sh`:
- `0` - all checked services are active.
- `1` - at least one service is inactive, failed, missing, or cannot be checked.
`network_troubleshoot.sh`:
- `0` - no obvious local, DNS, or connectivity issue detected.
- `1` - DNS, interface, gateway, or target connectivity problems detected.
`healthcheck.sh` and `system_report.sh` are informational: they report state and do not define check-specific exit codes. They print warnings for missing tools where possible.
## Notes
- Requires Bash.
- Designed for RHEL, Oracle Linux, and Ubuntu-style systems.
- Handles missing tools such as `ss`, `traceroute`, `nc`, and `journalctl` gracefully.
- Does not require root and does not make system changes.
+29
View File
@@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Filesystem usage threshold check.
#
# Usage: pass an optional integer threshold from 1 to 100 (default 80).
#
# Exit codes:
#   0 - every checked filesystem is below the threshold
#   1 - at least one filesystem is at or above the threshold
#   2 - the threshold argument is invalid
set -o errexit
set -o nounset
set -o pipefail

raw_threshold="${1:-80}"
threshold=0

# Accept at most three significant digits and force base 10. Without the
# explicit base, Bash arithmetic treats a leading zero as octal, so inputs such
# as "08" or "09" would pass a digits-only check and then fail to evaluate.
if [[ "$raw_threshold" =~ ^0*([0-9]{1,3})$ ]]; then
  threshold=$(( 10#${BASH_REMATCH[1]} ))
fi

if (( threshold < 1 || threshold > 100 )); then
  printf 'CRITICAL: invalid threshold "%s"; provide an integer from 1 to 100\n' "$raw_threshold" >&2
  exit 2
fi

status=0
# Warn five points below the threshold; thresholds of 5 or less get no
# separate warning band.
warning_threshold=$(( threshold > 5 ? threshold - 5 : threshold ))

# The whole df line is passed through (only the header is dropped) so that
# `read` can assign the remainder of the line to the last variable. This keeps
# mount points that contain spaces intact.
while read -r filesystem _size _used avail use_percent mountpoint; do
  usage="${use_percent%\%}"

  # A non-numeric capacity column (for example "-") cannot be compared;
  # report it instead of letting the arithmetic below fail.
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    printf 'WARNING: %s mounted on %s reports no numeric usage (%s); skipped\n' "$filesystem" "$mountpoint" "$use_percent"
    continue
  fi

  if (( 10#$usage >= threshold )); then
    printf 'CRITICAL: %s mounted on %s is %s used; threshold is %s%% (%s free)\n' "$filesystem" "$mountpoint" "$use_percent" "$threshold" "$avail"
    status=1
  elif (( 10#$usage >= warning_threshold )); then
    printf 'WARNING: %s mounted on %s is %s used; threshold is %s%%\n' "$filesystem" "$mountpoint" "$use_percent" "$threshold"
  else
    printf 'OK: %s mounted on %s is %s used\n' "$filesystem" "$mountpoint" "$use_percent"
  fi
done < <(df -P -x tmpfs -x devtmpfs | awk 'NR > 1')

exit "$status"
+68
View File
@@ -0,0 +1,68 @@
#!/usr/bin/env bash
#
# General host health overview: prints host, OS, load, memory, disk,
# failed systemd units, and the top CPU and memory consumers.
#
# Strict mode: stop on unhandled errors, unset variables, and failures in any
# pipeline stage.
set -o errexit -o nounset -o pipefail
# Print a section banner: an empty line followed by "== TITLE ==".
# Arguments: $1 - section title
# Outputs:   banner on stdout
section() {
  local title=$1
  printf '\n== %s ==\n' "$title"
}
# Run a command if its executable can be found; otherwise print a warning.
# A failing command produces a warning rather than stopping the caller.
# Arguments: $1  - human-readable description used in the failure warning
#            $2… - command and its arguments
# Outputs:   command output and/or a WARNING line on stdout
run_or_warn() {
  local label=$1
  shift
  local tool=$1

  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'WARNING: %s command not available\n' "$tool"
  elif ! "$@"; then
    printf 'WARNING: %s command failed\n' "$label"
  fi
}
# Print the ps header plus the first ten processes ordered by the given key.
# Arguments: $1 - sort key passed to `ps --sort` (e.g. "-%cpu")
# Outputs:   process table, or a WARNING line, on stdout
# Returns:   0 in all handled cases so an informational caller is not aborted
top_processes() {
  local sort_key="$1"
  local listing

  if ! command -v ps >/dev/null 2>&1; then
    printf 'WARNING: ps command not available\n'
    return 0
  fi

  # Capture the listing before truncating it. Piping ps straight into head can
  # fail the whole pipeline under pipefail (ps killed by SIGPIPE once head
  # exits, or ps rejecting --sort), which errexit would turn into a script
  # abort with no warning.
  if listing="$(ps -eo pid,ppid,comm,%cpu,%mem --sort="$sort_key")"; then
    head -n 11 <<<"$listing"
  else
    printf 'WARNING: ps command failed\n'
  fi
}
# Each probe goes through run_or_warn (or an explicit guard) so that a missing
# or failing tool yields a WARNING line instead of aborting under errexit.

section "Host"
run_or_warn "hostname" hostname
run_or_warn "uptime" uptime

section "OS"
if [[ -r /etc/os-release ]]; then
  # Source inside a subshell so variables defined by os-release cannot
  # overwrite anything in this script.
  (
    # shellcheck source=/dev/null
    . /etc/os-release
    printf '%s\n' "${PRETTY_NAME:-Unknown Linux}"
  ) || printf 'WARNING: /etc/os-release could not be parsed\n'
else
  printf 'WARNING: /etc/os-release not readable\n'
fi
run_or_warn "kernel release" uname -r

section "CPU Load"
if [[ -r /proc/loadavg ]]; then
  awk '{print "1m="$1, "5m="$2, "15m="$3}' /proc/loadavg
else
  # Without /proc/loadavg, fall back to the load figures printed by uptime.
  run_or_warn "uptime" uptime
fi

section "Memory"
run_or_warn "memory usage" free -h

section "Disk"
run_or_warn "disk usage" df -h -x tmpfs -x devtmpfs

section "Failed systemd Services"
if command -v systemctl >/dev/null 2>&1; then
  # Best effort: a non-zero exit from systemctl must not stop the overview.
  systemctl --failed --no-pager || true
else
  printf 'WARNING: systemctl command not available\n'
fi

section "Top CPU Processes"
top_processes "-%cpu"

section "Top Memory Processes"
top_processes "-%mem"
+148
View File
@@ -0,0 +1,148 @@
#!/usr/bin/env bash
#
# Local and optional remote network diagnostics.
# Arguments: $1 - optional target host for DNS and connectivity checks
#
# Strict mode: stop on unhandled errors, unset variables, and failures in any
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Optional remote target; empty means only local checks are performed.
target="${1:-}"

# Overall exit status plus the messages collected by warn() and critical().
status=0
declare -a warnings=() criticals=()
# Print a section banner: an empty line followed by "[TITLE]".
# Arguments: $1 - section title
# Outputs:   banner on stdout
section() {
  local title=$1
  printf '\n[%s]\n' "$title"
}
# Record a non-fatal finding and print it.
# Globals:   warnings (appended)
# Arguments: $1 - message text
# Outputs:   "WARNING: <message>" on stdout
warn() {
  local message=$1
  warnings+=("$message")
  printf 'WARNING: %s\n' "$message"
}
# Record a critical finding, mark the run as failed, and print the finding.
# Globals:   criticals (appended), status (set to 1)
# Arguments: $1 - message text
# Outputs:   "CRITICAL: <message>" on stdout
critical() {
  local message=$1
  criticals+=("$message")
  status=1
  printf 'CRITICAL: %s\n' "$message"
}
# Report whether a command name resolves in the current shell.
# Arguments: $1 - command name
# Returns:   0 when the command is found, non-zero otherwise
have() {
  local tool=$1
  command -v "$tool" &>/dev/null
}
# Run a command when the named tool exists; otherwise record a warning.
# A failing command is also recorded as a warning.
# Arguments: $1  - tool name to look up and to mention in warnings
#            $2… - command and its arguments
run_if_available() {
  local tool=$1
  shift

  if ! have "$tool"; then
    warn "$tool command not available"
    return
  fi

  "$@" || warn "$tool command failed"
}
section "LOCAL NETWORK"
if ! have ip; then
  warn "ip command not available"
else
  ip addr || warn "ip addr command failed"

  printf '\nRouting table:\n'
  ip route || warn "ip route command failed"

  printf '\nDefault gateway:\n'
  if ip route show default; then
    # The query itself succeeded; still confirm that a "default" entry is
    # actually present in its output.
    ip route show default | grep -q '^default ' \
      || critical "default gateway not configured"
  else
    critical "default gateway not found"
  fi
fi
section "INTERFACES"
active_interfaces=0
if have ip; then
  # Query the brief link listing once and reuse it. A second, unguarded query
  # inside a command substitution would fail the assignment under
  # errexit/pipefail (for example when this ip build rejects -br) and abort
  # the script right after the warning.
  if link_states="$(ip -br link)"; then
    printf '%s\n' "$link_states"
    # Count links whose state column is UP, ignoring the loopback device.
    active_interfaces="$(awk '$2 == "UP" && $1 != "lo" {count++} END {print count+0}' <<<"$link_states")"
    if (( active_interfaces == 0 )); then
      critical "no active non-loopback interface detected"
    else
      printf 'OK: %s active non-loopback interface(s) detected\n' "$active_interfaces"
    fi
  else
    warn "interface state query failed"
  fi
else
  warn "cannot inspect interface state without ip command"
fi
section "DNS"
if [[ ! -r /etc/resolv.conf ]]; then
  warn "/etc/resolv.conf not readable"
else
  cat /etc/resolv.conf
fi

# Resolve the requested target, or localhost when none was given.
dns_target="${target:-localhost}"

# Choose the lookup tool first (getent preferred), then run the matching probe.
resolver=""
if have getent; then
  resolver="getent"
elif have nslookup; then
  resolver="nslookup"
fi

case "$resolver" in
  getent)
    if getent hosts "$dns_target" >/dev/null 2>&1; then
      printf 'OK: DNS resolution succeeded for %s\n' "$dns_target"
      getent hosts "$dns_target"
    else
      critical "DNS resolution failed for ${dns_target}"
    fi
    ;;
  nslookup)
    if nslookup "$dns_target"; then
      printf 'OK: DNS resolution succeeded for %s\n' "$dns_target"
    else
      critical "DNS resolution failed for ${dns_target}"
    fi
    ;;
  *)
    warn "no DNS lookup tool available"
    ;;
esac
section "CONNECTIVITY"
if [[ -z "$target" ]]; then
  printf 'OK: no target provided; skipped remote connectivity checks\n'
else
  # ICMP reachability: three probes with a two-second wait each.
  if ! have ping; then
    warn "ping command not available"
  elif ping -c 3 -W 2 "$target"; then
    printf 'OK: ping succeeded for %s\n' "$target"
  else
    critical "ping failed for ${target}"
  fi

  # Path trace is optional; a missing or failing traceroute is only a warning.
  run_if_available traceroute traceroute "$target"

  # TCP reachability on port 443: nc preferred, curl over HTTPS as fallback.
  if have nc; then
    if nc -vz -w 3 "$target" 443; then
      printf 'OK: TCP 443 reachable on %s\n' "$target"
    else
      critical "TCP 443 connectivity failed for ${target}"
    fi
  elif have curl; then
    if curl --head --silent --show-error --connect-timeout 5 "https://${target}" >/dev/null; then
      printf 'OK: HTTPS connectivity succeeded for %s\n' "$target"
    else
      critical "HTTPS connectivity failed for ${target}"
    fi
  else
    warn "no TCP connectivity test tool available (nc or curl)"
  fi
fi
section "PORTS"
# List listening TCP/UDP sockets; the shared helper emits the
# "ss command not available" / "ss command failed" warnings.
run_if_available ss ss -tuln
section "SUMMARY"
critical_count=${#criticals[@]}
warning_count=${#warnings[@]}

# Print a count line only for categories that actually have findings.
(( critical_count == 0 )) || printf 'CRITICAL: %s issue(s) detected\n' "$critical_count"
(( warning_count == 0 )) || printf 'WARNING: %s warning(s) detected\n' "$warning_count"

# Warnings alone do not change the exit status; only criticals set it to 1.
(( status != 0 )) || printf 'OK: no obvious DNS or connectivity problems detected\n'

exit "$status"
+60
View File
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
#
# Critical service status check.
# Arguments: zero or more systemd service names to check
#
# Strict mode: stop on unhandled errors, unset variables, and failures in any
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Services named on the command line; may be empty.
declare -a services=("$@")
# Report whether systemd lists a unit file for the given service.
# Arguments: $1 - service name, with or without a trailing ".service"
# Returns:   0 when "<name>.service" appears in `systemctl list-unit-files`,
#            non-zero otherwise
service_exists() {
  # Normalise so both "nginx" and "nginx.service" look up "nginx.service"
  # rather than "nginx.service.service".
  local unit="${1%.service}.service"

  # -F compares the unit name literally; as a regex, its dots would match any
  # character. "--" protects names that begin with a dash.
  systemctl list-unit-files "$unit" --no-legend 2>/dev/null \
    | awk '{print $1}' \
    | grep -Fqx -- "$unit"
}
# Print the name of the scheduler service to check by default.
# Prefers "cron", then "crond"; prints "cron" when neither is found.
# Outputs: service name on stdout, without a trailing newline
pick_default_scheduler() {
  local candidate
  for candidate in cron crond; do
    if service_exists "$candidate"; then
      printf '%s' "$candidate"
      return
    fi
  done
  printf 'cron'
}
# Print the name of the SSH service to check by default.
# Prefers "sshd", then "ssh"; prints "sshd" when neither is found.
# Outputs: service name on stdout, without a trailing newline
pick_default_ssh() {
  local candidate
  for candidate in sshd ssh; do
    if service_exists "$candidate"; then
      printf '%s' "$candidate"
      return
    fi
  done
  printf 'sshd'
}
# Nothing can be checked without systemctl, so fail early.
if ! command -v systemctl >/dev/null 2>&1; then
  printf 'CRITICAL: systemctl command not available; cannot check services\n' >&2
  exit 1
fi

# With no services named on the command line, check the default SSH and
# scheduler services.
if (( ${#services[@]} == 0 )); then
  services=("$(pick_default_ssh)" "$(pick_default_scheduler)")
fi

status=0
for service in "${services[@]}"; do
  if ! service_exists "$service"; then
    printf 'CRITICAL: %s service not found\n' "$service"
    status=1
    continue
  fi

  # Query the state once and use both its exit status and its output. Asking
  # twice lets the state change between the calls, which could produce a
  # contradictory "CRITICAL: <service> is active" line.
  if state="$(systemctl is-active "$service" 2>/dev/null)"; then
    printf 'OK: %s is active\n' "$service"
  else
    printf 'CRITICAL: %s is %s\n' "$service" "${state:-unknown}"
    status=1
  fi
done

exit "$status"
+81
View File
@@ -0,0 +1,81 @@
#!/usr/bin/env bash
#
# Writes a timestamped system report to a file under /tmp.
set -o errexit
set -o nounset
set -o pipefail

# Fall back to uname when the hostname utility is not installed, so the script
# does not abort before the report is even started.
host="$(hostname 2>/dev/null || uname -n)"
timestamp="$(date '+%Y-%m-%d_%H%M%S')"

# Let mktemp create the report file with an unpredictable name component.
# A fully predictable path in world-writable /tmp could be pre-created, or
# replaced with a symlink, by another local user before this script writes
# to it.
report="$(mktemp "/tmp/system_report_${host}_${timestamp}_XXXXXX.txt")"
# Print a report section banner: an empty line followed by "== TITLE ==".
# Arguments: $1 - section title
# Outputs:   banner on stdout
section() {
  local heading=$1
  printf '\n== %s ==\n' "$heading"
}
# Run a command if its executable can be found; otherwise print a warning.
# Warnings go to stdout so they land in whatever the caller is capturing.
# Arguments: $1  - human-readable description used in the failure warning
#            $2… - command and its arguments
run_or_warn() {
  local what=$1
  shift
  local program=$1

  if ! command -v "$program" >/dev/null 2>&1; then
    printf 'WARNING: %s command not available\n' "$program"
  elif ! "$@"; then
    printf 'WARNING: %s command failed\n' "$what"
  fi
}
# Everything printed inside the group is redirected into the report file.
# Each probe is guarded so that a missing or failing tool adds a WARNING line
# to the report instead of aborting under errexit and leaving it truncated.
{
  section "Host"
  run_or_warn "hostname" hostname

  section "Date"
  run_or_warn "date" date

  section "Uptime"
  run_or_warn "uptime" uptime

  section "OS"
  if [[ -r /etc/os-release ]]; then
    # Source inside a subshell so variables defined by os-release cannot
    # overwrite anything in this script (such as the report path).
    (
      # shellcheck source=/dev/null
      . /etc/os-release
      printf '%s\n' "${PRETTY_NAME:-Unknown Linux}"
    ) || printf 'WARNING: /etc/os-release could not be parsed\n'
  else
    printf 'WARNING: /etc/os-release not readable\n'
  fi

  section "Kernel"
  run_or_warn "kernel release" uname -r

  section "CPU Load"
  if [[ -r /proc/loadavg ]]; then
    awk '{print "1m="$1, "5m="$2, "15m="$3}' /proc/loadavg
  else
    # Without /proc/loadavg, fall back to the load figures printed by uptime.
    run_or_warn "uptime" uptime
  fi

  section "Memory"
  run_or_warn "memory usage" free -h

  section "Disk"
  run_or_warn "disk usage" df -h -x tmpfs -x devtmpfs

  section "Failed systemd Services"
  if command -v systemctl >/dev/null 2>&1; then
    # Best effort: a non-zero exit from systemctl must not stop the report.
    systemctl --failed --no-pager || true
  else
    printf 'WARNING: systemctl command not available\n'
  fi

  section "Listening Ports"
  if command -v ss >/dev/null 2>&1; then
    ss -tuln || printf 'WARNING: ss command failed\n'
  else
    printf 'WARNING: ss command not available\n'
  fi

  section "Recent Kernel Messages"
  if command -v journalctl >/dev/null 2>&1; then
    journalctl -k -n 50 --no-pager || printf 'WARNING: journalctl kernel log query failed\n'
  else
    printf 'WARNING: journalctl command not available\n'
  fi
} > "$report"

printf 'System report written to: %s\n' "$report"