Files
Mateusz Suski e851568c8c
lint / shell-yaml-ansible (push) Failing after 16s
Add standalone Bash incident check scripts
2026-05-11 18:49:00 +00:00

122 lines
3.8 KiB
Bash
Executable File

#!/usr/bin/env bash
# Strict mode: abort on unhandled command failures, on expansion of unset
# variables, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Default offset thresholds in milliseconds; the --warning-offset and
# --critical-offset options replace them during option parsing.
warning_offset_ms=500
critical_offset_ms=5000
#######################################
# Print the two-line command synopsis.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
usage() {
  printf '%s\n' \
    'Usage: check_ntp_time_drift.sh [--warning-offset MS] [--critical-offset MS] [--help]' \
    'Check time synchronization status and offset evidence when available.'
}
#######################################
# Test whether a string consists solely of one or more decimal digits.
# Arguments: $1 - candidate string
# Returns:   0 if $1 is all digits, 1 otherwise (empty, signed, and
#            fractional values are rejected)
#######################################
is_number() {
  local candidate="$1"
  local -r digits_only='^[0-9]+$'

  if [[ "$candidate" =~ $digits_only ]]; then
    return 0
  fi
  return 1
}
# Consume command-line options left to right. Option errors are reported on
# stdout with a CRITICAL prefix and terminate the script with exit code 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --warning-offset)
      # The option needs a following value.
      if [[ $# -lt 2 ]]; then
        printf 'CRITICAL: --warning-offset requires a value\n'
        exit 2
      fi
      warning_offset_ms="$2"
      shift 2
      ;;
    --critical-offset)
      if [[ $# -lt 2 ]]; then
        printf 'CRITICAL: --critical-offset requires a value\n'
        exit 2
      fi
      critical_offset_ms="$2"
      shift 2
      ;;
    --help | -h)
      usage
      exit 0
      ;;
    *)
      printf 'CRITICAL: unknown option: %s\n' "$1"
      usage
      exit 2
      ;;
  esac
done
# Validate the thresholds: both must be plain non-negative integers, and the
# warning threshold must be strictly below the critical one. Failures are
# reported on stdout and end the script with exit code 2.
for value in "$warning_offset_ms" "$critical_offset_ms"; do
  if ! is_number "$value"; then
    printf 'CRITICAL: numeric option expected, got: %s\n' "$value"
    exit 2
  fi
done

# Force a base-10 reading. Bash arithmetic treats a leading zero as octal, so
# "010" would silently mean 8, and "08" would raise an arithmetic error that
# evaluates as false inside (( )) -- letting the sanity check below pass and
# silently disabling the later offset comparisons. Normalising here also
# protects every later use of these variables in (( )).
warning_offset_ms=$((10#$warning_offset_ms))
critical_offset_ms=$((10#$critical_offset_ms))

if ((warning_offset_ms >= critical_offset_ms)); then
  printf 'CRITICAL: --warning-offset must be lower than --critical-offset\n'
  exit 2
fi
# Report state, pre-filled with "nothing detected yet" values that the probes
# further down overwrite when a tool is available.
sync_status="unknown"
detected_tool="none"
offset_ms=""
timedate_output=""

# Local clock reading and timezone (abbreviation plus numeric UTC offset)
# as reported by date(1), captured for the report.
system_time="$(date +'%Y-%m-%d %H:%M:%S %Z %z')"
timezone="$(date +'%Z %z')"
# Probe timedatectl, when installed, for the synchronization flag.
if command -v timedatectl >/dev/null 2>&1; then
  detected_tool="timedatectl"

  # Best effort: a failing timedatectl leaves the output empty instead of
  # aborting the script under errexit.
  timedate_output="$(timedatectl 2>/dev/null)" || true

  # Take the value after the colon on the first line naming either
  # "System clock synchronized" or "NTP synchronized".
  sync_status="$(
    printf '%s\n' "$timedate_output" |
      awk -F: '
        /System clock synchronized|NTP synchronized/ {
          gsub(/^ +/, "", $2)
          print $2
          exit
        }
      '
  )"

  if [[ -z "$sync_status" ]]; then
    sync_status="unknown"
  fi
fi
# Probe chronyc, when installed, for a numeric offset; otherwise just record
# that ntpq is present (no offset is extracted from ntpq here).
chronyc_output=""
if command -v chronyc >/dev/null 2>&1; then
  detected_tool="chronyc"

  # Best effort: a failing "chronyc tracking" yields empty output rather than
  # aborting the script under errexit.
  chronyc_output="$(chronyc tracking 2>/dev/null)" || true

  # First line mentioning "Last offset" or "System time": keep the text after
  # the colon, minus leading blanks and everything from " seconds" onward.
  raw_offset="$(
    printf '%s\n' "$chronyc_output" |
      awk -F: '
        /Last offset|System time/ {
          gsub(/^ +| seconds.*$/, "", $2)
          print $2
          exit
        }
      '
  )"

  # Convert seconds to whole milliseconds of absolute offset (truncated).
  [[ -z "$raw_offset" ]] ||
    offset_ms="$(awk -v seconds="$raw_offset" \
      'BEGIN { printf "%d", (seconds < 0 ? -seconds : seconds) * 1000 }')"
elif command -v ntpq >/dev/null 2>&1; then
  detected_tool="ntpq"
fi
# Derive the overall status and exit code from the collected evidence.
# NOTE(review): CRITICAL maps to exit code 3 here, whereas option errors that
# are also labelled CRITICAL exit with 2 -- confirm this is the intended
# convention.
status="OK"
exit_code=0

# An explicit "not synchronized" answer is at least a warning.
case "$sync_status" in
  no | false)
    status="WARNING"
    exit_code=1
    ;;
esac

if [[ -z "$offset_ms" ]]; then
  # No numeric offset: warn only when no time tool was found at all.
  if [[ "$detected_tool" == "none" ]]; then
    status="WARNING"
    exit_code=1
  fi
elif ((offset_ms >= critical_offset_ms)); then
  status="CRITICAL"
  exit_code=3
elif ((offset_ms >= warning_offset_ms)); then
  status="WARNING"
  exit_code=1
fi
# Headline first, then the "Time status" section as label/value pairs.
# A missing offset is rendered as "unavailable".
printf '%s: Time sync status=%s offset_ms=%s\n\n' \
  "$status" "$sync_status" "${offset_ms:-unavailable}"

printf 'Time status:\n'
# printf re-applies the format to each successive label/value pair.
printf '%s: %s\n' \
  'System time' "$system_time" \
  'Timezone' "$timezone" \
  'Detected tool' "$detected_tool" \
  'NTP synchronized' "$sync_status" \
  'Offset ms' "${offset_ms:-unavailable}"
printf '\n'
# Raw tool output, first match wins: captured chronyc output, then a live
# "ntpq -p" run, then the captured timedatectl output.
printf 'Tool evidence:\n'
if [[ -n "$chronyc_output" ]]; then
  printf '%s\n' "$chronyc_output"
elif command -v ntpq >/dev/null 2>&1; then
  # ntpq writes straight to stdout; a failure is noted instead of aborting.
  if ! ntpq -p 2>/dev/null; then
    printf 'WARNING: ntpq command failed\n'
  fi
elif [[ -z "$timedate_output" ]]; then
  printf 'WARNING: timedatectl, chronyc, and ntpq are unavailable or returned no data\n'
else
  printf '%s\n' "$timedate_output"
fi
printf '\n'
# Echo the thresholds in effect and flag when no numeric offset was obtained.
printf 'Evidence:\n'
printf 'Thresholds: warning=%sms critical=%sms\n' \
  "$warning_offset_ms" "$critical_offset_ms"
[[ -n "$offset_ms" ]] ||
  printf 'WARNING: offset unavailable; status is based on available synchronization indicators only\n'
printf '\n'
# Static follow-up checklist, then terminate with the exit code derived from
# the checks above.
printf 'Recommended next steps:\n'
next_steps=(
  'Verify chrony or ntpd service status and configuration'
  'Check NTP sources and reachability'
  'Check virtualization host time if this is a VM'
  'Avoid restarting time services blindly in production'
  'Attach this output to incident ticket'
)
# "--" stops printf from treating the leading "-" of the format as an option.
printf -- '- %s\n' "${next_steps[@]}"

exit "$exit_code"